{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 500,
  "global_step": 15590,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 6.414368184733804e-05,
      "grad_norm": 2.122934421744886,
      "learning_rate": 1.2828736369467608e-07,
      "loss": 1.1536,
      "step": 1
    },
    {
      "epoch": 0.00032071840923669016,
      "grad_norm": 2.6120313710646594,
      "learning_rate": 6.414368184733804e-07,
      "loss": 0.8767,
      "step": 5
    },
    {
      "epoch": 0.0006414368184733803,
      "grad_norm": 1.9036320491341159,
      "learning_rate": 1.2828736369467608e-06,
      "loss": 1.1617,
      "step": 10
    },
    {
      "epoch": 0.0009621552277100705,
      "grad_norm": 2.2675442011112223,
      "learning_rate": 1.9243104554201413e-06,
      "loss": 0.9348,
      "step": 15
    },
    {
      "epoch": 0.0012828736369467607,
      "grad_norm": 1.8668180685612314,
      "learning_rate": 2.5657472738935216e-06,
      "loss": 0.9887,
      "step": 20
    },
    {
      "epoch": 0.001603592046183451,
      "grad_norm": 3.7598222149067344,
      "learning_rate": 3.2071840923669024e-06,
      "loss": 0.9933,
      "step": 25
    },
    {
      "epoch": 0.001924310455420141,
      "grad_norm": 0.7497789448952673,
      "learning_rate": 3.848620910840283e-06,
      "loss": 0.8951,
      "step": 30
    },
    {
      "epoch": 0.0022450288646568314,
      "grad_norm": 0.7497466999070768,
      "learning_rate": 4.490057729313663e-06,
      "loss": 1.1075,
      "step": 35
    },
    {
      "epoch": 0.0025657472738935213,
      "grad_norm": 0.6269808680953072,
      "learning_rate": 5.131494547787043e-06,
      "loss": 0.9902,
      "step": 40
    },
    {
      "epoch": 0.0028864656831302116,
      "grad_norm": 0.7670239135600633,
      "learning_rate": 5.7729313662604236e-06,
      "loss": 0.8813,
      "step": 45
    },
    {
      "epoch": 0.003207184092366902,
      "grad_norm": 2.7386818812948612,
      "learning_rate": 6.414368184733805e-06,
      "loss": 1.0156,
      "step": 50
    },
    {
      "epoch": 0.003527902501603592,
      "grad_norm": 0.6074882325288049,
      "learning_rate": 7.055805003207184e-06,
      "loss": 0.8396,
      "step": 55
    },
    {
      "epoch": 0.003848620910840282,
      "grad_norm": 0.5127156771589034,
      "learning_rate": 7.697241821680565e-06,
      "loss": 0.8716,
      "step": 60
    },
    {
      "epoch": 0.004169339320076972,
      "grad_norm": 1.9056440284966447,
      "learning_rate": 8.338678640153946e-06,
      "loss": 0.854,
      "step": 65
    },
    {
      "epoch": 0.004490057729313663,
      "grad_norm": 0.5220731546600494,
      "learning_rate": 8.980115458627326e-06,
      "loss": 0.7033,
      "step": 70
    },
    {
      "epoch": 0.004810776138550353,
      "grad_norm": 0.8528246103677898,
      "learning_rate": 9.621552277100706e-06,
      "loss": 0.7803,
      "step": 75
    },
    {
      "epoch": 0.005131494547787043,
      "grad_norm": 0.9553069537717978,
      "learning_rate": 1.0262989095574087e-05,
      "loss": 0.8577,
      "step": 80
    },
    {
      "epoch": 0.005452212957023733,
      "grad_norm": 1.089966886886208,
      "learning_rate": 1.0904425914047467e-05,
      "loss": 0.5766,
      "step": 85
    },
    {
      "epoch": 0.005772931366260423,
      "grad_norm": 1.3259601452666736,
      "learning_rate": 1.1545862732520847e-05,
      "loss": 0.6605,
      "step": 90
    },
    {
      "epoch": 0.006093649775497113,
      "grad_norm": 0.5501705053457395,
      "learning_rate": 1.2187299550994227e-05,
      "loss": 0.622,
      "step": 95
    },
    {
      "epoch": 0.006414368184733804,
      "grad_norm": 0.565393913728341,
      "learning_rate": 1.282873636946761e-05,
      "loss": 0.6598,
      "step": 100
    },
    {
      "epoch": 0.006735086593970494,
      "grad_norm": 0.8355349528605185,
      "learning_rate": 1.3470173187940988e-05,
      "loss": 0.57,
      "step": 105
    },
    {
      "epoch": 0.007055805003207184,
      "grad_norm": 0.7311107470147664,
      "learning_rate": 1.4111610006414368e-05,
      "loss": 0.7012,
      "step": 110
    },
    {
      "epoch": 0.0073765234124438745,
      "grad_norm": 0.7172503923642882,
      "learning_rate": 1.4753046824887749e-05,
      "loss": 0.6794,
      "step": 115
    },
    {
      "epoch": 0.007697241821680564,
      "grad_norm": 0.6517343625027339,
      "learning_rate": 1.539448364336113e-05,
      "loss": 0.7512,
      "step": 120
    },
    {
      "epoch": 0.008017960230917255,
      "grad_norm": 0.7506138412539792,
      "learning_rate": 1.603592046183451e-05,
      "loss": 0.6939,
      "step": 125
    },
    {
      "epoch": 0.008338678640153944,
      "grad_norm": 0.6731898912128177,
      "learning_rate": 1.667735728030789e-05,
      "loss": 0.6853,
      "step": 130
    },
    {
      "epoch": 0.008659397049390635,
      "grad_norm": 0.5526935784877048,
      "learning_rate": 1.731879409878127e-05,
      "loss": 0.7614,
      "step": 135
    },
    {
      "epoch": 0.008980115458627326,
      "grad_norm": 0.7407967682593112,
      "learning_rate": 1.7960230917254652e-05,
      "loss": 0.6847,
      "step": 140
    },
    {
      "epoch": 0.009300833867864015,
      "grad_norm": 1.0558900513241394,
      "learning_rate": 1.8601667735728032e-05,
      "loss": 0.6291,
      "step": 145
    },
    {
      "epoch": 0.009621552277100705,
      "grad_norm": 0.534720549756236,
      "learning_rate": 1.9243104554201412e-05,
      "loss": 0.6933,
      "step": 150
    },
    {
      "epoch": 0.009942270686337396,
      "grad_norm": 0.8533880407106053,
      "learning_rate": 1.9884541372674793e-05,
      "loss": 0.7405,
      "step": 155
    },
    {
      "epoch": 0.010262989095574085,
      "grad_norm": 2.2157811263492633,
      "learning_rate": 2.0525978191148173e-05,
      "loss": 0.6605,
      "step": 160
    },
    {
      "epoch": 0.010583707504810776,
      "grad_norm": 0.741553600188979,
      "learning_rate": 2.1167415009621553e-05,
      "loss": 0.6929,
      "step": 165
    },
    {
      "epoch": 0.010904425914047467,
      "grad_norm": 0.592672329081525,
      "learning_rate": 2.1808851828094934e-05,
      "loss": 0.7712,
      "step": 170
    },
    {
      "epoch": 0.011225144323284156,
      "grad_norm": 0.7143661642401767,
      "learning_rate": 2.2450288646568314e-05,
      "loss": 0.7264,
      "step": 175
    },
    {
      "epoch": 0.011545862732520847,
      "grad_norm": 0.7168820160805862,
      "learning_rate": 2.3091725465041694e-05,
      "loss": 0.7147,
      "step": 180
    },
    {
      "epoch": 0.011866581141757537,
      "grad_norm": 0.8106566714421187,
      "learning_rate": 2.3733162283515075e-05,
      "loss": 0.7091,
      "step": 185
    },
    {
      "epoch": 0.012187299550994226,
      "grad_norm": 1.131984585130431,
      "learning_rate": 2.4374599101988455e-05,
      "loss": 0.6725,
      "step": 190
    },
    {
      "epoch": 0.012508017960230917,
      "grad_norm": 0.5991057607118903,
      "learning_rate": 2.5016035920461832e-05,
      "loss": 0.5288,
      "step": 195
    },
    {
      "epoch": 0.012828736369467608,
      "grad_norm": 0.7441333776346593,
      "learning_rate": 2.565747273893522e-05,
      "loss": 0.6001,
      "step": 200
    },
    {
      "epoch": 0.013149454778704297,
      "grad_norm": 0.7177668887803592,
      "learning_rate": 2.6298909557408596e-05,
      "loss": 0.729,
      "step": 205
    },
    {
      "epoch": 0.013470173187940988,
      "grad_norm": 1.152356658408425,
      "learning_rate": 2.6940346375881976e-05,
      "loss": 0.649,
      "step": 210
    },
    {
      "epoch": 0.013790891597177678,
      "grad_norm": 0.8692844040434968,
      "learning_rate": 2.758178319435536e-05,
      "loss": 0.7514,
      "step": 215
    },
    {
      "epoch": 0.014111610006414367,
      "grad_norm": 0.7731506164196528,
      "learning_rate": 2.8223220012828737e-05,
      "loss": 0.7303,
      "step": 220
    },
    {
      "epoch": 0.014432328415651058,
      "grad_norm": 0.6675669855403799,
      "learning_rate": 2.8864656831302117e-05,
      "loss": 0.5974,
      "step": 225
    },
    {
      "epoch": 0.014753046824887749,
      "grad_norm": 0.6511258667141646,
      "learning_rate": 2.9506093649775497e-05,
      "loss": 0.6502,
      "step": 230
    },
    {
      "epoch": 0.015073765234124438,
      "grad_norm": 0.8153736796805081,
      "learning_rate": 3.014753046824888e-05,
      "loss": 0.7187,
      "step": 235
    },
    {
      "epoch": 0.015394483643361129,
      "grad_norm": 0.682020511101791,
      "learning_rate": 3.078896728672226e-05,
      "loss": 0.7687,
      "step": 240
    },
    {
      "epoch": 0.01571520205259782,
      "grad_norm": 0.9723518475601368,
      "learning_rate": 3.143040410519564e-05,
      "loss": 0.6333,
      "step": 245
    },
    {
      "epoch": 0.01603592046183451,
      "grad_norm": 0.6642430373016617,
      "learning_rate": 3.207184092366902e-05,
      "loss": 0.7503,
      "step": 250
    },
    {
      "epoch": 0.0163566388710712,
      "grad_norm": 1.0604072659225818,
      "learning_rate": 3.27132777421424e-05,
      "loss": 0.7296,
      "step": 255
    },
    {
      "epoch": 0.01667735728030789,
      "grad_norm": 0.5389238146909613,
      "learning_rate": 3.335471456061578e-05,
      "loss": 0.6449,
      "step": 260
    },
    {
      "epoch": 0.01699807568954458,
      "grad_norm": 1.0886777633244675,
      "learning_rate": 3.3996151379089166e-05,
      "loss": 0.6087,
      "step": 265
    },
    {
      "epoch": 0.01731879409878127,
      "grad_norm": 0.7740455363235514,
      "learning_rate": 3.463758819756254e-05,
      "loss": 0.7716,
      "step": 270
    },
    {
      "epoch": 0.01763951250801796,
      "grad_norm": 0.7842668340726671,
      "learning_rate": 3.527902501603592e-05,
      "loss": 0.6184,
      "step": 275
    },
    {
      "epoch": 0.01796023091725465,
      "grad_norm": 0.8724306321758412,
      "learning_rate": 3.5920461834509304e-05,
      "loss": 0.5647,
      "step": 280
    },
    {
      "epoch": 0.018280949326491342,
      "grad_norm": 0.6108159651722537,
      "learning_rate": 3.656189865298269e-05,
      "loss": 0.7748,
      "step": 285
    },
    {
      "epoch": 0.01860166773572803,
      "grad_norm": 0.554729905784846,
      "learning_rate": 3.7203335471456064e-05,
      "loss": 0.6969,
      "step": 290
    },
    {
      "epoch": 0.01892238614496472,
      "grad_norm": 0.5263100377774543,
      "learning_rate": 3.784477228992944e-05,
      "loss": 0.6331,
      "step": 295
    },
    {
      "epoch": 0.01924310455420141,
      "grad_norm": 0.7458575860438468,
      "learning_rate": 3.8486209108402825e-05,
      "loss": 0.7178,
      "step": 300
    },
    {
      "epoch": 0.0195638229634381,
      "grad_norm": 0.7209749688824592,
      "learning_rate": 3.912764592687621e-05,
      "loss": 0.7774,
      "step": 305
    },
    {
      "epoch": 0.019884541372674792,
      "grad_norm": 0.8894616503150261,
      "learning_rate": 3.9769082745349585e-05,
      "loss": 0.8354,
      "step": 310
    },
    {
      "epoch": 0.020205259781911483,
      "grad_norm": 0.6322923436990817,
      "learning_rate": 4.041051956382296e-05,
      "loss": 0.6009,
      "step": 315
    },
    {
      "epoch": 0.02052597819114817,
      "grad_norm": 0.9519419320088668,
      "learning_rate": 4.1051956382296346e-05,
      "loss": 0.61,
      "step": 320
    },
    {
      "epoch": 0.02084669660038486,
      "grad_norm": 0.654969001631436,
      "learning_rate": 4.169339320076972e-05,
      "loss": 0.602,
      "step": 325
    },
    {
      "epoch": 0.021167415009621552,
      "grad_norm": 0.6250956091655624,
      "learning_rate": 4.233483001924311e-05,
      "loss": 0.6451,
      "step": 330
    },
    {
      "epoch": 0.021488133418858243,
      "grad_norm": 0.7392153639819625,
      "learning_rate": 4.297626683771649e-05,
      "loss": 0.7724,
      "step": 335
    },
    {
      "epoch": 0.021808851828094934,
      "grad_norm": 0.7914340872699686,
      "learning_rate": 4.361770365618987e-05,
      "loss": 0.7245,
      "step": 340
    },
    {
      "epoch": 0.022129570237331624,
      "grad_norm": 0.5688389882467555,
      "learning_rate": 4.4259140474663244e-05,
      "loss": 0.5756,
      "step": 345
    },
    {
      "epoch": 0.02245028864656831,
      "grad_norm": 0.6860675746425041,
      "learning_rate": 4.490057729313663e-05,
      "loss": 0.6515,
      "step": 350
    },
    {
      "epoch": 0.022771007055805002,
      "grad_norm": 0.8497624484329163,
      "learning_rate": 4.554201411161001e-05,
      "loss": 0.742,
      "step": 355
    },
    {
      "epoch": 0.023091725465041693,
      "grad_norm": 0.9589070592978919,
      "learning_rate": 4.618345093008339e-05,
      "loss": 0.7261,
      "step": 360
    },
    {
      "epoch": 0.023412443874278384,
      "grad_norm": 0.5397605849852198,
      "learning_rate": 4.6824887748556765e-05,
      "loss": 0.721,
      "step": 365
    },
    {
      "epoch": 0.023733162283515075,
      "grad_norm": 0.4218758453965537,
      "learning_rate": 4.746632456703015e-05,
      "loss": 0.7008,
      "step": 370
    },
    {
      "epoch": 0.024053880692751765,
      "grad_norm": 0.4660237223228576,
      "learning_rate": 4.810776138550353e-05,
      "loss": 0.5954,
      "step": 375
    },
    {
      "epoch": 0.024374599101988453,
      "grad_norm": 1.1414044523272346,
      "learning_rate": 4.874919820397691e-05,
      "loss": 0.7092,
      "step": 380
    },
    {
      "epoch": 0.024695317511225143,
      "grad_norm": 0.7794538849217394,
      "learning_rate": 4.939063502245029e-05,
      "loss": 0.6556,
      "step": 385
    },
    {
      "epoch": 0.025016035920461834,
      "grad_norm": 0.6784254428885176,
      "learning_rate": 5.0032071840923663e-05,
      "loss": 0.6523,
      "step": 390
    },
    {
      "epoch": 0.025336754329698525,
      "grad_norm": 0.5550050199692612,
      "learning_rate": 5.0673508659397054e-05,
      "loss": 0.7065,
      "step": 395
    },
    {
      "epoch": 0.025657472738935216,
      "grad_norm": 1.3489642897531091,
      "learning_rate": 5.131494547787044e-05,
      "loss": 0.657,
      "step": 400
    },
    {
      "epoch": 0.025978191148171906,
      "grad_norm": 0.8799442657849393,
      "learning_rate": 5.195638229634381e-05,
      "loss": 0.7712,
      "step": 405
    },
    {
      "epoch": 0.026298909557408594,
      "grad_norm": 0.6211518086394292,
      "learning_rate": 5.259781911481719e-05,
      "loss": 0.6556,
      "step": 410
    },
    {
      "epoch": 0.026619627966645285,
      "grad_norm": 0.527786179579098,
      "learning_rate": 5.3239255933290575e-05,
      "loss": 0.6304,
      "step": 415
    },
    {
      "epoch": 0.026940346375881975,
      "grad_norm": 0.6225940856068456,
      "learning_rate": 5.388069275176395e-05,
      "loss": 0.7504,
      "step": 420
    },
    {
      "epoch": 0.027261064785118666,
      "grad_norm": 0.7472577597094603,
      "learning_rate": 5.4522129570237336e-05,
      "loss": 0.5737,
      "step": 425
    },
    {
      "epoch": 0.027581783194355357,
      "grad_norm": 0.9003123884674169,
      "learning_rate": 5.516356638871072e-05,
      "loss": 0.6751,
      "step": 430
    },
    {
      "epoch": 0.027902501603592048,
      "grad_norm": 1.193348964937134,
      "learning_rate": 5.580500320718409e-05,
      "loss": 0.6685,
      "step": 435
    },
    {
      "epoch": 0.028223220012828735,
      "grad_norm": 0.8207452374854483,
      "learning_rate": 5.644644002565747e-05,
      "loss": 0.5606,
      "step": 440
    },
    {
      "epoch": 0.028543938422065426,
      "grad_norm": 0.6253317338492933,
      "learning_rate": 5.7087876844130864e-05,
      "loss": 0.6848,
      "step": 445
    },
    {
      "epoch": 0.028864656831302116,
      "grad_norm": 0.5089340890778841,
      "learning_rate": 5.7729313662604234e-05,
      "loss": 0.5969,
      "step": 450
    },
    {
      "epoch": 0.029185375240538807,
      "grad_norm": 0.6403611822232731,
      "learning_rate": 5.837075048107762e-05,
      "loss": 0.6663,
      "step": 455
    },
    {
      "epoch": 0.029506093649775498,
      "grad_norm": 0.9017481128452324,
      "learning_rate": 5.9012187299550994e-05,
      "loss": 0.6253,
      "step": 460
    },
    {
      "epoch": 0.02982681205901219,
      "grad_norm": 0.7102935907261797,
      "learning_rate": 5.965362411802438e-05,
      "loss": 0.6032,
      "step": 465
    },
    {
      "epoch": 0.030147530468248876,
      "grad_norm": 0.572528044090495,
      "learning_rate": 6.029506093649776e-05,
      "loss": 0.7059,
      "step": 470
    },
    {
      "epoch": 0.030468248877485567,
      "grad_norm": 0.6507630672872388,
      "learning_rate": 6.093649775497113e-05,
      "loss": 0.551,
      "step": 475
    },
    {
      "epoch": 0.030788967286722257,
      "grad_norm": 0.4787872258590136,
      "learning_rate": 6.157793457344452e-05,
      "loss": 0.4953,
      "step": 480
    },
    {
      "epoch": 0.031109685695958948,
      "grad_norm": 0.6446626662145857,
      "learning_rate": 6.22193713919179e-05,
      "loss": 0.7073,
      "step": 485
    },
    {
      "epoch": 0.03143040410519564,
      "grad_norm": 0.46176975999305003,
      "learning_rate": 6.286080821039128e-05,
      "loss": 0.7031,
      "step": 490
    },
    {
      "epoch": 0.03175112251443233,
      "grad_norm": 0.6364571216466376,
      "learning_rate": 6.350224502886466e-05,
      "loss": 0.7208,
      "step": 495
    },
    {
      "epoch": 0.03207184092366902,
      "grad_norm": 0.6441271299481783,
      "learning_rate": 6.414368184733804e-05,
      "loss": 0.651,
      "step": 500
    },
    {
      "epoch": 0.03239255933290571,
      "grad_norm": 0.5277240516380076,
      "learning_rate": 6.478511866581141e-05,
      "loss": 0.7596,
      "step": 505
    },
    {
      "epoch": 0.0327132777421424,
      "grad_norm": 0.6102741778617242,
      "learning_rate": 6.54265554842848e-05,
      "loss": 0.8127,
      "step": 510
    },
    {
      "epoch": 0.033033996151379086,
      "grad_norm": 1.2909493866489476,
      "learning_rate": 6.606799230275818e-05,
      "loss": 0.6172,
      "step": 515
    },
    {
      "epoch": 0.03335471456061578,
      "grad_norm": 0.8290001510292774,
      "learning_rate": 6.670942912123156e-05,
      "loss": 0.7024,
      "step": 520
    },
    {
      "epoch": 0.03367543296985247,
      "grad_norm": 0.5082074367378367,
      "learning_rate": 6.735086593970495e-05,
      "loss": 0.5993,
      "step": 525
    },
    {
      "epoch": 0.03399615137908916,
      "grad_norm": 0.8948141239538124,
      "learning_rate": 6.799230275817833e-05,
      "loss": 0.6288,
      "step": 530
    },
    {
      "epoch": 0.03431686978832585,
      "grad_norm": 0.704188041016483,
      "learning_rate": 6.86337395766517e-05,
      "loss": 0.6173,
      "step": 535
    },
    {
      "epoch": 0.03463758819756254,
      "grad_norm": 0.8493617205406083,
      "learning_rate": 6.927517639512509e-05,
      "loss": 0.6472,
      "step": 540
    },
    {
      "epoch": 0.03495830660679923,
      "grad_norm": 0.6071336551640186,
      "learning_rate": 6.991661321359846e-05,
      "loss": 0.7066,
      "step": 545
    },
    {
      "epoch": 0.03527902501603592,
      "grad_norm": 0.6299761061285323,
      "learning_rate": 7.055805003207184e-05,
      "loss": 0.5004,
      "step": 550
    },
    {
      "epoch": 0.03559974342527261,
      "grad_norm": 0.36030076856010784,
      "learning_rate": 7.119948685054522e-05,
      "loss": 0.5939,
      "step": 555
    },
    {
      "epoch": 0.0359204618345093,
      "grad_norm": 0.5657747344505833,
      "learning_rate": 7.184092366901861e-05,
      "loss": 0.6394,
      "step": 560
    },
    {
      "epoch": 0.03624118024374599,
      "grad_norm": 0.5512464769253931,
      "learning_rate": 7.248236048749199e-05,
      "loss": 0.7496,
      "step": 565
    },
    {
      "epoch": 0.036561898652982684,
      "grad_norm": 1.6012481016769327,
      "learning_rate": 7.312379730596537e-05,
      "loss": 0.74,
      "step": 570
    },
    {
      "epoch": 0.03688261706221937,
      "grad_norm": 0.539931431422469,
      "learning_rate": 7.376523412443874e-05,
      "loss": 0.655,
      "step": 575
    },
    {
      "epoch": 0.03720333547145606,
      "grad_norm": 0.5792692922947517,
      "learning_rate": 7.440667094291213e-05,
      "loss": 0.6268,
      "step": 580
    },
    {
      "epoch": 0.03752405388069275,
      "grad_norm": 0.44904646394711184,
      "learning_rate": 7.504810776138551e-05,
      "loss": 0.621,
      "step": 585
    },
    {
      "epoch": 0.03784477228992944,
      "grad_norm": 0.38995414700568637,
      "learning_rate": 7.568954457985888e-05,
      "loss": 0.479,
      "step": 590
    },
    {
      "epoch": 0.03816549069916613,
      "grad_norm": 0.6100292909911376,
      "learning_rate": 7.633098139833227e-05,
      "loss": 0.5087,
      "step": 595
    },
    {
      "epoch": 0.03848620910840282,
      "grad_norm": 0.767232067956154,
      "learning_rate": 7.697241821680565e-05,
      "loss": 0.7094,
      "step": 600
    },
    {
      "epoch": 0.03880692751763951,
      "grad_norm": 0.5093223662182627,
      "learning_rate": 7.761385503527902e-05,
      "loss": 0.6216,
      "step": 605
    },
    {
      "epoch": 0.0391276459268762,
      "grad_norm": 0.604996949026468,
      "learning_rate": 7.825529185375242e-05,
      "loss": 0.6343,
      "step": 610
    },
    {
      "epoch": 0.039448364336112894,
      "grad_norm": 0.6313031887029451,
      "learning_rate": 7.88967286722258e-05,
      "loss": 0.6814,
      "step": 615
    },
    {
      "epoch": 0.039769082745349585,
      "grad_norm": 0.5515684818028812,
      "learning_rate": 7.953816549069917e-05,
      "loss": 0.6319,
      "step": 620
    },
    {
      "epoch": 0.040089801154586276,
      "grad_norm": 0.9067875561472081,
      "learning_rate": 8.017960230917255e-05,
      "loss": 0.626,
      "step": 625
    },
    {
      "epoch": 0.040410519563822966,
      "grad_norm": 0.4402348046376401,
      "learning_rate": 8.082103912764592e-05,
      "loss": 0.6581,
      "step": 630
    },
    {
      "epoch": 0.04073123797305965,
      "grad_norm": 0.6653624732467279,
      "learning_rate": 8.146247594611931e-05,
      "loss": 0.6266,
      "step": 635
    },
    {
      "epoch": 0.04105195638229634,
      "grad_norm": 0.7506028416479603,
      "learning_rate": 8.210391276459269e-05,
      "loss": 0.7304,
      "step": 640
    },
    {
      "epoch": 0.04137267479153303,
      "grad_norm": 0.43305772472870374,
      "learning_rate": 8.274534958306606e-05,
      "loss": 0.6272,
      "step": 645
    },
    {
      "epoch": 0.04169339320076972,
      "grad_norm": 0.7883927079167802,
      "learning_rate": 8.338678640153945e-05,
      "loss": 0.564,
      "step": 650
    },
    {
      "epoch": 0.04201411161000641,
      "grad_norm": 0.6406069976891953,
      "learning_rate": 8.402822322001283e-05,
      "loss": 0.6594,
      "step": 655
    },
    {
      "epoch": 0.042334830019243104,
      "grad_norm": 0.6650787540082842,
      "learning_rate": 8.466966003848621e-05,
      "loss": 0.6086,
      "step": 660
    },
    {
      "epoch": 0.042655548428479795,
      "grad_norm": 0.6280025445964529,
      "learning_rate": 8.53110968569596e-05,
      "loss": 0.6188,
      "step": 665
    },
    {
      "epoch": 0.042976266837716486,
      "grad_norm": 0.6181001304138187,
      "learning_rate": 8.595253367543298e-05,
      "loss": 0.6454,
      "step": 670
    },
    {
      "epoch": 0.043296985246953176,
      "grad_norm": 0.9164302121431295,
      "learning_rate": 8.659397049390635e-05,
      "loss": 0.7409,
      "step": 675
    },
    {
      "epoch": 0.04361770365618987,
      "grad_norm": 0.5146934352157929,
      "learning_rate": 8.723540731237973e-05,
      "loss": 0.7961,
      "step": 680
    },
    {
      "epoch": 0.04393842206542656,
      "grad_norm": 0.8884783771604745,
      "learning_rate": 8.787684413085312e-05,
      "loss": 0.7023,
      "step": 685
    },
    {
      "epoch": 0.04425914047466325,
      "grad_norm": 0.5972459928844025,
      "learning_rate": 8.851828094932649e-05,
      "loss": 0.6437,
      "step": 690
    },
    {
      "epoch": 0.04457985888389993,
      "grad_norm": 1.027137591537084,
      "learning_rate": 8.915971776779987e-05,
      "loss": 0.6461,
      "step": 695
    },
    {
      "epoch": 0.04490057729313662,
      "grad_norm": 0.684561126713197,
      "learning_rate": 8.980115458627326e-05,
      "loss": 0.6417,
      "step": 700
    },
    {
      "epoch": 0.045221295702373314,
      "grad_norm": 0.5791897637489775,
      "learning_rate": 9.044259140474664e-05,
      "loss": 0.6545,
      "step": 705
    },
    {
      "epoch": 0.045542014111610005,
      "grad_norm": 0.6093322265483176,
      "learning_rate": 9.108402822322002e-05,
      "loss": 0.5431,
      "step": 710
    },
    {
      "epoch": 0.045862732520846695,
      "grad_norm": 1.20412780035678,
      "learning_rate": 9.172546504169339e-05,
      "loss": 0.6122,
      "step": 715
    },
    {
      "epoch": 0.046183450930083386,
      "grad_norm": 0.4344736289735069,
      "learning_rate": 9.236690186016678e-05,
      "loss": 0.6896,
      "step": 720
    },
    {
      "epoch": 0.04650416933932008,
      "grad_norm": 0.479553471093618,
      "learning_rate": 9.300833867864016e-05,
      "loss": 0.7446,
      "step": 725
    },
    {
      "epoch": 0.04682488774855677,
      "grad_norm": 0.4175717995477323,
      "learning_rate": 9.364977549711353e-05,
      "loss": 0.5635,
      "step": 730
    },
    {
      "epoch": 0.04714560615779346,
      "grad_norm": 0.43527442203162864,
      "learning_rate": 9.429121231558691e-05,
      "loss": 0.5984,
      "step": 735
    },
    {
      "epoch": 0.04746632456703015,
      "grad_norm": 0.6764034597420034,
      "learning_rate": 9.49326491340603e-05,
      "loss": 0.6575,
      "step": 740
    },
    {
      "epoch": 0.04778704297626684,
      "grad_norm": 0.6994297524226791,
      "learning_rate": 9.557408595253368e-05,
      "loss": 0.6381,
      "step": 745
    },
    {
      "epoch": 0.04810776138550353,
      "grad_norm": 0.5924112864276749,
      "learning_rate": 9.621552277100707e-05,
      "loss": 0.6273,
      "step": 750
    },
    {
      "epoch": 0.04842847979474022,
      "grad_norm": 0.529839489096258,
      "learning_rate": 9.685695958948045e-05,
      "loss": 0.5524,
      "step": 755
    },
    {
      "epoch": 0.048749198203976905,
      "grad_norm": 0.5412474092793377,
      "learning_rate": 9.749839640795382e-05,
      "loss": 0.6584,
      "step": 760
    },
    {
      "epoch": 0.049069916613213596,
      "grad_norm": 0.62325178443721,
      "learning_rate": 9.81398332264272e-05,
      "loss": 0.7556,
      "step": 765
    },
    {
      "epoch": 0.04939063502245029,
      "grad_norm": 0.6185109985068113,
      "learning_rate": 9.878127004490059e-05,
      "loss": 0.6396,
      "step": 770
    },
    {
      "epoch": 0.04971135343168698,
      "grad_norm": 0.5650081284141024,
      "learning_rate": 9.942270686337396e-05,
      "loss": 0.6761,
      "step": 775
    },
    {
      "epoch": 0.05003207184092367,
      "grad_norm": 0.6838574740900004,
      "learning_rate": 0.00010006414368184733,
      "loss": 0.6228,
      "step": 780
    },
    {
      "epoch": 0.05035279025016036,
      "grad_norm": 0.6196830613093786,
      "learning_rate": 0.00010070558050032072,
      "loss": 0.6648,
      "step": 785
    },
    {
      "epoch": 0.05067350865939705,
      "grad_norm": 0.5504649558203162,
      "learning_rate": 0.00010134701731879411,
      "loss": 0.697,
      "step": 790
    },
    {
      "epoch": 0.05099422706863374,
      "grad_norm": 0.654837344932131,
      "learning_rate": 0.00010198845413726748,
      "loss": 0.6986,
      "step": 795
    },
    {
      "epoch": 0.05131494547787043,
      "grad_norm": 0.7011329232246133,
      "learning_rate": 0.00010262989095574088,
      "loss": 0.7206,
      "step": 800
    },
    {
      "epoch": 0.05163566388710712,
      "grad_norm": 0.6807528459174979,
      "learning_rate": 0.00010327132777421425,
      "loss": 0.6834,
      "step": 805
    },
    {
      "epoch": 0.05195638229634381,
      "grad_norm": 0.8856217259425705,
      "learning_rate": 0.00010391276459268762,
      "loss": 0.7028,
      "step": 810
    },
    {
      "epoch": 0.052277100705580504,
      "grad_norm": 0.5962908888781525,
      "learning_rate": 0.00010455420141116101,
      "loss": 0.5113,
      "step": 815
    },
    {
      "epoch": 0.05259781911481719,
      "grad_norm": 0.9014177998142,
      "learning_rate": 0.00010519563822963438,
      "loss": 0.6129,
      "step": 820
    },
    {
      "epoch": 0.05291853752405388,
      "grad_norm": 0.6753791164158136,
      "learning_rate": 0.00010583707504810775,
      "loss": 0.756,
      "step": 825
    },
    {
      "epoch": 0.05323925593329057,
      "grad_norm": 0.48791891735015575,
      "learning_rate": 0.00010647851186658115,
      "loss": 0.5352,
      "step": 830
    },
    {
      "epoch": 0.05355997434252726,
      "grad_norm": 0.7373582383544524,
      "learning_rate": 0.00010711994868505453,
      "loss": 0.7345,
      "step": 835
    },
    {
      "epoch": 0.05388069275176395,
      "grad_norm": 0.49964472362766127,
      "learning_rate": 0.0001077613855035279,
      "loss": 0.7314,
      "step": 840
    },
    {
      "epoch": 0.05420141116100064,
      "grad_norm": 0.48415921267506284,
      "learning_rate": 0.0001084028223220013,
      "loss": 0.5548,
      "step": 845
    },
    {
      "epoch": 0.05452212957023733,
      "grad_norm": 0.6197607704084165,
      "learning_rate": 0.00010904425914047467,
      "loss": 0.6271,
      "step": 850
    },
    {
      "epoch": 0.05484284797947402,
      "grad_norm": 0.677683386452661,
      "learning_rate": 0.00010968569595894804,
      "loss": 0.7739,
      "step": 855
    },
    {
      "epoch": 0.055163566388710714,
      "grad_norm": 0.7298215600744931,
      "learning_rate": 0.00011032713277742144,
      "loss": 0.6813,
      "step": 860
    },
    {
      "epoch": 0.055484284797947404,
      "grad_norm": 0.49556474863687744,
      "learning_rate": 0.00011096856959589481,
      "loss": 0.7165,
      "step": 865
    },
    {
      "epoch": 0.055805003207184095,
      "grad_norm": 0.4755941527376833,
      "learning_rate": 0.00011161000641436818,
      "loss": 0.7439,
      "step": 870
    },
    {
      "epoch": 0.056125721616420786,
      "grad_norm": 0.8183131489420952,
      "learning_rate": 0.00011225144323284158,
      "loss": 0.7741,
      "step": 875
    },
    {
      "epoch": 0.05644644002565747,
      "grad_norm": 0.577588746397813,
      "learning_rate": 0.00011289288005131495,
      "loss": 0.6951,
      "step": 880
    },
    {
      "epoch": 0.05676715843489416,
      "grad_norm": 0.3104626766912227,
      "learning_rate": 0.00011353431686978833,
      "loss": 0.6068,
      "step": 885
    },
    {
      "epoch": 0.05708787684413085,
      "grad_norm": 0.6364607751424182,
      "learning_rate": 0.00011417575368826173,
      "loss": 0.6601,
      "step": 890
    },
    {
      "epoch": 0.05740859525336754,
      "grad_norm": 0.5489548053878326,
      "learning_rate": 0.0001148171905067351,
      "loss": 0.6498,
      "step": 895
    },
    {
      "epoch": 0.05772931366260423,
      "grad_norm": 0.8290809901584166,
      "learning_rate": 0.00011545862732520847,
      "loss": 0.7598,
      "step": 900
    },
    {
      "epoch": 0.058050032071840924,
      "grad_norm": 0.9889805070312973,
      "learning_rate": 0.00011610006414368186,
      "loss": 0.6528,
      "step": 905
    },
    {
      "epoch": 0.058370750481077614,
      "grad_norm": 0.5034027315098741,
      "learning_rate": 0.00011674150096215524,
      "loss": 0.6916,
      "step": 910
    },
    {
      "epoch": 0.058691468890314305,
      "grad_norm": 0.5211514737547632,
      "learning_rate": 0.0001173829377806286,
      "loss": 0.6455,
      "step": 915
    },
    {
      "epoch": 0.059012187299550996,
      "grad_norm": 0.5915915443611912,
      "learning_rate": 0.00011802437459910199,
      "loss": 0.619,
      "step": 920
    },
    {
      "epoch": 0.05933290570878769,
      "grad_norm": 0.6356669965403786,
      "learning_rate": 0.00011866581141757537,
      "loss": 0.6339,
      "step": 925
    },
    {
      "epoch": 0.05965362411802438,
      "grad_norm": 0.5203747383599147,
      "learning_rate": 0.00011930724823604876,
      "loss": 0.7174,
      "step": 930
    },
    {
      "epoch": 0.05997434252726107,
      "grad_norm": 0.4400681567105204,
      "learning_rate": 0.00011994868505452213,
      "loss": 0.757,
      "step": 935
    },
    {
      "epoch": 0.06029506093649775,
      "grad_norm": 0.5134463977576896,
      "learning_rate": 0.00012059012187299552,
      "loss": 0.5941,
      "step": 940
    },
    {
      "epoch": 0.06061577934573444,
      "grad_norm": 2.1514572255404563,
      "learning_rate": 0.0001212315586914689,
      "loss": 0.5872,
      "step": 945
    },
    {
      "epoch": 0.06093649775497113,
      "grad_norm": 0.5533804918183362,
      "learning_rate": 0.00012187299550994226,
      "loss": 0.681,
      "step": 950
    },
    {
      "epoch": 0.061257216164207824,
      "grad_norm": 0.43736512301454394,
      "learning_rate": 0.00012251443232841566,
      "loss": 0.6241,
      "step": 955
    },
    {
      "epoch": 0.061577934573444515,
      "grad_norm": 0.7036625039029036,
      "learning_rate": 0.00012315586914688904,
      "loss": 0.7528,
      "step": 960
    },
    {
      "epoch": 0.061898652982681206,
      "grad_norm": 0.5883952786255479,
      "learning_rate": 0.0001237973059653624,
      "loss": 0.6132,
      "step": 965
    },
    {
      "epoch": 0.062219371391917896,
      "grad_norm": 0.593687347482467,
      "learning_rate": 0.0001244387427838358,
      "loss": 0.6453,
      "step": 970
    },
    {
      "epoch": 0.06254008980115458,
      "grad_norm": 0.8797836564455341,
      "learning_rate": 0.00012508017960230917,
      "loss": 0.6658,
      "step": 975
    },
    {
      "epoch": 0.06286080821039128,
      "grad_norm": 0.8231331839998992,
      "learning_rate": 0.00012572161642078255,
      "loss": 0.6615,
      "step": 980
    },
    {
      "epoch": 0.06318152661962796,
      "grad_norm": 0.5202568995405973,
      "learning_rate": 0.00012636305323925594,
      "loss": 0.8156,
      "step": 985
    },
    {
      "epoch": 0.06350224502886466,
      "grad_norm": 0.623580493806845,
      "learning_rate": 0.00012700449005772932,
      "loss": 0.6959,
      "step": 990
    },
    {
      "epoch": 0.06382296343810134,
      "grad_norm": 0.5798575607273242,
      "learning_rate": 0.0001276459268762027,
      "loss": 0.5538,
      "step": 995
    },
    {
      "epoch": 0.06414368184733804,
      "grad_norm": 0.6970653558425355,
      "learning_rate": 0.0001282873636946761,
      "loss": 0.7063,
      "step": 1000
    },
    {
      "epoch": 0.06446440025657472,
      "grad_norm": 0.8241115273976609,
      "learning_rate": 0.00012892880051314947,
      "loss": 0.6371,
      "step": 1005
    },
    {
      "epoch": 0.06478511866581142,
      "grad_norm": 0.7769868872755683,
      "learning_rate": 0.00012957023733162283,
      "loss": 0.6202,
      "step": 1010
    },
    {
      "epoch": 0.0651058370750481,
      "grad_norm": 0.4974832858382039,
      "learning_rate": 0.00013021167415009624,
      "loss": 0.652,
      "step": 1015
    },
    {
      "epoch": 0.0654265554842848,
      "grad_norm": 0.7988613498086312,
      "learning_rate": 0.0001308531109685696,
      "loss": 0.6179,
      "step": 1020
    },
    {
      "epoch": 0.06574727389352149,
      "grad_norm": 0.5975032929676001,
      "learning_rate": 0.00013149454778704298,
      "loss": 0.7551,
      "step": 1025
    },
    {
      "epoch": 0.06606799230275817,
      "grad_norm": 0.46478481189365806,
      "learning_rate": 0.00013213598460551636,
      "loss": 0.6643,
      "step": 1030
    },
    {
      "epoch": 0.06638871071199487,
      "grad_norm": 0.5467473022741837,
      "learning_rate": 0.00013277742142398975,
      "loss": 0.6786,
      "step": 1035
    },
    {
      "epoch": 0.06670942912123155,
      "grad_norm": 0.788511157965346,
      "learning_rate": 0.00013341885824246313,
      "loss": 0.699,
      "step": 1040
    },
    {
      "epoch": 0.06703014753046825,
      "grad_norm": 0.7378591658959022,
      "learning_rate": 0.0001340602950609365,
      "loss": 0.5498,
      "step": 1045
    },
    {
      "epoch": 0.06735086593970493,
      "grad_norm": 0.524580967213953,
      "learning_rate": 0.0001347017318794099,
      "loss": 0.7092,
      "step": 1050
    },
    {
      "epoch": 0.06767158434894163,
      "grad_norm": 10.11033461685559,
      "learning_rate": 0.00013534316869788325,
      "loss": 0.6694,
      "step": 1055
    },
    {
      "epoch": 0.06799230275817832,
      "grad_norm": 0.6039061177211199,
      "learning_rate": 0.00013598460551635666,
      "loss": 0.6105,
      "step": 1060
    },
    {
      "epoch": 0.06831302116741501,
      "grad_norm": 0.7863303522868051,
      "learning_rate": 0.00013662604233483002,
      "loss": 0.6867,
      "step": 1065
    },
    {
      "epoch": 0.0686337395766517,
      "grad_norm": 0.6197712573428509,
      "learning_rate": 0.0001372674791533034,
      "loss": 0.6893,
      "step": 1070
    },
    {
      "epoch": 0.0689544579858884,
      "grad_norm": 0.43888192750291055,
      "learning_rate": 0.0001379089159717768,
      "loss": 0.7157,
      "step": 1075
    },
    {
      "epoch": 0.06927517639512508,
      "grad_norm": 0.7306535592576365,
      "learning_rate": 0.00013855035279025017,
      "loss": 0.7648,
      "step": 1080
    },
    {
      "epoch": 0.06959589480436178,
      "grad_norm": 0.5833095869044383,
      "learning_rate": 0.00013919178960872356,
      "loss": 0.6655,
      "step": 1085
    },
    {
      "epoch": 0.06991661321359846,
      "grad_norm": 0.3330431009666685,
      "learning_rate": 0.0001398332264271969,
      "loss": 0.5681,
      "step": 1090
    },
    {
      "epoch": 0.07023733162283514,
      "grad_norm": 0.8485768964159431,
      "learning_rate": 0.00014047466324567032,
      "loss": 0.6139,
      "step": 1095
    },
    {
      "epoch": 0.07055805003207184,
      "grad_norm": 0.48935398848123357,
      "learning_rate": 0.00014111610006414368,
      "loss": 0.6591,
      "step": 1100
    },
    {
      "epoch": 0.07087876844130853,
      "grad_norm": 0.6694840056428312,
      "learning_rate": 0.00014175753688261706,
      "loss": 0.5986,
      "step": 1105
    },
    {
      "epoch": 0.07119948685054522,
      "grad_norm": 0.7907065672480846,
      "learning_rate": 0.00014239897370109045,
      "loss": 0.8477,
      "step": 1110
    },
    {
      "epoch": 0.07152020525978191,
      "grad_norm": 0.45721463553494507,
      "learning_rate": 0.00014304041051956383,
      "loss": 0.6511,
      "step": 1115
    },
    {
      "epoch": 0.0718409236690186,
      "grad_norm": 0.5932773719713492,
      "learning_rate": 0.00014368184733803721,
      "loss": 0.6205,
      "step": 1120
    },
    {
      "epoch": 0.07216164207825529,
      "grad_norm": 0.7933284443225256,
      "learning_rate": 0.0001443232841565106,
      "loss": 0.524,
      "step": 1125
    },
    {
      "epoch": 0.07248236048749199,
      "grad_norm": 0.4677884329123659,
      "learning_rate": 0.00014496472097498398,
      "loss": 0.555,
      "step": 1130
    },
    {
      "epoch": 0.07280307889672867,
      "grad_norm": 0.850254756515873,
      "learning_rate": 0.00014560615779345734,
      "loss": 0.7627,
      "step": 1135
    },
    {
      "epoch": 0.07312379730596537,
      "grad_norm": 0.522103651356661,
      "learning_rate": 0.00014624759461193075,
      "loss": 0.7255,
      "step": 1140
    },
    {
      "epoch": 0.07344451571520205,
      "grad_norm": 0.6063292373713933,
      "learning_rate": 0.0001468890314304041,
      "loss": 0.6222,
      "step": 1145
    },
    {
      "epoch": 0.07376523412443874,
      "grad_norm": 0.9713303841273095,
      "learning_rate": 0.0001475304682488775,
      "loss": 0.7341,
      "step": 1150
    },
    {
      "epoch": 0.07408595253367543,
      "grad_norm": 0.837884018201796,
      "learning_rate": 0.00014817190506735087,
      "loss": 0.6822,
      "step": 1155
    },
    {
      "epoch": 0.07440667094291212,
      "grad_norm": 0.39437246960153793,
      "learning_rate": 0.00014881334188582426,
      "loss": 0.7116,
      "step": 1160
    },
    {
      "epoch": 0.07472738935214882,
      "grad_norm": 0.6202094758512229,
      "learning_rate": 0.0001494547787042976,
      "loss": 0.6015,
      "step": 1165
    },
    {
      "epoch": 0.0750481077613855,
      "grad_norm": 0.8135054592447762,
      "learning_rate": 0.00015009621552277102,
      "loss": 0.6487,
      "step": 1170
    },
    {
      "epoch": 0.0753688261706222,
      "grad_norm": 0.5507524560111344,
      "learning_rate": 0.0001507376523412444,
      "loss": 0.5846,
      "step": 1175
    },
    {
      "epoch": 0.07568954457985888,
      "grad_norm": 0.5961939171868111,
      "learning_rate": 0.00015137908915971776,
      "loss": 0.6111,
      "step": 1180
    },
    {
      "epoch": 0.07601026298909558,
      "grad_norm": 0.5352884760699661,
      "learning_rate": 0.00015202052597819118,
      "loss": 0.6401,
      "step": 1185
    },
    {
      "epoch": 0.07633098139833226,
      "grad_norm": 0.6620834657515849,
      "learning_rate": 0.00015266196279666453,
      "loss": 0.7108,
      "step": 1190
    },
    {
      "epoch": 0.07665169980756896,
      "grad_norm": 0.24886726646481336,
      "learning_rate": 0.00015330339961513792,
      "loss": 0.465,
      "step": 1195
    },
    {
      "epoch": 0.07697241821680564,
      "grad_norm": 0.5949618384904851,
      "learning_rate": 0.0001539448364336113,
      "loss": 0.6872,
      "step": 1200
    },
    {
      "epoch": 0.07729313662604234,
      "grad_norm": 0.7888477619326826,
      "learning_rate": 0.00015458627325208468,
      "loss": 0.5609,
      "step": 1205
    },
    {
      "epoch": 0.07761385503527903,
      "grad_norm": 1.0669700966748508,
      "learning_rate": 0.00015522771007055804,
      "loss": 0.7743,
      "step": 1210
    },
    {
      "epoch": 0.07793457344451571,
      "grad_norm": 0.7068283314311553,
      "learning_rate": 0.00015586914688903145,
      "loss": 0.6263,
      "step": 1215
    },
    {
      "epoch": 0.0782552918537524,
      "grad_norm": 0.5841407337661559,
      "learning_rate": 0.00015651058370750483,
      "loss": 0.6187,
      "step": 1220
    },
    {
      "epoch": 0.07857601026298909,
      "grad_norm": 0.6229227132294815,
      "learning_rate": 0.0001571520205259782,
      "loss": 0.7183,
      "step": 1225
    },
    {
      "epoch": 0.07889672867222579,
      "grad_norm": 0.6002586833079545,
      "learning_rate": 0.0001577934573444516,
      "loss": 0.7077,
      "step": 1230
    },
    {
      "epoch": 0.07921744708146247,
      "grad_norm": 0.5383734940611982,
      "learning_rate": 0.00015843489416292496,
      "loss": 0.6251,
      "step": 1235
    },
    {
      "epoch": 0.07953816549069917,
      "grad_norm": 0.6051581628244698,
      "learning_rate": 0.00015907633098139834,
      "loss": 0.6742,
      "step": 1240
    },
    {
      "epoch": 0.07985888389993585,
      "grad_norm": 0.6524111511727346,
      "learning_rate": 0.0001597177677998717,
      "loss": 0.6258,
      "step": 1245
    },
    {
      "epoch": 0.08017960230917255,
      "grad_norm": 0.8452071724294624,
      "learning_rate": 0.0001603592046183451,
      "loss": 0.6583,
      "step": 1250
    },
    {
      "epoch": 0.08050032071840924,
      "grad_norm": 0.5380526459581828,
      "learning_rate": 0.00016100064143681847,
      "loss": 0.7976,
      "step": 1255
    },
    {
      "epoch": 0.08082103912764593,
      "grad_norm": 0.846065125270878,
      "learning_rate": 0.00016164207825529185,
      "loss": 0.5684,
      "step": 1260
    },
    {
      "epoch": 0.08114175753688262,
      "grad_norm": 1.2668855662638892,
      "learning_rate": 0.00016228351507376523,
      "loss": 0.6079,
      "step": 1265
    },
    {
      "epoch": 0.0814624759461193,
      "grad_norm": 0.7795964267281216,
      "learning_rate": 0.00016292495189223862,
      "loss": 0.646,
      "step": 1270
    },
    {
      "epoch": 0.081783194355356,
      "grad_norm": 0.7027735707273621,
      "learning_rate": 0.000163566388710712,
      "loss": 0.7358,
      "step": 1275
    },
    {
      "epoch": 0.08210391276459268,
      "grad_norm": 0.6792816013615487,
      "learning_rate": 0.00016420782552918538,
      "loss": 0.6695,
      "step": 1280
    },
    {
      "epoch": 0.08242463117382938,
      "grad_norm": 0.6182179483058359,
      "learning_rate": 0.00016484926234765877,
      "loss": 0.6096,
      "step": 1285
    },
    {
      "epoch": 0.08274534958306606,
      "grad_norm": 0.7701573171054498,
      "learning_rate": 0.00016549069916613212,
      "loss": 0.6467,
      "step": 1290
    },
    {
      "epoch": 0.08306606799230276,
      "grad_norm": 0.8699247842006342,
      "learning_rate": 0.00016613213598460554,
      "loss": 0.5635,
      "step": 1295
    },
    {
      "epoch": 0.08338678640153944,
      "grad_norm": 1.5815526952211336,
      "learning_rate": 0.0001667735728030789,
      "loss": 0.7091,
      "step": 1300
    },
    {
      "epoch": 0.08370750481077614,
      "grad_norm": 1.1184328365345817,
      "learning_rate": 0.00016741500962155228,
      "loss": 0.6598,
      "step": 1305
    },
    {
      "epoch": 0.08402822322001283,
      "grad_norm": 0.5795213958251844,
      "learning_rate": 0.00016805644644002566,
      "loss": 0.6638,
      "step": 1310
    },
    {
      "epoch": 0.08434894162924952,
      "grad_norm": 0.9373149156332843,
      "learning_rate": 0.00016869788325849904,
      "loss": 0.6091,
      "step": 1315
    },
    {
      "epoch": 0.08466966003848621,
      "grad_norm": 1.581754110063961,
      "learning_rate": 0.00016933932007697243,
      "loss": 0.6641,
      "step": 1320
    },
    {
      "epoch": 0.0849903784477229,
      "grad_norm": 0.8932544552326179,
      "learning_rate": 0.0001699807568954458,
      "loss": 0.7052,
      "step": 1325
    },
    {
      "epoch": 0.08531109685695959,
      "grad_norm": 0.7663989496912428,
      "learning_rate": 0.0001706221937139192,
      "loss": 0.6517,
      "step": 1330
    },
    {
      "epoch": 0.08563181526619627,
      "grad_norm": 0.46405474836741084,
      "learning_rate": 0.00017126363053239255,
      "loss": 0.6357,
      "step": 1335
    },
    {
      "epoch": 0.08595253367543297,
      "grad_norm": 0.6382018266002509,
      "learning_rate": 0.00017190506735086596,
      "loss": 0.5614,
      "step": 1340
    },
    {
      "epoch": 0.08627325208466965,
      "grad_norm": 0.43085923514031815,
      "learning_rate": 0.00017254650416933932,
      "loss": 0.6499,
      "step": 1345
    },
    {
      "epoch": 0.08659397049390635,
      "grad_norm": 0.8362450675258178,
      "learning_rate": 0.0001731879409878127,
      "loss": 0.7166,
      "step": 1350
    },
    {
      "epoch": 0.08691468890314304,
      "grad_norm": 0.6383324045212788,
      "learning_rate": 0.00017382937780628609,
      "loss": 0.6072,
      "step": 1355
    },
    {
      "epoch": 0.08723540731237973,
      "grad_norm": 0.8935601954358443,
      "learning_rate": 0.00017447081462475947,
      "loss": 0.6483,
      "step": 1360
    },
    {
      "epoch": 0.08755612572161642,
      "grad_norm": 0.7202566228037989,
      "learning_rate": 0.00017511225144323285,
      "loss": 0.5967,
      "step": 1365
    },
    {
      "epoch": 0.08787684413085312,
      "grad_norm": 0.5230697995372986,
      "learning_rate": 0.00017575368826170624,
      "loss": 0.7513,
      "step": 1370
    },
    {
      "epoch": 0.0881975625400898,
      "grad_norm": 0.816705171178794,
      "learning_rate": 0.00017639512508017962,
      "loss": 0.6651,
      "step": 1375
    },
    {
      "epoch": 0.0885182809493265,
      "grad_norm": 0.5342326962298032,
      "learning_rate": 0.00017703656189865298,
      "loss": 0.5963,
      "step": 1380
    },
    {
      "epoch": 0.08883899935856318,
      "grad_norm": 0.5984545509333057,
      "learning_rate": 0.0001776779987171264,
      "loss": 0.6455,
      "step": 1385
    },
    {
      "epoch": 0.08915971776779986,
      "grad_norm": 0.6477898184624558,
      "learning_rate": 0.00017831943553559974,
      "loss": 0.7328,
      "step": 1390
    },
    {
      "epoch": 0.08948043617703656,
      "grad_norm": 0.5092110599627088,
      "learning_rate": 0.00017896087235407313,
      "loss": 0.6279,
      "step": 1395
    },
    {
      "epoch": 0.08980115458627325,
      "grad_norm": 0.7029802255673286,
      "learning_rate": 0.0001796023091725465,
      "loss": 0.7776,
      "step": 1400
    },
    {
      "epoch": 0.09012187299550994,
      "grad_norm": 0.8832343335799728,
      "learning_rate": 0.0001802437459910199,
      "loss": 0.6111,
      "step": 1405
    },
    {
      "epoch": 0.09044259140474663,
      "grad_norm": 0.8016746694750925,
      "learning_rate": 0.00018088518280949328,
      "loss": 0.6695,
      "step": 1410
    },
    {
      "epoch": 0.09076330981398333,
      "grad_norm": 1.1634306884211862,
      "learning_rate": 0.00018152661962796664,
      "loss": 0.7114,
      "step": 1415
    },
    {
      "epoch": 0.09108402822322001,
      "grad_norm": 0.6624473023568856,
      "learning_rate": 0.00018216805644644005,
      "loss": 0.7559,
      "step": 1420
    },
    {
      "epoch": 0.09140474663245671,
      "grad_norm": 0.8042759336949421,
      "learning_rate": 0.0001828094932649134,
      "loss": 0.7583,
      "step": 1425
    },
    {
      "epoch": 0.09172546504169339,
      "grad_norm": 0.9772784468918035,
      "learning_rate": 0.00018345093008338679,
      "loss": 0.6853,
      "step": 1430
    },
    {
      "epoch": 0.09204618345093009,
      "grad_norm": 0.5803428867246113,
      "learning_rate": 0.00018409236690186017,
      "loss": 0.6201,
      "step": 1435
    },
    {
      "epoch": 0.09236690186016677,
      "grad_norm": 1.0135605254585267,
      "learning_rate": 0.00018473380372033355,
      "loss": 0.5897,
      "step": 1440
    },
    {
      "epoch": 0.09268762026940347,
      "grad_norm": 1.0146665351265378,
      "learning_rate": 0.00018537524053880694,
      "loss": 0.7739,
      "step": 1445
    },
    {
      "epoch": 0.09300833867864015,
      "grad_norm": 0.6409550994368336,
      "learning_rate": 0.00018601667735728032,
      "loss": 0.652,
      "step": 1450
    },
    {
      "epoch": 0.09332905708787684,
      "grad_norm": 0.9063409381829404,
      "learning_rate": 0.0001866581141757537,
      "loss": 0.5091,
      "step": 1455
    },
    {
      "epoch": 0.09364977549711354,
      "grad_norm": 0.6035311851346433,
      "learning_rate": 0.00018729955099422706,
      "loss": 0.5951,
      "step": 1460
    },
    {
      "epoch": 0.09397049390635022,
      "grad_norm": 0.4305914555852047,
      "learning_rate": 0.00018794098781270047,
      "loss": 0.6979,
      "step": 1465
    },
    {
      "epoch": 0.09429121231558692,
      "grad_norm": 0.592322337116948,
      "learning_rate": 0.00018858242463117383,
      "loss": 0.7894,
      "step": 1470
    },
    {
      "epoch": 0.0946119307248236,
      "grad_norm": 0.7019994823024447,
      "learning_rate": 0.0001892238614496472,
      "loss": 0.6685,
      "step": 1475
    },
    {
      "epoch": 0.0949326491340603,
      "grad_norm": 0.6511984672543305,
      "learning_rate": 0.0001898652982681206,
      "loss": 0.7993,
      "step": 1480
    },
    {
      "epoch": 0.09525336754329698,
      "grad_norm": 0.7220123377652353,
      "learning_rate": 0.00019050673508659398,
      "loss": 0.6424,
      "step": 1485
    },
    {
      "epoch": 0.09557408595253368,
      "grad_norm": 0.569165004645741,
      "learning_rate": 0.00019114817190506736,
      "loss": 0.5879,
      "step": 1490
    },
    {
      "epoch": 0.09589480436177036,
      "grad_norm": 0.6841283140830406,
      "learning_rate": 0.00019178960872354075,
      "loss": 0.6944,
      "step": 1495
    },
    {
      "epoch": 0.09621552277100706,
      "grad_norm": 0.5806780565962407,
      "learning_rate": 0.00019243104554201413,
      "loss": 0.7039,
      "step": 1500
    },
    {
      "epoch": 0.09653624118024375,
      "grad_norm": 1.0231588558162683,
      "learning_rate": 0.0001930724823604875,
      "loss": 0.7447,
      "step": 1505
    },
    {
      "epoch": 0.09685695958948044,
      "grad_norm": 0.6513202839027658,
      "learning_rate": 0.0001937139191789609,
      "loss": 0.6013,
      "step": 1510
    },
    {
      "epoch": 0.09717767799871713,
      "grad_norm": 0.7845659853361092,
      "learning_rate": 0.00019435535599743425,
      "loss": 0.6069,
      "step": 1515
    },
    {
      "epoch": 0.09749839640795381,
      "grad_norm": 0.7194048768316849,
      "learning_rate": 0.00019499679281590764,
      "loss": 0.7641,
      "step": 1520
    },
    {
      "epoch": 0.09781911481719051,
      "grad_norm": 0.6191788469641755,
      "learning_rate": 0.00019563822963438102,
      "loss": 0.7448,
      "step": 1525
    },
    {
      "epoch": 0.09813983322642719,
      "grad_norm": 0.7426546495672112,
      "learning_rate": 0.0001962796664528544,
      "loss": 0.7761,
      "step": 1530
    },
    {
      "epoch": 0.09846055163566389,
      "grad_norm": 0.7572762314827131,
      "learning_rate": 0.0001969211032713278,
      "loss": 0.8618,
      "step": 1535
    },
    {
      "epoch": 0.09878127004490057,
      "grad_norm": 0.6372317781767599,
      "learning_rate": 0.00019756254008980117,
      "loss": 0.6666,
      "step": 1540
    },
    {
      "epoch": 0.09910198845413727,
      "grad_norm": 0.6326871836739665,
      "learning_rate": 0.00019820397690827456,
      "loss": 0.6547,
      "step": 1545
    },
    {
      "epoch": 0.09942270686337396,
      "grad_norm": 1.1563371750862326,
      "learning_rate": 0.0001988454137267479,
      "loss": 0.6453,
      "step": 1550
    },
    {
      "epoch": 0.09974342527261065,
      "grad_norm": 0.9479492008239019,
      "learning_rate": 0.00019948685054522132,
      "loss": 0.651,
      "step": 1555
    },
    {
      "epoch": 0.10006414368184734,
      "grad_norm": 0.7535422723224012,
      "learning_rate": 0.00019999999749335695,
      "loss": 0.7093,
      "step": 1560
    },
    {
      "epoch": 0.10038486209108403,
      "grad_norm": 0.5932112107729582,
      "learning_rate": 0.00019999990976086248,
      "loss": 0.7182,
      "step": 1565
    },
    {
      "epoch": 0.10070558050032072,
      "grad_norm": 0.6993778331415806,
      "learning_rate": 0.00019999969669633985,
      "loss": 0.6146,
      "step": 1570
    },
    {
      "epoch": 0.1010262989095574,
      "grad_norm": 0.7283971397341802,
      "learning_rate": 0.00019999935830005615,
      "loss": 0.6496,
      "step": 1575
    },
    {
      "epoch": 0.1013470173187941,
      "grad_norm": 1.0242803435192598,
      "learning_rate": 0.00019999889457243545,
      "loss": 0.8042,
      "step": 1580
    },
    {
      "epoch": 0.10166773572803078,
      "grad_norm": 0.6322290861624766,
      "learning_rate": 0.000199998305514059,
      "loss": 0.7667,
      "step": 1585
    },
    {
      "epoch": 0.10198845413726748,
      "grad_norm": 0.6155965395909687,
      "learning_rate": 0.00019999759112566498,
      "loss": 0.6363,
      "step": 1590
    },
    {
      "epoch": 0.10230917254650417,
      "grad_norm": 0.4557230080410517,
      "learning_rate": 0.00019999675140814887,
      "loss": 0.5606,
      "step": 1595
    },
    {
      "epoch": 0.10262989095574086,
      "grad_norm": 0.6477761450960091,
      "learning_rate": 0.00019999578636256302,
      "loss": 0.6693,
      "step": 1600
    },
    {
      "epoch": 0.10295060936497755,
      "grad_norm": 0.8654904236010101,
      "learning_rate": 0.000199994695990117,
      "loss": 0.6314,
      "step": 1605
    },
    {
      "epoch": 0.10327132777421424,
      "grad_norm": 0.6903326653951578,
      "learning_rate": 0.00019999348029217732,
      "loss": 0.6179,
      "step": 1610
    },
    {
      "epoch": 0.10359204618345093,
      "grad_norm": 0.8840202106741641,
      "learning_rate": 0.00019999213927026775,
      "loss": 0.6985,
      "step": 1615
    },
    {
      "epoch": 0.10391276459268763,
      "grad_norm": 0.668673191642802,
      "learning_rate": 0.00019999067292606894,
      "loss": 0.6218,
      "step": 1620
    },
    {
      "epoch": 0.10423348300192431,
      "grad_norm": 0.8731375253636731,
      "learning_rate": 0.00019998908126141868,
      "loss": 0.6898,
      "step": 1625
    },
    {
      "epoch": 0.10455420141116101,
      "grad_norm": 1.0983344411460778,
      "learning_rate": 0.00019998736427831194,
      "loss": 0.7532,
      "step": 1630
    },
    {
      "epoch": 0.10487491982039769,
      "grad_norm": 0.5721731282366914,
      "learning_rate": 0.00019998552197890052,
      "loss": 0.6003,
      "step": 1635
    },
    {
      "epoch": 0.10519563822963438,
      "grad_norm": 0.679795611170959,
      "learning_rate": 0.0001999835543654935,
      "loss": 0.7003,
      "step": 1640
    },
    {
      "epoch": 0.10551635663887107,
      "grad_norm": 0.7659746917304108,
      "learning_rate": 0.0001999814614405569,
      "loss": 0.6359,
      "step": 1645
    },
    {
      "epoch": 0.10583707504810776,
      "grad_norm": 1.1962756283471876,
      "learning_rate": 0.00019997924320671383,
      "loss": 0.6308,
      "step": 1650
    },
    {
      "epoch": 0.10615779345734445,
      "grad_norm": 0.7621683185763631,
      "learning_rate": 0.00019997689966674446,
      "loss": 0.7957,
      "step": 1655
    },
    {
      "epoch": 0.10647851186658114,
      "grad_norm": 0.7338531701197929,
      "learning_rate": 0.00019997443082358601,
      "loss": 0.6757,
      "step": 1660
    },
    {
      "epoch": 0.10679923027581784,
      "grad_norm": 0.7150664806057576,
      "learning_rate": 0.00019997183668033267,
      "loss": 0.694,
      "step": 1665
    },
    {
      "epoch": 0.10711994868505452,
      "grad_norm": 0.7869356473972234,
      "learning_rate": 0.0001999691172402358,
      "loss": 0.719,
      "step": 1670
    },
    {
      "epoch": 0.10744066709429122,
      "grad_norm": 0.611503667039071,
      "learning_rate": 0.00019996627250670374,
      "loss": 0.6343,
      "step": 1675
    },
    {
      "epoch": 0.1077613855035279,
      "grad_norm": 0.7766135920581687,
      "learning_rate": 0.00019996330248330183,
      "loss": 0.693,
      "step": 1680
    },
    {
      "epoch": 0.1080821039127646,
      "grad_norm": 0.4786388847248821,
      "learning_rate": 0.00019996020717375247,
      "loss": 0.6194,
      "step": 1685
    },
    {
      "epoch": 0.10840282232200128,
      "grad_norm": 0.6991936018277035,
      "learning_rate": 0.000199956986581935,
      "loss": 0.7263,
      "step": 1690
    },
    {
      "epoch": 0.10872354073123797,
      "grad_norm": 0.7205841321201338,
      "learning_rate": 0.000199953640711886,
      "loss": 0.4831,
      "step": 1695
    },
    {
      "epoch": 0.10904425914047466,
      "grad_norm": 0.9131191032401795,
      "learning_rate": 0.00019995016956779886,
      "loss": 0.5177,
      "step": 1700
    },
    {
      "epoch": 0.10936497754971135,
      "grad_norm": 0.5536147800325968,
      "learning_rate": 0.000199946573154024,
      "loss": 0.6789,
      "step": 1705
    },
    {
      "epoch": 0.10968569595894805,
      "grad_norm": 0.6451976876558219,
      "learning_rate": 0.00019994285147506888,
      "loss": 0.7275,
      "step": 1710
    },
    {
      "epoch": 0.11000641436818473,
      "grad_norm": 0.9579506214333907,
      "learning_rate": 0.00019993900453559805,
      "loss": 0.6589,
      "step": 1715
    },
    {
      "epoch": 0.11032713277742143,
      "grad_norm": 0.9260040237199151,
      "learning_rate": 0.00019993503234043284,
      "loss": 0.6823,
      "step": 1720
    },
    {
      "epoch": 0.11064785118665811,
      "grad_norm": 0.9505358223036796,
      "learning_rate": 0.00019993093489455182,
      "loss": 0.7616,
      "step": 1725
    },
    {
      "epoch": 0.11096856959589481,
      "grad_norm": 0.7825553328319829,
      "learning_rate": 0.0001999267122030903,
      "loss": 0.6443,
      "step": 1730
    },
    {
      "epoch": 0.11128928800513149,
      "grad_norm": 1.277608679789176,
      "learning_rate": 0.00019992236427134069,
      "loss": 0.6155,
      "step": 1735
    },
    {
      "epoch": 0.11161000641436819,
      "grad_norm": 0.5889261013180431,
      "learning_rate": 0.00019991789110475238,
      "loss": 0.6994,
      "step": 1740
    },
    {
      "epoch": 0.11193072482360487,
      "grad_norm": 0.8029959511201281,
      "learning_rate": 0.00019991329270893163,
      "loss": 0.5902,
      "step": 1745
    },
    {
      "epoch": 0.11225144323284157,
      "grad_norm": 0.8303612970994603,
      "learning_rate": 0.00019990856908964178,
      "loss": 0.783,
      "step": 1750
    },
    {
      "epoch": 0.11257216164207826,
      "grad_norm": 0.7054559375502497,
      "learning_rate": 0.00019990372025280304,
      "loss": 0.6792,
      "step": 1755
    },
    {
      "epoch": 0.11289288005131494,
      "grad_norm": 0.7420987703476908,
      "learning_rate": 0.0001998987462044925,
      "loss": 0.6013,
      "step": 1760
    },
    {
      "epoch": 0.11321359846055164,
      "grad_norm": 0.7094425366646243,
      "learning_rate": 0.00019989364695094426,
      "loss": 0.5688,
      "step": 1765
    },
    {
      "epoch": 0.11353431686978832,
      "grad_norm": 0.569373653159604,
      "learning_rate": 0.00019988842249854934,
      "loss": 0.58,
      "step": 1770
    },
    {
      "epoch": 0.11385503527902502,
      "grad_norm": 0.46978550262066865,
      "learning_rate": 0.00019988307285385566,
      "loss": 0.7256,
      "step": 1775
    },
    {
      "epoch": 0.1141757536882617,
      "grad_norm": 0.6612438373633108,
      "learning_rate": 0.00019987759802356803,
      "loss": 0.7488,
      "step": 1780
    },
    {
      "epoch": 0.1144964720974984,
      "grad_norm": 0.7309333682103005,
      "learning_rate": 0.00019987199801454816,
      "loss": 0.7284,
      "step": 1785
    },
    {
      "epoch": 0.11481719050673508,
      "grad_norm": 0.9460563497076551,
      "learning_rate": 0.00019986627283381472,
      "loss": 0.6057,
      "step": 1790
    },
    {
      "epoch": 0.11513790891597178,
      "grad_norm": 0.6266870049609108,
      "learning_rate": 0.00019986042248854312,
      "loss": 0.6476,
      "step": 1795
    },
    {
      "epoch": 0.11545862732520847,
      "grad_norm": 0.7739200309128734,
      "learning_rate": 0.0001998544469860658,
      "loss": 0.6622,
      "step": 1800
    },
    {
      "epoch": 0.11577934573444516,
      "grad_norm": 0.7469556806210228,
      "learning_rate": 0.00019984834633387193,
      "loss": 0.5747,
      "step": 1805
    },
    {
      "epoch": 0.11610006414368185,
      "grad_norm": 0.5367955199234249,
      "learning_rate": 0.00019984212053960763,
      "loss": 0.671,
      "step": 1810
    },
    {
      "epoch": 0.11642078255291853,
      "grad_norm": 1.0704497861003814,
      "learning_rate": 0.00019983576961107576,
      "loss": 0.6748,
      "step": 1815
    },
    {
      "epoch": 0.11674150096215523,
      "grad_norm": 0.6669764759339204,
      "learning_rate": 0.00019982929355623615,
      "loss": 0.7167,
      "step": 1820
    },
    {
      "epoch": 0.11706221937139191,
      "grad_norm": 0.6039796198063991,
      "learning_rate": 0.00019982269238320532,
      "loss": 0.6067,
      "step": 1825
    },
    {
      "epoch": 0.11738293778062861,
      "grad_norm": 1.7098001118613075,
      "learning_rate": 0.00019981596610025668,
      "loss": 0.7805,
      "step": 1830
    },
    {
      "epoch": 0.1177036561898653,
      "grad_norm": 0.7398855694010563,
      "learning_rate": 0.00019980911471582043,
      "loss": 0.6427,
      "step": 1835
    },
    {
      "epoch": 0.11802437459910199,
      "grad_norm": 0.8354800121875872,
      "learning_rate": 0.0001998021382384836,
      "loss": 0.7408,
      "step": 1840
    },
    {
      "epoch": 0.11834509300833868,
      "grad_norm": 0.6722235019789473,
      "learning_rate": 0.00019979503667698985,
      "loss": 0.6435,
      "step": 1845
    },
    {
      "epoch": 0.11866581141757537,
      "grad_norm": 0.717593721397057,
      "learning_rate": 0.00019978781004023982,
      "loss": 0.6241,
      "step": 1850
    },
    {
      "epoch": 0.11898652982681206,
      "grad_norm": 0.7195515776738803,
      "learning_rate": 0.00019978045833729074,
      "loss": 0.5947,
      "step": 1855
    },
    {
      "epoch": 0.11930724823604875,
      "grad_norm": 0.8882886022840869,
      "learning_rate": 0.00019977298157735672,
      "loss": 0.7388,
      "step": 1860
    },
    {
      "epoch": 0.11962796664528544,
      "grad_norm": 0.989988319302347,
      "learning_rate": 0.0001997653797698085,
      "loss": 0.7599,
      "step": 1865
    },
    {
      "epoch": 0.11994868505452214,
      "grad_norm": 0.8403633651058144,
      "learning_rate": 0.00019975765292417358,
      "loss": 0.6432,
      "step": 1870
    },
    {
      "epoch": 0.12026940346375882,
      "grad_norm": 1.2049771636877937,
      "learning_rate": 0.00019974980105013623,
      "loss": 0.7333,
      "step": 1875
    },
    {
      "epoch": 0.1205901218729955,
      "grad_norm": 0.8525983520687547,
      "learning_rate": 0.00019974182415753732,
      "loss": 0.6699,
      "step": 1880
    },
    {
      "epoch": 0.1209108402822322,
      "grad_norm": 0.5716659731530915,
      "learning_rate": 0.00019973372225637453,
      "loss": 0.5793,
      "step": 1885
    },
    {
      "epoch": 0.12123155869146889,
      "grad_norm": 0.6060632420377923,
      "learning_rate": 0.00019972549535680206,
      "loss": 0.671,
      "step": 1890
    },
    {
      "epoch": 0.12155227710070558,
      "grad_norm": 0.7561918292328402,
      "learning_rate": 0.00019971714346913086,
      "loss": 0.5316,
      "step": 1895
    },
    {
      "epoch": 0.12187299550994227,
      "grad_norm": 0.9824211285333242,
      "learning_rate": 0.00019970866660382863,
      "loss": 0.5868,
      "step": 1900
    },
    {
      "epoch": 0.12219371391917896,
      "grad_norm": 0.7951038927386893,
      "learning_rate": 0.00019970006477151953,
      "loss": 0.7,
      "step": 1905
    },
    {
      "epoch": 0.12251443232841565,
      "grad_norm": 0.747912075117886,
      "learning_rate": 0.0001996913379829844,
      "loss": 0.5798,
      "step": 1910
    },
    {
      "epoch": 0.12283515073765235,
      "grad_norm": 1.2254454430699995,
      "learning_rate": 0.00019968248624916077,
      "loss": 0.6667,
      "step": 1915
    },
    {
      "epoch": 0.12315586914688903,
      "grad_norm": 1.1768102485963885,
      "learning_rate": 0.00019967350958114267,
      "loss": 0.5774,
      "step": 1920
    },
    {
      "epoch": 0.12347658755612573,
      "grad_norm": 0.6310183951664794,
      "learning_rate": 0.0001996644079901808,
      "loss": 0.4486,
      "step": 1925
    },
    {
      "epoch": 0.12379730596536241,
      "grad_norm": 0.8260925792950813,
      "learning_rate": 0.00019965518148768233,
      "loss": 0.5623,
      "step": 1930
    },
    {
      "epoch": 0.1241180243745991,
      "grad_norm": 0.9150306074218141,
      "learning_rate": 0.000199645830085211,
      "loss": 0.83,
      "step": 1935
    },
    {
      "epoch": 0.12443874278383579,
      "grad_norm": 0.9369210275043979,
      "learning_rate": 0.00019963635379448722,
      "loss": 0.7223,
      "step": 1940
    },
    {
      "epoch": 0.12475946119307248,
      "grad_norm": 0.748894355642791,
      "learning_rate": 0.00019962675262738774,
      "loss": 0.6919,
      "step": 1945
    },
    {
      "epoch": 0.12508017960230916,
      "grad_norm": 1.1961745083017192,
      "learning_rate": 0.00019961702659594598,
      "loss": 0.5536,
      "step": 1950
    },
    {
      "epoch": 0.12540089801154586,
      "grad_norm": 0.568991855421978,
      "learning_rate": 0.00019960717571235173,
      "loss": 0.639,
      "step": 1955
    },
    {
      "epoch": 0.12572161642078256,
      "grad_norm": 1.0900526061976745,
      "learning_rate": 0.00019959719998895135,
      "loss": 0.6571,
      "step": 1960
    },
    {
      "epoch": 0.12604233483001925,
      "grad_norm": 0.7953938211319622,
      "learning_rate": 0.00019958709943824758,
      "loss": 0.7077,
      "step": 1965
    },
    {
      "epoch": 0.12636305323925592,
      "grad_norm": 1.0090362549424627,
      "learning_rate": 0.0001995768740728997,
      "loss": 0.629,
      "step": 1970
    },
    {
      "epoch": 0.12668377164849262,
      "grad_norm": 0.7822194115921188,
      "learning_rate": 0.0001995665239057234,
      "loss": 0.7948,
      "step": 1975
    },
    {
      "epoch": 0.12700449005772932,
      "grad_norm": 0.82569207599097,
      "learning_rate": 0.00019955604894969067,
      "loss": 0.6823,
      "step": 1980
    },
    {
      "epoch": 0.12732520846696602,
      "grad_norm": 0.5455388809406508,
      "learning_rate": 0.0001995454492179301,
      "loss": 0.5594,
      "step": 1985
    },
    {
      "epoch": 0.1276459268762027,
      "grad_norm": 0.7695218529222057,
      "learning_rate": 0.00019953472472372647,
      "loss": 0.7198,
      "step": 1990
    },
    {
      "epoch": 0.12796664528543938,
      "grad_norm": 0.8673513110262479,
      "learning_rate": 0.00019952387548052112,
      "loss": 0.7148,
      "step": 1995
    },
    {
      "epoch": 0.12828736369467608,
      "grad_norm": 0.919881076337375,
      "learning_rate": 0.00019951290150191158,
      "loss": 0.6439,
      "step": 2000
    },
    {
      "epoch": 0.12860808210391275,
      "grad_norm": 0.9262998866124367,
      "learning_rate": 0.00019950180280165175,
      "loss": 0.5764,
      "step": 2005
    },
    {
      "epoch": 0.12892880051314945,
      "grad_norm": 0.6765034342263078,
      "learning_rate": 0.00019949057939365193,
      "loss": 0.4096,
      "step": 2010
    },
    {
      "epoch": 0.12924951892238615,
      "grad_norm": 0.7219277816800387,
      "learning_rate": 0.00019947923129197862,
      "loss": 0.7127,
      "step": 2015
    },
    {
      "epoch": 0.12957023733162285,
      "grad_norm": 0.8406570776216719,
      "learning_rate": 0.0001994677585108546,
      "loss": 0.6191,
      "step": 2020
    },
    {
      "epoch": 0.12989095574085952,
      "grad_norm": 0.7458490203268737,
      "learning_rate": 0.00019945616106465904,
      "loss": 0.5701,
      "step": 2025
    },
    {
      "epoch": 0.1302116741500962,
      "grad_norm": 1.293735176011679,
      "learning_rate": 0.0001994444389679272,
      "loss": 0.6852,
      "step": 2030
    },
    {
      "epoch": 0.1305323925593329,
      "grad_norm": 0.9148850105541353,
      "learning_rate": 0.00019943259223535067,
      "loss": 0.7057,
      "step": 2035
    },
    {
      "epoch": 0.1308531109685696,
      "grad_norm": 0.6641079479178653,
      "learning_rate": 0.0001994206208817772,
      "loss": 0.7629,
      "step": 2040
    },
    {
      "epoch": 0.13117382937780628,
      "grad_norm": 0.791984066260629,
      "learning_rate": 0.00019940852492221075,
      "loss": 0.6992,
      "step": 2045
    },
    {
      "epoch": 0.13149454778704298,
      "grad_norm": 0.849479398893481,
      "learning_rate": 0.00019939630437181143,
      "loss": 0.6966,
      "step": 2050
    },
    {
      "epoch": 0.13181526619627967,
      "grad_norm": 0.8367106501858504,
      "learning_rate": 0.00019938395924589552,
      "loss": 0.5852,
      "step": 2055
    },
    {
      "epoch": 0.13213598460551634,
      "grad_norm": 0.6790358847768235,
      "learning_rate": 0.00019937148955993545,
      "loss": 0.6393,
      "step": 2060
    },
    {
      "epoch": 0.13245670301475304,
      "grad_norm": 0.9502499514885022,
      "learning_rate": 0.00019935889532955977,
      "loss": 0.6777,
      "step": 2065
    },
    {
      "epoch": 0.13277742142398974,
      "grad_norm": 0.8134631960781032,
      "learning_rate": 0.000199346176570553,
      "loss": 0.6862,
      "step": 2070
    },
    {
      "epoch": 0.13309813983322644,
      "grad_norm": 0.6366664689319048,
      "learning_rate": 0.00019933333329885593,
      "loss": 0.604,
      "step": 2075
    },
    {
      "epoch": 0.1334188582424631,
      "grad_norm": 0.8465634973529981,
      "learning_rate": 0.00019932036553056524,
      "loss": 0.7162,
      "step": 2080
    },
    {
      "epoch": 0.1337395766516998,
      "grad_norm": 0.8425039370601171,
      "learning_rate": 0.00019930727328193378,
      "loss": 0.6855,
      "step": 2085
    },
    {
      "epoch": 0.1340602950609365,
      "grad_norm": 1.14970228361299,
      "learning_rate": 0.00019929405656937032,
      "loss": 0.7191,
      "step": 2090
    },
    {
      "epoch": 0.1343810134701732,
      "grad_norm": 1.0969227215850126,
      "learning_rate": 0.0001992807154094396,
      "loss": 0.728,
      "step": 2095
    },
    {
      "epoch": 0.13470173187940987,
      "grad_norm": 0.5634883710558874,
      "learning_rate": 0.00019926724981886244,
      "loss": 0.6794,
      "step": 2100
    },
    {
      "epoch": 0.13502245028864657,
      "grad_norm": 0.9532151941436401,
      "learning_rate": 0.0001992536598145155,
      "loss": 0.6422,
      "step": 2105
    },
    {
      "epoch": 0.13534316869788326,
      "grad_norm": 0.8529397357920244,
      "learning_rate": 0.0001992399454134315,
      "loss": 0.8323,
      "step": 2110
    },
    {
      "epoch": 0.13566388710711993,
      "grad_norm": 0.5995161683553816,
      "learning_rate": 0.00019922610663279894,
      "loss": 0.6443,
      "step": 2115
    },
    {
      "epoch": 0.13598460551635663,
      "grad_norm": 1.1645114047730085,
      "learning_rate": 0.00019921214348996228,
      "loss": 0.638,
      "step": 2120
    },
    {
      "epoch": 0.13630532392559333,
      "grad_norm": 0.7254426735765782,
      "learning_rate": 0.00019919805600242176,
      "loss": 0.6233,
      "step": 2125
    },
    {
      "epoch": 0.13662604233483003,
      "grad_norm": 1.2630556570142795,
      "learning_rate": 0.00019918384418783362,
      "loss": 0.7941,
      "step": 2130
    },
    {
      "epoch": 0.1369467607440667,
      "grad_norm": 0.5842349667453849,
      "learning_rate": 0.00019916950806400983,
      "loss": 0.7714,
      "step": 2135
    },
    {
      "epoch": 0.1372674791533034,
      "grad_norm": 0.5918400976970277,
      "learning_rate": 0.00019915504764891808,
      "loss": 0.7118,
      "step": 2140
    },
    {
      "epoch": 0.1375881975625401,
      "grad_norm": 0.8666504796220831,
      "learning_rate": 0.000199140462960682,
      "loss": 0.7462,
      "step": 2145
    },
    {
      "epoch": 0.1379089159717768,
      "grad_norm": 0.7764199666330917,
      "learning_rate": 0.00019912575401758082,
      "loss": 0.6395,
      "step": 2150
    },
    {
      "epoch": 0.13822963438101346,
      "grad_norm": 0.9186504138753783,
      "learning_rate": 0.00019911092083804962,
      "loss": 0.6289,
      "step": 2155
    },
    {
      "epoch": 0.13855035279025016,
      "grad_norm": 0.8035713423211853,
      "learning_rate": 0.00019909596344067914,
      "loss": 0.7541,
      "step": 2160
    },
    {
      "epoch": 0.13887107119948686,
      "grad_norm": 0.7189520752077799,
      "learning_rate": 0.00019908088184421578,
      "loss": 0.6826,
      "step": 2165
    },
    {
      "epoch": 0.13919178960872355,
      "grad_norm": 0.6655350088157191,
      "learning_rate": 0.00019906567606756167,
      "loss": 0.7409,
      "step": 2170
    },
    {
      "epoch": 0.13951250801796022,
      "grad_norm": 0.3224597929224718,
      "learning_rate": 0.0001990503461297745,
      "loss": 0.5904,
      "step": 2175
    },
    {
      "epoch": 0.13983322642719692,
      "grad_norm": 0.8267424045917116,
      "learning_rate": 0.00019903489205006764,
      "loss": 0.6894,
      "step": 2180
    },
    {
      "epoch": 0.14015394483643362,
      "grad_norm": 0.6123341217762982,
      "learning_rate": 0.00019901931384780995,
      "loss": 0.703,
      "step": 2185
    },
    {
      "epoch": 0.1404746632456703,
      "grad_norm": 0.45163827780119753,
      "learning_rate": 0.00019900361154252602,
      "loss": 0.59,
      "step": 2190
    },
    {
      "epoch": 0.140795381654907,
      "grad_norm": 0.9556170145817368,
      "learning_rate": 0.00019898778515389584,
      "loss": 0.5857,
      "step": 2195
    },
    {
      "epoch": 0.14111610006414368,
      "grad_norm": 0.7479105122087544,
      "learning_rate": 0.00019897183470175494,
      "loss": 0.6585,
      "step": 2200
    },
    {
      "epoch": 0.14143681847338038,
      "grad_norm": 1.0326719597420064,
      "learning_rate": 0.0001989557602060944,
      "loss": 0.7534,
      "step": 2205
    },
    {
      "epoch": 0.14175753688261705,
      "grad_norm": 0.8658293920784573,
      "learning_rate": 0.00019893956168706066,
      "loss": 0.7002,
      "step": 2210
    },
    {
      "epoch": 0.14207825529185375,
      "grad_norm": 0.8622344203075765,
      "learning_rate": 0.00019892323916495582,
      "loss": 0.7086,
      "step": 2215
    },
    {
      "epoch": 0.14239897370109045,
      "grad_norm": 0.7259813554322444,
      "learning_rate": 0.00019890679266023709,
      "loss": 0.4999,
      "step": 2220
    },
    {
      "epoch": 0.14271969211032715,
      "grad_norm": 0.6647794000879613,
      "learning_rate": 0.0001988902221935173,
      "loss": 0.7005,
      "step": 2225
    },
    {
      "epoch": 0.14304041051956382,
      "grad_norm": 0.8451399712054074,
      "learning_rate": 0.00019887352778556454,
      "loss": 0.6435,
      "step": 2230
    },
    {
      "epoch": 0.1433611289288005,
      "grad_norm": 0.7567525634116421,
      "learning_rate": 0.0001988567094573023,
      "loss": 0.7609,
      "step": 2235
    },
    {
      "epoch": 0.1436818473380372,
      "grad_norm": 0.8106441964345322,
      "learning_rate": 0.00019883976722980936,
      "loss": 0.6969,
      "step": 2240
    },
    {
      "epoch": 0.14400256574727388,
      "grad_norm": 0.6312440700944748,
      "learning_rate": 0.00019882270112431974,
      "loss": 0.6787,
      "step": 2245
    },
    {
      "epoch": 0.14432328415651058,
      "grad_norm": 0.8698670635315567,
      "learning_rate": 0.00019880551116222277,
      "loss": 0.79,
      "step": 2250
    },
    {
      "epoch": 0.14464400256574728,
      "grad_norm": 0.5675337075202405,
      "learning_rate": 0.00019878819736506297,
      "loss": 0.6922,
      "step": 2255
    },
    {
      "epoch": 0.14496472097498397,
      "grad_norm": 0.8080748220001619,
      "learning_rate": 0.00019877075975454015,
      "loss": 0.6723,
      "step": 2260
    },
    {
      "epoch": 0.14528543938422064,
      "grad_norm": 1.18598966284805,
      "learning_rate": 0.00019875319835250922,
      "loss": 0.6078,
      "step": 2265
    },
    {
      "epoch": 0.14560615779345734,
      "grad_norm": 0.7396735588781944,
      "learning_rate": 0.00019873551318098026,
      "loss": 0.6555,
      "step": 2270
    },
    {
      "epoch": 0.14592687620269404,
      "grad_norm": 0.9421384978371221,
      "learning_rate": 0.00019871770426211843,
      "loss": 0.6763,
      "step": 2275
    },
    {
      "epoch": 0.14624759461193074,
      "grad_norm": 1.3557865695262534,
      "learning_rate": 0.0001986997716182441,
      "loss": 0.6517,
      "step": 2280
    },
    {
      "epoch": 0.1465683130211674,
      "grad_norm": 0.7620628179190014,
      "learning_rate": 0.0001986817152718326,
      "loss": 0.8213,
      "step": 2285
    },
    {
      "epoch": 0.1468890314304041,
      "grad_norm": 1.1665229535256436,
      "learning_rate": 0.0001986635352455143,
      "loss": 0.6593,
      "step": 2290
    },
    {
      "epoch": 0.1472097498396408,
      "grad_norm": 0.549262325529975,
      "learning_rate": 0.0001986452315620747,
      "loss": 0.5682,
      "step": 2295
    },
    {
      "epoch": 0.14753046824887747,
      "grad_norm": 0.6290840720109729,
      "learning_rate": 0.00019862680424445413,
      "loss": 0.5891,
      "step": 2300
    },
    {
      "epoch": 0.14785118665811417,
      "grad_norm": 0.6806013181414412,
      "learning_rate": 0.00019860825331574798,
      "loss": 0.7814,
      "step": 2305
    },
    {
      "epoch": 0.14817190506735087,
      "grad_norm": 0.9105112621167852,
      "learning_rate": 0.00019858957879920647,
      "loss": 0.6707,
      "step": 2310
    },
    {
      "epoch": 0.14849262347658757,
      "grad_norm": 0.8528821816779594,
      "learning_rate": 0.00019857078071823484,
      "loss": 0.664,
      "step": 2315
    },
    {
      "epoch": 0.14881334188582424,
      "grad_norm": 0.7181914153458827,
      "learning_rate": 0.0001985518590963931,
      "loss": 0.6854,
      "step": 2320
    },
    {
      "epoch": 0.14913406029506093,
      "grad_norm": 0.7397278453494517,
      "learning_rate": 0.00019853281395739613,
      "loss": 0.6665,
      "step": 2325
    },
    {
      "epoch": 0.14945477870429763,
      "grad_norm": 0.8745968398949746,
      "learning_rate": 0.00019851364532511362,
      "loss": 0.5766,
      "step": 2330
    },
    {
      "epoch": 0.14977549711353433,
      "grad_norm": 1.2088886679730004,
      "learning_rate": 0.00019849435322356995,
      "loss": 0.7018,
      "step": 2335
    },
    {
      "epoch": 0.150096215522771,
      "grad_norm": 1.0443479254100274,
      "learning_rate": 0.00019847493767694444,
      "loss": 0.5986,
      "step": 2340
    },
    {
      "epoch": 0.1504169339320077,
      "grad_norm": 1.0921241128817574,
      "learning_rate": 0.00019845539870957092,
      "loss": 0.5923,
      "step": 2345
    },
    {
      "epoch": 0.1507376523412444,
      "grad_norm": 0.9646802917631114,
      "learning_rate": 0.00019843573634593806,
      "loss": 0.7926,
      "step": 2350
    },
    {
      "epoch": 0.1510583707504811,
      "grad_norm": 0.7656847484095911,
      "learning_rate": 0.00019841595061068906,
      "loss": 0.7207,
      "step": 2355
    },
    {
      "epoch": 0.15137908915971776,
      "grad_norm": 0.5049528849051477,
      "learning_rate": 0.0001983960415286219,
      "loss": 0.6228,
      "step": 2360
    },
    {
      "epoch": 0.15169980756895446,
      "grad_norm": 0.9068993192806996,
      "learning_rate": 0.00019837600912468893,
      "loss": 0.5693,
      "step": 2365
    },
    {
      "epoch": 0.15202052597819116,
      "grad_norm": 0.8676250105736654,
      "learning_rate": 0.00019835585342399732,
      "loss": 0.5743,
      "step": 2370
    },
    {
      "epoch": 0.15234124438742783,
      "grad_norm": 0.5246385631697503,
      "learning_rate": 0.00019833557445180855,
      "loss": 0.7401,
      "step": 2375
    },
    {
      "epoch": 0.15266196279666452,
      "grad_norm": 0.7016792226152242,
      "learning_rate": 0.0001983151722335387,
      "loss": 0.6403,
      "step": 2380
    },
    {
      "epoch": 0.15298268120590122,
      "grad_norm": 0.7722496289657441,
      "learning_rate": 0.00019829464679475836,
      "loss": 0.5484,
      "step": 2385
    },
    {
      "epoch": 0.15330339961513792,
      "grad_norm": 1.2298123662291214,
      "learning_rate": 0.00019827399816119243,
      "loss": 0.7674,
      "step": 2390
    },
    {
      "epoch": 0.1536241180243746,
      "grad_norm": 0.7861238282945989,
      "learning_rate": 0.00019825322635872036,
      "loss": 0.619,
      "step": 2395
    },
    {
      "epoch": 0.1539448364336113,
      "grad_norm": 0.9211911752664865,
      "learning_rate": 0.00019823233141337584,
      "loss": 0.6211,
      "step": 2400
    },
    {
      "epoch": 0.15426555484284799,
      "grad_norm": 0.7151255909037463,
      "learning_rate": 0.00019821131335134696,
      "loss": 0.548,
      "step": 2405
    },
    {
      "epoch": 0.15458627325208468,
      "grad_norm": 0.9458426635711992,
      "learning_rate": 0.00019819017219897613,
      "loss": 0.6482,
      "step": 2410
    },
    {
      "epoch": 0.15490699166132135,
      "grad_norm": 1.0258204800171964,
      "learning_rate": 0.00019816890798276,
      "loss": 0.6717,
      "step": 2415
    },
    {
      "epoch": 0.15522771007055805,
      "grad_norm": 1.2116376507078799,
      "learning_rate": 0.00019814752072934945,
      "loss": 0.6242,
      "step": 2420
    },
    {
      "epoch": 0.15554842847979475,
      "grad_norm": 0.7799968415850017,
      "learning_rate": 0.00019812601046554962,
      "loss": 0.6257,
      "step": 2425
    },
    {
      "epoch": 0.15586914688903142,
      "grad_norm": 0.4916761578519649,
      "learning_rate": 0.00019810437721831976,
      "loss": 0.7221,
      "step": 2430
    },
    {
      "epoch": 0.15618986529826812,
      "grad_norm": 0.9089669003206741,
      "learning_rate": 0.00019808262101477328,
      "loss": 0.6457,
      "step": 2435
    },
    {
      "epoch": 0.1565105837075048,
      "grad_norm": 0.5752941624716728,
      "learning_rate": 0.00019806074188217766,
      "loss": 0.5367,
      "step": 2440
    },
    {
      "epoch": 0.1568313021167415,
      "grad_norm": 0.7889396413468497,
      "learning_rate": 0.0001980387398479546,
      "loss": 0.5704,
      "step": 2445
    },
    {
      "epoch": 0.15715202052597818,
      "grad_norm": 0.7974301152247996,
      "learning_rate": 0.00019801661493967955,
      "loss": 0.7438,
      "step": 2450
    },
    {
      "epoch": 0.15747273893521488,
      "grad_norm": 0.9099718674001662,
      "learning_rate": 0.00019799436718508228,
      "loss": 0.7057,
      "step": 2455
    },
    {
      "epoch": 0.15779345734445158,
      "grad_norm": 0.7460789907183486,
      "learning_rate": 0.0001979719966120463,
      "loss": 0.6769,
      "step": 2460
    },
    {
      "epoch": 0.15811417575368827,
      "grad_norm": 0.9026682063218279,
      "learning_rate": 0.00019794950324860918,
      "loss": 0.6677,
      "step": 2465
    },
    {
      "epoch": 0.15843489416292494,
      "grad_norm": 0.706813388972018,
      "learning_rate": 0.0001979268871229623,
      "loss": 0.652,
      "step": 2470
    },
    {
      "epoch": 0.15875561257216164,
      "grad_norm": 0.7951893501420781,
      "learning_rate": 0.00019790414826345094,
      "loss": 0.7231,
      "step": 2475
    },
    {
      "epoch": 0.15907633098139834,
      "grad_norm": 0.9695064104615378,
      "learning_rate": 0.0001978812866985742,
      "loss": 0.6308,
      "step": 2480
    },
    {
      "epoch": 0.159397049390635,
      "grad_norm": 0.5344509876021667,
      "learning_rate": 0.00019785830245698497,
      "loss": 0.6997,
      "step": 2485
    },
    {
      "epoch": 0.1597177677998717,
      "grad_norm": 0.834051661967047,
      "learning_rate": 0.00019783519556748987,
      "loss": 0.6783,
      "step": 2490
    },
    {
      "epoch": 0.1600384862091084,
      "grad_norm": 0.9723305146917021,
      "learning_rate": 0.0001978119660590493,
      "loss": 0.7798,
      "step": 2495
    },
    {
      "epoch": 0.1603592046183451,
      "grad_norm": 0.8859242414039744,
      "learning_rate": 0.00019778861396077725,
      "loss": 0.793,
      "step": 2500
    },
    {
      "epoch": 0.16067992302758177,
      "grad_norm": 0.7241777810857887,
      "learning_rate": 0.00019776513930194148,
      "loss": 0.504,
      "step": 2505
    },
    {
      "epoch": 0.16100064143681847,
      "grad_norm": 1.054121315907452,
      "learning_rate": 0.00019774154211196318,
      "loss": 0.7509,
      "step": 2510
    },
    {
      "epoch": 0.16132135984605517,
      "grad_norm": 0.8701449793412597,
      "learning_rate": 0.0001977178224204173,
      "loss": 0.7875,
      "step": 2515
    },
    {
      "epoch": 0.16164207825529187,
      "grad_norm": 0.7757819809049686,
      "learning_rate": 0.00019769398025703224,
      "loss": 0.6047,
      "step": 2520
    },
    {
      "epoch": 0.16196279666452854,
      "grad_norm": 1.0713357367053484,
      "learning_rate": 0.00019767001565168982,
      "loss": 0.7384,
      "step": 2525
    },
    {
      "epoch": 0.16228351507376523,
      "grad_norm": 0.43793306094407475,
      "learning_rate": 0.00019764592863442544,
      "loss": 0.6156,
      "step": 2530
    },
    {
      "epoch": 0.16260423348300193,
      "grad_norm": 0.9034469617213254,
      "learning_rate": 0.0001976217192354279,
      "loss": 0.6383,
      "step": 2535
    },
    {
      "epoch": 0.1629249518922386,
      "grad_norm": 0.7090465404578327,
      "learning_rate": 0.0001975973874850393,
      "loss": 0.59,
      "step": 2540
    },
    {
      "epoch": 0.1632456703014753,
      "grad_norm": 0.7781025944113742,
      "learning_rate": 0.00019757293341375517,
      "loss": 0.6829,
      "step": 2545
    },
    {
      "epoch": 0.163566388710712,
      "grad_norm": 0.701765797555506,
      "learning_rate": 0.00019754835705222435,
      "loss": 0.6682,
      "step": 2550
    },
    {
      "epoch": 0.1638871071199487,
      "grad_norm": 0.8486110822681391,
      "learning_rate": 0.00019752365843124885,
      "loss": 0.7107,
      "step": 2555
    },
    {
      "epoch": 0.16420782552918536,
      "grad_norm": 1.2183183484648679,
      "learning_rate": 0.00019749883758178404,
      "loss": 0.7092,
      "step": 2560
    },
    {
      "epoch": 0.16452854393842206,
      "grad_norm": 0.5747438190450085,
      "learning_rate": 0.0001974738945349384,
      "loss": 0.5635,
      "step": 2565
    },
    {
      "epoch": 0.16484926234765876,
      "grad_norm": 0.754766366798954,
      "learning_rate": 0.0001974488293219736,
      "loss": 0.7561,
      "step": 2570
    },
    {
      "epoch": 0.16516998075689546,
      "grad_norm": 0.9579439740753294,
      "learning_rate": 0.00019742364197430443,
      "loss": 0.6015,
      "step": 2575
    },
    {
      "epoch": 0.16549069916613213,
      "grad_norm": 0.6786544154968012,
      "learning_rate": 0.00019739833252349867,
      "loss": 0.5112,
      "step": 2580
    },
    {
      "epoch": 0.16581141757536882,
      "grad_norm": 0.7934214823629537,
      "learning_rate": 0.00019737290100127722,
      "loss": 0.7203,
      "step": 2585
    },
    {
      "epoch": 0.16613213598460552,
      "grad_norm": 1.33220621050734,
      "learning_rate": 0.00019734734743951396,
      "loss": 0.6863,
      "step": 2590
    },
    {
      "epoch": 0.16645285439384222,
      "grad_norm": 0.8267900862256077,
      "learning_rate": 0.00019732167187023572,
      "loss": 0.6449,
      "step": 2595
    },
    {
      "epoch": 0.1667735728030789,
      "grad_norm": 0.7287938245757929,
      "learning_rate": 0.0001972958743256222,
      "loss": 0.7308,
      "step": 2600
    },
    {
      "epoch": 0.1670942912123156,
      "grad_norm": 0.5363094807734924,
      "learning_rate": 0.00019726995483800613,
      "loss": 0.6403,
      "step": 2605
    },
    {
      "epoch": 0.16741500962155229,
      "grad_norm": 0.7277617239159246,
      "learning_rate": 0.00019724391343987284,
      "loss": 0.6777,
      "step": 2610
    },
    {
      "epoch": 0.16773572803078896,
      "grad_norm": 0.9462519719607535,
      "learning_rate": 0.00019721775016386057,
      "loss": 0.6895,
      "step": 2615
    },
    {
      "epoch": 0.16805644644002565,
      "grad_norm": 0.8528897030121969,
      "learning_rate": 0.0001971914650427604,
      "loss": 0.5536,
      "step": 2620
    },
    {
      "epoch": 0.16837716484926235,
      "grad_norm": 0.9319172497451002,
      "learning_rate": 0.000197165058109516,
      "loss": 0.5724,
      "step": 2625
    },
    {
      "epoch": 0.16869788325849905,
      "grad_norm": 0.7410196474628663,
      "learning_rate": 0.0001971385293972237,
      "loss": 0.6785,
      "step": 2630
    },
    {
      "epoch": 0.16901860166773572,
      "grad_norm": 0.9192207798068145,
      "learning_rate": 0.00019711187893913255,
      "loss": 0.7219,
      "step": 2635
    },
    {
      "epoch": 0.16933932007697242,
      "grad_norm": 0.5750937169325536,
      "learning_rate": 0.00019708510676864414,
      "loss": 0.482,
      "step": 2640
    },
    {
      "epoch": 0.1696600384862091,
      "grad_norm": 0.7158603995106417,
      "learning_rate": 0.0001970582129193126,
      "loss": 0.577,
      "step": 2645
    },
    {
      "epoch": 0.1699807568954458,
      "grad_norm": 0.9152254783119084,
      "learning_rate": 0.00019703119742484453,
      "loss": 0.6657,
      "step": 2650
    },
    {
      "epoch": 0.17030147530468248,
      "grad_norm": 0.7435319188039847,
      "learning_rate": 0.00019700406031909905,
      "loss": 0.6779,
      "step": 2655
    },
    {
      "epoch": 0.17062219371391918,
      "grad_norm": 1.504228508241335,
      "learning_rate": 0.0001969768016360877,
      "loss": 0.7278,
      "step": 2660
    },
    {
      "epoch": 0.17094291212315588,
      "grad_norm": 1.2092049917834673,
      "learning_rate": 0.00019694942140997435,
      "loss": 0.7341,
      "step": 2665
    },
    {
      "epoch": 0.17126363053239255,
      "grad_norm": 0.6080302726719192,
      "learning_rate": 0.00019692191967507524,
      "loss": 0.6543,
      "step": 2670
    },
    {
      "epoch": 0.17158434894162924,
      "grad_norm": 0.7373008700852878,
      "learning_rate": 0.0001968942964658589,
      "loss": 0.6152,
      "step": 2675
    },
    {
      "epoch": 0.17190506735086594,
      "grad_norm": 0.9214476765346659,
      "learning_rate": 0.000196866551816946,
      "loss": 0.6878,
      "step": 2680
    },
    {
      "epoch": 0.17222578576010264,
      "grad_norm": 0.7450194855735123,
      "learning_rate": 0.0001968386857631096,
      "loss": 0.6173,
      "step": 2685
    },
    {
      "epoch": 0.1725465041693393,
      "grad_norm": 0.6242054305521421,
      "learning_rate": 0.00019681069833927476,
      "loss": 0.6746,
      "step": 2690
    },
    {
      "epoch": 0.172867222578576,
      "grad_norm": 0.711220248168634,
      "learning_rate": 0.00019678258958051877,
      "loss": 0.6821,
      "step": 2695
    },
    {
      "epoch": 0.1731879409878127,
      "grad_norm": 0.7496584977206721,
      "learning_rate": 0.00019675435952207088,
      "loss": 0.5238,
      "step": 2700
    },
    {
      "epoch": 0.1735086593970494,
      "grad_norm": 0.7084413643635924,
      "learning_rate": 0.00019672600819931247,
      "loss": 0.7056,
      "step": 2705
    },
    {
      "epoch": 0.17382937780628607,
      "grad_norm": 1.0439027628488613,
      "learning_rate": 0.00019669753564777688,
      "loss": 0.6513,
      "step": 2710
    },
    {
      "epoch": 0.17415009621552277,
      "grad_norm": 0.71498067288977,
      "learning_rate": 0.0001966689419031493,
      "loss": 0.7406,
      "step": 2715
    },
    {
      "epoch": 0.17447081462475947,
      "grad_norm": 0.7033452927937216,
      "learning_rate": 0.00019664022700126695,
      "loss": 0.6923,
      "step": 2720
    },
    {
      "epoch": 0.17479153303399614,
      "grad_norm": 0.8919976779446186,
      "learning_rate": 0.00019661139097811877,
      "loss": 0.6326,
      "step": 2725
    },
    {
      "epoch": 0.17511225144323284,
      "grad_norm": 0.9493437873661492,
      "learning_rate": 0.00019658243386984562,
      "loss": 0.5783,
      "step": 2730
    },
    {
      "epoch": 0.17543296985246953,
      "grad_norm": 0.9860728443591087,
      "learning_rate": 0.00019655335571274003,
      "loss": 0.7279,
      "step": 2735
    },
    {
      "epoch": 0.17575368826170623,
      "grad_norm": 0.6352021684421743,
      "learning_rate": 0.0001965241565432463,
      "loss": 0.6397,
      "step": 2740
    },
    {
      "epoch": 0.1760744066709429,
      "grad_norm": 1.099016920497353,
      "learning_rate": 0.00019649483639796032,
      "loss": 0.6756,
      "step": 2745
    },
    {
      "epoch": 0.1763951250801796,
      "grad_norm": 0.7058834343210731,
      "learning_rate": 0.00019646539531362973,
      "loss": 0.7218,
      "step": 2750
    },
    {
      "epoch": 0.1767158434894163,
      "grad_norm": 0.8020832284905198,
      "learning_rate": 0.00019643583332715366,
      "loss": 0.5708,
      "step": 2755
    },
    {
      "epoch": 0.177036561898653,
      "grad_norm": 0.8014855578510585,
      "learning_rate": 0.0001964061504755827,
      "loss": 0.7843,
      "step": 2760
    },
    {
      "epoch": 0.17735728030788966,
      "grad_norm": 1.0134184586337234,
      "learning_rate": 0.0001963763467961191,
      "loss": 0.6599,
      "step": 2765
    },
    {
      "epoch": 0.17767799871712636,
      "grad_norm": 0.6050193347531744,
      "learning_rate": 0.0001963464223261164,
      "loss": 0.7984,
      "step": 2770
    },
    {
      "epoch": 0.17799871712636306,
      "grad_norm": 0.7479913165773774,
      "learning_rate": 0.0001963163771030796,
      "loss": 0.7469,
      "step": 2775
    },
    {
      "epoch": 0.17831943553559973,
      "grad_norm": 1.091278392341476,
      "learning_rate": 0.00019628621116466502,
      "loss": 0.6991,
      "step": 2780
    },
    {
      "epoch": 0.17864015394483643,
      "grad_norm": 1.0105012542968526,
      "learning_rate": 0.00019625592454868026,
      "loss": 0.6867,
      "step": 2785
    },
    {
      "epoch": 0.17896087235407312,
      "grad_norm": 0.8032083651463552,
      "learning_rate": 0.0001962255172930842,
      "loss": 0.7184,
      "step": 2790
    },
    {
      "epoch": 0.17928159076330982,
      "grad_norm": 0.8193497605449357,
      "learning_rate": 0.00019619498943598688,
      "loss": 0.5785,
      "step": 2795
    },
    {
      "epoch": 0.1796023091725465,
      "grad_norm": 0.7772046040254091,
      "learning_rate": 0.00019616434101564956,
      "loss": 0.7471,
      "step": 2800
    },
    {
      "epoch": 0.1799230275817832,
      "grad_norm": 1.224565960941351,
      "learning_rate": 0.00019613357207048452,
      "loss": 0.856,
      "step": 2805
    },
    {
      "epoch": 0.1802437459910199,
      "grad_norm": 0.6591412427417273,
      "learning_rate": 0.00019610268263905515,
      "loss": 0.5893,
      "step": 2810
    },
    {
      "epoch": 0.18056446440025659,
      "grad_norm": 0.8875976837711199,
      "learning_rate": 0.00019607167276007587,
      "loss": 0.7161,
      "step": 2815
    },
    {
      "epoch": 0.18088518280949326,
      "grad_norm": 0.8225479052301773,
      "learning_rate": 0.00019604054247241193,
      "loss": 0.5873,
      "step": 2820
    },
    {
      "epoch": 0.18120590121872995,
      "grad_norm": 1.2087539785527361,
      "learning_rate": 0.00019600929181507972,
      "loss": 0.6542,
      "step": 2825
    },
    {
      "epoch": 0.18152661962796665,
      "grad_norm": 0.8050140113302814,
      "learning_rate": 0.00019597792082724625,
      "loss": 0.5778,
      "step": 2830
    },
    {
      "epoch": 0.18184733803720335,
      "grad_norm": 1.321288241534433,
      "learning_rate": 0.00019594642954822952,
      "loss": 0.5994,
      "step": 2835
    },
    {
      "epoch": 0.18216805644644002,
      "grad_norm": 0.9376939681240336,
      "learning_rate": 0.00019591481801749816,
      "loss": 0.5046,
      "step": 2840
    },
    {
      "epoch": 0.18248877485567672,
      "grad_norm": 0.6185458970009285,
      "learning_rate": 0.00019588308627467162,
      "loss": 0.6859,
      "step": 2845
    },
    {
      "epoch": 0.18280949326491341,
      "grad_norm": 0.7801762201714135,
      "learning_rate": 0.00019585123435952,
      "loss": 0.7015,
      "step": 2850
    },
    {
      "epoch": 0.18313021167415008,
      "grad_norm": 0.7265831165052501,
      "learning_rate": 0.00019581926231196391,
      "loss": 0.823,
      "step": 2855
    },
    {
      "epoch": 0.18345093008338678,
      "grad_norm": 0.8151220320154888,
      "learning_rate": 0.00019578717017207467,
      "loss": 0.689,
      "step": 2860
    },
    {
      "epoch": 0.18377164849262348,
      "grad_norm": 0.9213195972340709,
      "learning_rate": 0.000195754957980074,
      "loss": 0.7382,
      "step": 2865
    },
    {
      "epoch": 0.18409236690186018,
      "grad_norm": 0.782822592817081,
      "learning_rate": 0.0001957226257763342,
      "loss": 0.6929,
      "step": 2870
    },
    {
      "epoch": 0.18441308531109685,
      "grad_norm": 0.980335474676683,
      "learning_rate": 0.0001956901736013778,
      "loss": 0.6156,
      "step": 2875
    },
    {
      "epoch": 0.18473380372033354,
      "grad_norm": 0.9039810035947186,
      "learning_rate": 0.00019565760149587794,
      "loss": 0.7664,
      "step": 2880
    },
    {
      "epoch": 0.18505452212957024,
      "grad_norm": 0.000701834979829147,
      "learning_rate": 0.0001956249095006578,
      "loss": 0.5249,
      "step": 2885
    },
    {
      "epoch": 0.18537524053880694,
      "grad_norm": 1.0237955976436885,
      "learning_rate": 0.00019559209765669105,
      "loss": 0.6839,
      "step": 2890
    },
    {
      "epoch": 0.1856959589480436,
      "grad_norm": 0.6769833810242086,
      "learning_rate": 0.00019555916600510145,
      "loss": 0.6537,
      "step": 2895
    },
    {
      "epoch": 0.1860166773572803,
      "grad_norm": 0.6462485885713231,
      "learning_rate": 0.00019552611458716296,
      "loss": 0.723,
      "step": 2900
    },
    {
      "epoch": 0.186337395766517,
      "grad_norm": 0.8722147531755802,
      "learning_rate": 0.0001954929434442996,
      "loss": 0.6837,
      "step": 2905
    },
    {
      "epoch": 0.18665811417575368,
      "grad_norm": 0.6906487731551919,
      "learning_rate": 0.0001954596526180855,
      "loss": 0.6678,
      "step": 2910
    },
    {
      "epoch": 0.18697883258499037,
      "grad_norm": 0.8754536117451718,
      "learning_rate": 0.00019542624215024474,
      "loss": 0.7607,
      "step": 2915
    },
    {
      "epoch": 0.18729955099422707,
      "grad_norm": 0.7481215119155424,
      "learning_rate": 0.0001953927120826514,
      "loss": 0.7354,
      "step": 2920
    },
    {
      "epoch": 0.18762026940346377,
      "grad_norm": 0.7173045174318763,
      "learning_rate": 0.0001953590624573294,
      "loss": 0.6889,
      "step": 2925
    },
    {
      "epoch": 0.18794098781270044,
      "grad_norm": 0.688657494500447,
      "learning_rate": 0.00019532529331645258,
      "loss": 0.7716,
      "step": 2930
    },
    {
      "epoch": 0.18826170622193714,
      "grad_norm": 0.8542179699315836,
      "learning_rate": 0.0001952914047023445,
      "loss": 0.6846,
      "step": 2935
    },
    {
      "epoch": 0.18858242463117383,
      "grad_norm": 0.6693936334963977,
      "learning_rate": 0.0001952573966574785,
      "loss": 0.6893,
      "step": 2940
    },
    {
      "epoch": 0.18890314304041053,
      "grad_norm": 1.1047249058364512,
      "learning_rate": 0.00019522326922447755,
      "loss": 0.7203,
      "step": 2945
    },
    {
      "epoch": 0.1892238614496472,
      "grad_norm": 0.6082855408476369,
      "learning_rate": 0.00019518902244611435,
      "loss": 0.7069,
      "step": 2950
    },
    {
      "epoch": 0.1895445798588839,
      "grad_norm": 0.5867678432004605,
      "learning_rate": 0.00019515465636531107,
      "loss": 0.7485,
      "step": 2955
    },
    {
      "epoch": 0.1898652982681206,
      "grad_norm": 0.6389524482986783,
      "learning_rate": 0.0001951201710251395,
      "loss": 0.6291,
      "step": 2960
    },
    {
      "epoch": 0.19018601667735727,
      "grad_norm": 0.40852828777296263,
      "learning_rate": 0.00019508556646882083,
      "loss": 0.6572,
      "step": 2965
    },
    {
      "epoch": 0.19050673508659396,
      "grad_norm": 0.6625359401782684,
      "learning_rate": 0.00019505084273972568,
      "loss": 0.6905,
      "step": 2970
    },
    {
      "epoch": 0.19082745349583066,
      "grad_norm": 0.6733266631590418,
      "learning_rate": 0.00019501599988137406,
      "loss": 0.6065,
      "step": 2975
    },
    {
      "epoch": 0.19114817190506736,
      "grad_norm": 0.8217762217578838,
      "learning_rate": 0.00019498103793743528,
      "loss": 0.6843,
      "step": 2980
    },
    {
      "epoch": 0.19146889031430403,
      "grad_norm": 1.220514466724885,
      "learning_rate": 0.00019494595695172787,
      "loss": 0.604,
      "step": 2985
    },
    {
      "epoch": 0.19178960872354073,
      "grad_norm": 0.792446196427873,
      "learning_rate": 0.00019491075696821962,
      "loss": 0.6326,
      "step": 2990
    },
    {
      "epoch": 0.19211032713277743,
      "grad_norm": 0.8158356531364367,
      "learning_rate": 0.00019487543803102736,
      "loss": 0.7795,
      "step": 2995
    },
    {
      "epoch": 0.19243104554201412,
      "grad_norm": 1.3297681323714916,
      "learning_rate": 0.00019484000018441715,
      "loss": 0.6776,
      "step": 3000
    },
    {
      "epoch": 0.1927517639512508,
      "grad_norm": 1.1206878255004398,
      "learning_rate": 0.00019480444347280392,
      "loss": 0.7425,
      "step": 3005
    },
    {
      "epoch": 0.1930724823604875,
      "grad_norm": 0.5668482553685025,
      "learning_rate": 0.00019476876794075168,
      "loss": 0.6463,
      "step": 3010
    },
    {
      "epoch": 0.1933932007697242,
      "grad_norm": 0.9274228876056752,
      "learning_rate": 0.0001947329736329734,
      "loss": 0.7253,
      "step": 3015
    },
    {
      "epoch": 0.19371391917896089,
      "grad_norm": 0.8934110376365801,
      "learning_rate": 0.0001946970605943308,
      "loss": 0.8008,
      "step": 3020
    },
    {
      "epoch": 0.19403463758819756,
      "grad_norm": 0.7054346176332205,
      "learning_rate": 0.00019466102886983445,
      "loss": 0.6421,
      "step": 3025
    },
    {
      "epoch": 0.19435535599743425,
      "grad_norm": 1.112312708275422,
      "learning_rate": 0.0001946248785046437,
      "loss": 0.5448,
      "step": 3030
    },
    {
      "epoch": 0.19467607440667095,
      "grad_norm": 0.9514480454813623,
      "learning_rate": 0.00019458860954406655,
      "loss": 0.8921,
      "step": 3035
    },
    {
      "epoch": 0.19499679281590762,
      "grad_norm": 0.8289559763958162,
      "learning_rate": 0.00019455222203355974,
      "loss": 0.6384,
      "step": 3040
    },
    {
      "epoch": 0.19531751122514432,
      "grad_norm": 1.6772904982725059,
      "learning_rate": 0.00019451571601872842,
      "loss": 0.593,
      "step": 3045
    },
    {
      "epoch": 0.19563822963438102,
      "grad_norm": 0.933959150583705,
      "learning_rate": 0.00019447909154532642,
      "loss": 0.7033,
      "step": 3050
    },
    {
      "epoch": 0.19595894804361771,
      "grad_norm": 0.9836848697506737,
      "learning_rate": 0.00019444234865925597,
      "loss": 0.694,
      "step": 3055
    },
    {
      "epoch": 0.19627966645285438,
      "grad_norm": 0.752058149609346,
      "learning_rate": 0.00019440548740656772,
      "loss": 0.8419,
      "step": 3060
    },
    {
      "epoch": 0.19660038486209108,
      "grad_norm": 0.5564595991041628,
      "learning_rate": 0.00019436850783346063,
      "loss": 0.5868,
      "step": 3065
    },
    {
      "epoch": 0.19692110327132778,
      "grad_norm": 1.1233031900082198,
      "learning_rate": 0.00019433140998628202,
      "loss": 0.7213,
      "step": 3070
    },
    {
      "epoch": 0.19724182168056448,
      "grad_norm": 0.9846847511141703,
      "learning_rate": 0.00019429419391152743,
      "loss": 0.6083,
      "step": 3075
    },
    {
      "epoch": 0.19756254008980115,
      "grad_norm": 0.9133697850179805,
      "learning_rate": 0.00019425685965584056,
      "loss": 0.7509,
      "step": 3080
    },
    {
      "epoch": 0.19788325849903785,
      "grad_norm": 1.1268873349974773,
      "learning_rate": 0.0001942194072660132,
      "loss": 0.6734,
      "step": 3085
    },
    {
      "epoch": 0.19820397690827454,
      "grad_norm": 0.663450697814864,
      "learning_rate": 0.00019418183678898525,
      "loss": 0.7093,
      "step": 3090
    },
    {
      "epoch": 0.1985246953175112,
      "grad_norm": 0.6245075928754343,
      "learning_rate": 0.0001941441482718446,
      "loss": 0.7194,
      "step": 3095
    },
    {
      "epoch": 0.1988454137267479,
      "grad_norm": 0.9587885835266485,
      "learning_rate": 0.00019410634176182705,
      "loss": 0.6995,
      "step": 3100
    },
    {
      "epoch": 0.1991661321359846,
      "grad_norm": 0.8163502504890695,
      "learning_rate": 0.00019406841730631636,
      "loss": 0.7503,
      "step": 3105
    },
    {
      "epoch": 0.1994868505452213,
      "grad_norm": 0.9426439782405206,
      "learning_rate": 0.00019403037495284398,
      "loss": 0.7404,
      "step": 3110
    },
    {
      "epoch": 0.19980756895445798,
      "grad_norm": 0.8220300785309613,
      "learning_rate": 0.00019399221474908932,
      "loss": 0.6744,
      "step": 3115
    },
    {
      "epoch": 0.20012828736369467,
      "grad_norm": 0.9955681688037235,
      "learning_rate": 0.00019395393674287927,
      "loss": 0.6852,
      "step": 3120
    },
    {
      "epoch": 0.20044900577293137,
      "grad_norm": 1.1278721654085937,
      "learning_rate": 0.00019391554098218853,
      "loss": 0.8426,
      "step": 3125
    },
    {
      "epoch": 0.20076972418216807,
      "grad_norm": 1.289322139002122,
      "learning_rate": 0.00019387702751513932,
      "loss": 0.7352,
      "step": 3130
    },
    {
      "epoch": 0.20109044259140474,
      "grad_norm": 1.4969951218148942,
      "learning_rate": 0.0001938383963900014,
      "loss": 0.7202,
      "step": 3135
    },
    {
      "epoch": 0.20141116100064144,
      "grad_norm": 0.8939306827167222,
      "learning_rate": 0.000193799647655192,
      "loss": 0.6326,
      "step": 3140
    },
    {
      "epoch": 0.20173187940987813,
      "grad_norm": 1.038193039895127,
      "learning_rate": 0.00019376078135927566,
      "loss": 0.5945,
      "step": 3145
    },
    {
      "epoch": 0.2020525978191148,
      "grad_norm": 0.8466700431352269,
      "learning_rate": 0.00019372179755096448,
      "loss": 0.4709,
      "step": 3150
    },
    {
      "epoch": 0.2023733162283515,
      "grad_norm": 0.8353167491615692,
      "learning_rate": 0.00019368269627911757,
      "loss": 0.6145,
      "step": 3155
    },
    {
      "epoch": 0.2026940346375882,
      "grad_norm": 0.5826569638112876,
      "learning_rate": 0.00019364347759274144,
      "loss": 0.6798,
      "step": 3160
    },
    {
      "epoch": 0.2030147530468249,
      "grad_norm": 0.6596971126256945,
      "learning_rate": 0.0001936041415409897,
      "loss": 0.7164,
      "step": 3165
    },
    {
      "epoch": 0.20333547145606157,
      "grad_norm": 1.1459761657771013,
      "learning_rate": 0.00019356468817316311,
      "loss": 0.6503,
      "step": 3170
    },
    {
      "epoch": 0.20365618986529826,
      "grad_norm": 0.6795054057142108,
      "learning_rate": 0.0001935251175387094,
      "loss": 0.624,
      "step": 3175
    },
    {
      "epoch": 0.20397690827453496,
      "grad_norm": 0.740763733162126,
      "learning_rate": 0.00019348542968722324,
      "loss": 0.6297,
      "step": 3180
    },
    {
      "epoch": 0.20429762668377166,
      "grad_norm": 0.7064796503029271,
      "learning_rate": 0.00019344562466844635,
      "loss": 0.6003,
      "step": 3185
    },
    {
      "epoch": 0.20461834509300833,
      "grad_norm": 1.6506358182547065,
      "learning_rate": 0.00019340570253226712,
      "loss": 0.4787,
      "step": 3190
    },
    {
      "epoch": 0.20493906350224503,
      "grad_norm": 1.1332295207671033,
      "learning_rate": 0.0001933656633287209,
      "loss": 0.7126,
      "step": 3195
    },
    {
      "epoch": 0.20525978191148173,
      "grad_norm": 0.617200353783866,
      "learning_rate": 0.00019332550710798966,
      "loss": 0.598,
      "step": 3200
    },
    {
      "epoch": 0.2055805003207184,
      "grad_norm": 0.868513802069887,
      "learning_rate": 0.000193285233920402,
      "loss": 0.7152,
      "step": 3205
    },
    {
      "epoch": 0.2059012187299551,
      "grad_norm": 1.1852925025104672,
      "learning_rate": 0.00019324484381643325,
      "loss": 0.7774,
      "step": 3210
    },
    {
      "epoch": 0.2062219371391918,
      "grad_norm": 1.0280680170586727,
      "learning_rate": 0.00019320433684670514,
      "loss": 0.7043,
      "step": 3215
    },
    {
      "epoch": 0.2065426555484285,
      "grad_norm": 0.6987881012001924,
      "learning_rate": 0.00019316371306198592,
      "loss": 0.7619,
      "step": 3220
    },
    {
      "epoch": 0.20686337395766516,
      "grad_norm": 0.8392027535004901,
      "learning_rate": 0.00019312297251319026,
      "loss": 0.6781,
      "step": 3225
    },
    {
      "epoch": 0.20718409236690186,
      "grad_norm": 1.2842078269698645,
      "learning_rate": 0.00019308211525137915,
      "loss": 0.7145,
      "step": 3230
    },
    {
      "epoch": 0.20750481077613855,
      "grad_norm": 0.6603411917591546,
      "learning_rate": 0.0001930411413277599,
      "loss": 0.6411,
      "step": 3235
    },
    {
      "epoch": 0.20782552918537525,
      "grad_norm": 1.3159150838945801,
      "learning_rate": 0.000193000050793686,
      "loss": 0.7067,
      "step": 3240
    },
    {
      "epoch": 0.20814624759461192,
      "grad_norm": 1.2826837962016335,
      "learning_rate": 0.0001929588437006571,
      "loss": 0.657,
      "step": 3245
    },
    {
      "epoch": 0.20846696600384862,
      "grad_norm": 0.7429467281992763,
      "learning_rate": 0.00019291752010031887,
      "loss": 0.6783,
      "step": 3250
    },
    {
      "epoch": 0.20878768441308532,
      "grad_norm": 0.9388767995389723,
      "learning_rate": 0.00019287608004446314,
      "loss": 0.6873,
      "step": 3255
    },
    {
      "epoch": 0.20910840282232201,
      "grad_norm": 0.8840070141339184,
      "learning_rate": 0.0001928345235850276,
      "loss": 0.6159,
      "step": 3260
    },
    {
      "epoch": 0.20942912123155868,
      "grad_norm": 1.0732885802726535,
      "learning_rate": 0.00019279285077409582,
      "loss": 0.6713,
      "step": 3265
    },
    {
      "epoch": 0.20974983964079538,
      "grad_norm": 0.7289657532988314,
      "learning_rate": 0.00019275106166389725,
      "loss": 0.6831,
      "step": 3270
    },
    {
      "epoch": 0.21007055805003208,
      "grad_norm": 0.6492856906135663,
      "learning_rate": 0.00019270915630680707,
      "loss": 0.7126,
      "step": 3275
    },
    {
      "epoch": 0.21039127645926875,
      "grad_norm": 0.8073736143636202,
      "learning_rate": 0.0001926671347553462,
      "loss": 0.7527,
      "step": 3280
    },
    {
      "epoch": 0.21071199486850545,
      "grad_norm": 0.8682418292741673,
      "learning_rate": 0.0001926249970621811,
      "loss": 0.5924,
      "step": 3285
    },
    {
      "epoch": 0.21103271327774215,
      "grad_norm": 0.553914766273313,
      "learning_rate": 0.00019258274328012384,
      "loss": 0.5456,
      "step": 3290
    },
    {
      "epoch": 0.21135343168697884,
      "grad_norm": 0.9718939215705609,
      "learning_rate": 0.00019254037346213204,
      "loss": 0.5976,
      "step": 3295
    },
    {
      "epoch": 0.2116741500962155,
      "grad_norm": 0.9064065621099515,
      "learning_rate": 0.00019249788766130863,
      "loss": 0.7424,
      "step": 3300
    },
    {
      "epoch": 0.2119948685054522,
      "grad_norm": 0.6693670165919959,
      "learning_rate": 0.00019245528593090204,
      "loss": 0.7834,
      "step": 3305
    },
    {
      "epoch": 0.2123155869146889,
      "grad_norm": 0.68000110275399,
      "learning_rate": 0.0001924125683243059,
      "loss": 0.8261,
      "step": 3310
    },
    {
      "epoch": 0.2126363053239256,
      "grad_norm": 0.8936655552945705,
      "learning_rate": 0.0001923697348950591,
      "loss": 0.7315,
      "step": 3315
    },
    {
      "epoch": 0.21295702373316228,
      "grad_norm": 0.9370537429273521,
      "learning_rate": 0.0001923267856968457,
      "loss": 0.6054,
      "step": 3320
    },
    {
      "epoch": 0.21327774214239897,
      "grad_norm": 1.5321045176308976,
      "learning_rate": 0.00019228372078349486,
      "loss": 0.6995,
      "step": 3325
    },
    {
      "epoch": 0.21359846055163567,
      "grad_norm": 0.8164083897600656,
      "learning_rate": 0.00019224054020898073,
      "loss": 0.7217,
      "step": 3330
    },
    {
      "epoch": 0.21391917896087234,
      "grad_norm": 0.9360751302506096,
      "learning_rate": 0.00019219724402742247,
      "loss": 0.7071,
      "step": 3335
    },
    {
      "epoch": 0.21423989737010904,
      "grad_norm": 1.1474158049320227,
      "learning_rate": 0.00019215383229308412,
      "loss": 0.696,
      "step": 3340
    },
    {
      "epoch": 0.21456061577934574,
      "grad_norm": 0.6286443687036616,
      "learning_rate": 0.0001921103050603745,
      "loss": 0.6582,
      "step": 3345
    },
    {
      "epoch": 0.21488133418858243,
      "grad_norm": 0.930008180786893,
      "learning_rate": 0.00019206666238384728,
      "loss": 0.7267,
      "step": 3350
    },
    {
      "epoch": 0.2152020525978191,
      "grad_norm": 0.8966235538817937,
      "learning_rate": 0.0001920229043182007,
      "loss": 0.7461,
      "step": 3355
    },
    {
      "epoch": 0.2155227710070558,
      "grad_norm": 0.6075118442836386,
      "learning_rate": 0.0001919790309182777,
      "loss": 0.6218,
      "step": 3360
    },
    {
      "epoch": 0.2158434894162925,
      "grad_norm": 1.120521483944113,
      "learning_rate": 0.00019193504223906577,
      "loss": 0.7854,
      "step": 3365
    },
    {
      "epoch": 0.2161642078255292,
      "grad_norm": 0.7536443555714086,
      "learning_rate": 0.00019189093833569686,
      "loss": 0.6665,
      "step": 3370
    },
    {
      "epoch": 0.21648492623476587,
      "grad_norm": 0.7306155955546904,
      "learning_rate": 0.00019184671926344732,
      "loss": 0.5562,
      "step": 3375
    },
    {
      "epoch": 0.21680564464400257,
      "grad_norm": 1.4066089443224215,
      "learning_rate": 0.00019180238507773788,
      "loss": 0.7206,
      "step": 3380
    },
    {
      "epoch": 0.21712636305323926,
      "grad_norm": 1.0420087314885336,
      "learning_rate": 0.0001917579358341335,
      "loss": 0.8488,
      "step": 3385
    },
    {
      "epoch": 0.21744708146247593,
      "grad_norm": 1.24092779077047,
      "learning_rate": 0.0001917133715883434,
      "loss": 0.7737,
      "step": 3390
    },
    {
      "epoch": 0.21776779987171263,
      "grad_norm": 1.2683256948043233,
      "learning_rate": 0.00019166869239622085,
      "loss": 0.5991,
      "step": 3395
    },
    {
      "epoch": 0.21808851828094933,
      "grad_norm": 1.0154708506536307,
      "learning_rate": 0.0001916238983137633,
      "loss": 0.6921,
      "step": 3400
    },
    {
      "epoch": 0.21840923669018603,
      "grad_norm": 1.250860867590444,
      "learning_rate": 0.00019157898939711212,
      "loss": 0.772,
      "step": 3405
    },
    {
      "epoch": 0.2187299550994227,
      "grad_norm": 1.0205976247637063,
      "learning_rate": 0.0001915339657025526,
      "loss": 0.6262,
      "step": 3410
    },
    {
      "epoch": 0.2190506735086594,
      "grad_norm": 0.6808470166264919,
      "learning_rate": 0.0001914888272865139,
      "loss": 0.5628,
      "step": 3415
    },
    {
      "epoch": 0.2193713919178961,
      "grad_norm": 1.0460679318245396,
      "learning_rate": 0.00019144357420556893,
      "loss": 0.6497,
      "step": 3420
    },
    {
      "epoch": 0.2196921103271328,
      "grad_norm": 0.8912439646989759,
      "learning_rate": 0.00019139820651643442,
      "loss": 0.5868,
      "step": 3425
    },
    {
      "epoch": 0.22001282873636946,
      "grad_norm": 0.6690277429678054,
      "learning_rate": 0.00019135272427597063,
      "loss": 0.6833,
      "step": 3430
    },
    {
      "epoch": 0.22033354714560616,
      "grad_norm": 1.0200781753500376,
      "learning_rate": 0.00019130712754118138,
      "loss": 0.6225,
      "step": 3435
    },
    {
      "epoch": 0.22065426555484285,
      "grad_norm": 1.0186432727769665,
      "learning_rate": 0.00019126141636921414,
      "loss": 0.769,
      "step": 3440
    },
    {
      "epoch": 0.22097498396407952,
      "grad_norm": 0.671761473616358,
      "learning_rate": 0.0001912155908173596,
      "loss": 0.6917,
      "step": 3445
    },
    {
      "epoch": 0.22129570237331622,
      "grad_norm": 0.7493482108843831,
      "learning_rate": 0.00019116965094305197,
      "loss": 0.7762,
      "step": 3450
    },
    {
      "epoch": 0.22161642078255292,
      "grad_norm": 0.9676529237022933,
      "learning_rate": 0.00019112359680386863,
      "loss": 0.6426,
      "step": 3455
    },
    {
      "epoch": 0.22193713919178962,
      "grad_norm": 0.7117654744699794,
      "learning_rate": 0.00019107742845753025,
      "loss": 0.6968,
      "step": 3460
    },
    {
      "epoch": 0.2222578576010263,
      "grad_norm": 1.0489562483489054,
      "learning_rate": 0.0001910311459619006,
      "loss": 0.7852,
      "step": 3465
    },
    {
      "epoch": 0.22257857601026299,
      "grad_norm": 0.7103830582474117,
      "learning_rate": 0.00019098474937498652,
      "loss": 0.6496,
      "step": 3470
    },
    {
      "epoch": 0.22289929441949968,
      "grad_norm": 1.1088261693908699,
      "learning_rate": 0.00019093823875493784,
      "loss": 0.7313,
      "step": 3475
    },
    {
      "epoch": 0.22322001282873638,
      "grad_norm": 1.1659589438084368,
      "learning_rate": 0.00019089161416004733,
      "loss": 0.6526,
      "step": 3480
    },
    {
      "epoch": 0.22354073123797305,
      "grad_norm": 0.7493230462026259,
      "learning_rate": 0.0001908448756487506,
      "loss": 0.6629,
      "step": 3485
    },
    {
      "epoch": 0.22386144964720975,
      "grad_norm": 0.8650060759204274,
      "learning_rate": 0.000190798023279626,
      "loss": 0.7321,
      "step": 3490
    },
    {
      "epoch": 0.22418216805644645,
      "grad_norm": 0.8002336983221607,
      "learning_rate": 0.0001907510571113946,
      "loss": 0.7816,
      "step": 3495
    },
    {
      "epoch": 0.22450288646568314,
      "grad_norm": 0.6840069838552998,
      "learning_rate": 0.00019070397720292014,
      "loss": 0.6472,
      "step": 3500
    },
    {
      "epoch": 0.2248236048749198,
      "grad_norm": 0.9253534124109082,
      "learning_rate": 0.0001906567836132089,
      "loss": 0.7952,
      "step": 3505
    },
    {
      "epoch": 0.2251443232841565,
      "grad_norm": 0.8707427934510977,
      "learning_rate": 0.0001906094764014095,
      "loss": 0.6403,
      "step": 3510
    },
    {
      "epoch": 0.2254650416933932,
      "grad_norm": 0.8952137846177877,
      "learning_rate": 0.00019056205562681324,
      "loss": 0.7713,
      "step": 3515
    },
    {
      "epoch": 0.22578576010262988,
      "grad_norm": 1.2157321282590767,
      "learning_rate": 0.00019051452134885346,
      "loss": 0.7791,
      "step": 3520
    },
    {
      "epoch": 0.22610647851186658,
      "grad_norm": 1.1942747630269164,
      "learning_rate": 0.000190466873627106,
      "loss": 0.7107,
      "step": 3525
    },
    {
      "epoch": 0.22642719692110327,
      "grad_norm": 0.7534228887260359,
      "learning_rate": 0.00019041911252128864,
      "loss": 0.7748,
      "step": 3530
    },
    {
      "epoch": 0.22674791533033997,
      "grad_norm": 0.7020738108193582,
      "learning_rate": 0.0001903712380912615,
      "loss": 0.641,
      "step": 3535
    },
    {
      "epoch": 0.22706863373957664,
      "grad_norm": 0.8822584692031392,
      "learning_rate": 0.0001903232503970266,
      "loss": 0.7302,
      "step": 3540
    },
    {
      "epoch": 0.22738935214881334,
      "grad_norm": 0.7669563154301963,
      "learning_rate": 0.00019027514949872794,
      "loss": 0.6305,
      "step": 3545
    },
    {
      "epoch": 0.22771007055805004,
      "grad_norm": 0.75341665833547,
      "learning_rate": 0.0001902269354566514,
      "loss": 0.5966,
      "step": 3550
    },
    {
      "epoch": 0.22803078896728673,
      "grad_norm": 1.3621102982113154,
      "learning_rate": 0.00019017860833122466,
      "loss": 0.7256,
      "step": 3555
    },
    {
      "epoch": 0.2283515073765234,
      "grad_norm": 0.6413371506739955,
      "learning_rate": 0.00019013016818301718,
      "loss": 0.7576,
      "step": 3560
    },
    {
      "epoch": 0.2286722257857601,
      "grad_norm": 0.9240762303756279,
      "learning_rate": 0.00019008161507274004,
      "loss": 0.6412,
      "step": 3565
    },
    {
      "epoch": 0.2289929441949968,
      "grad_norm": 0.600216888507175,
      "learning_rate": 0.0001900329490612458,
      "loss": 0.6077,
      "step": 3570
    },
    {
      "epoch": 0.22931366260423347,
      "grad_norm": 0.7764633127488129,
      "learning_rate": 0.0001899841702095287,
      "loss": 0.7296,
      "step": 3575
    },
    {
      "epoch": 0.22963438101347017,
      "grad_norm": 0.8982484209272996,
      "learning_rate": 0.00018993527857872437,
      "loss": 0.7016,
      "step": 3580
    },
    {
      "epoch": 0.22995509942270687,
      "grad_norm": 1.0720659350142319,
      "learning_rate": 0.0001898862742301096,
      "loss": 0.7538,
      "step": 3585
    },
    {
      "epoch": 0.23027581783194356,
      "grad_norm": 1.1146855770453603,
      "learning_rate": 0.00018983715722510267,
      "loss": 0.7336,
      "step": 3590
    },
    {
      "epoch": 0.23059653624118023,
      "grad_norm": 1.0183157286000422,
      "learning_rate": 0.00018978792762526297,
      "loss": 0.7608,
      "step": 3595
    },
    {
      "epoch": 0.23091725465041693,
      "grad_norm": 0.5987067875621542,
      "learning_rate": 0.000189738585492291,
      "loss": 0.7482,
      "step": 3600
    },
    {
      "epoch": 0.23123797305965363,
      "grad_norm": 1.2051854914953493,
      "learning_rate": 0.0001896891308880283,
      "loss": 0.6866,
      "step": 3605
    },
    {
      "epoch": 0.23155869146889033,
      "grad_norm": 0.6469997389423526,
      "learning_rate": 0.00018963956387445743,
      "loss": 0.5533,
      "step": 3610
    },
    {
      "epoch": 0.231879409878127,
      "grad_norm": 0.751435050187464,
      "learning_rate": 0.00018958988451370172,
      "loss": 0.5345,
      "step": 3615
    },
    {
      "epoch": 0.2322001282873637,
      "grad_norm": 0.9296699512717883,
      "learning_rate": 0.00018954009286802545,
      "loss": 0.6395,
      "step": 3620
    },
    {
      "epoch": 0.2325208466966004,
      "grad_norm": 0.8523320100136826,
      "learning_rate": 0.0001894901889998335,
      "loss": 0.6699,
      "step": 3625
    },
    {
      "epoch": 0.23284156510583706,
      "grad_norm": 0.8927205659717501,
      "learning_rate": 0.0001894401729716715,
      "loss": 0.7016,
      "step": 3630
    },
    {
      "epoch": 0.23316228351507376,
      "grad_norm": 0.9773519130062428,
      "learning_rate": 0.00018939004484622556,
      "loss": 0.5938,
      "step": 3635
    },
    {
      "epoch": 0.23348300192431046,
      "grad_norm": 1.205672119851859,
      "learning_rate": 0.00018933980468632236,
      "loss": 0.6659,
      "step": 3640
    },
    {
      "epoch": 0.23380372033354715,
      "grad_norm": 0.7579640404532227,
      "learning_rate": 0.00018928945255492898,
      "loss": 0.6189,
      "step": 3645
    },
    {
      "epoch": 0.23412443874278382,
      "grad_norm": 0.7167559954703847,
      "learning_rate": 0.0001892389885151528,
      "loss": 0.7174,
      "step": 3650
    },
    {
      "epoch": 0.23444515715202052,
      "grad_norm": 0.9211676236510546,
      "learning_rate": 0.0001891884126302415,
      "loss": 0.7194,
      "step": 3655
    },
    {
      "epoch": 0.23476587556125722,
      "grad_norm": 1.0264289808335763,
      "learning_rate": 0.00018913772496358293,
      "loss": 0.7518,
      "step": 3660
    },
    {
      "epoch": 0.23508659397049392,
      "grad_norm": 0.7037785727516465,
      "learning_rate": 0.000189086925578705,
      "loss": 0.6463,
      "step": 3665
    },
    {
      "epoch": 0.2354073123797306,
      "grad_norm": 0.7939519982595736,
      "learning_rate": 0.0001890360145392757,
      "loss": 0.6679,
      "step": 3670
    },
    {
      "epoch": 0.23572803078896729,
      "grad_norm": 0.9346634485226615,
      "learning_rate": 0.00018898499190910285,
      "loss": 0.6707,
      "step": 3675
    },
    {
      "epoch": 0.23604874919820398,
      "grad_norm": 0.9205144038862676,
      "learning_rate": 0.00018893385775213428,
      "loss": 0.5932,
      "step": 3680
    },
    {
      "epoch": 0.23636946760744068,
      "grad_norm": 0.7662986014450179,
      "learning_rate": 0.00018888261213245751,
      "loss": 0.626,
      "step": 3685
    },
    {
      "epoch": 0.23669018601667735,
      "grad_norm": 0.9540864146877855,
      "learning_rate": 0.00018883125511429976,
      "loss": 0.6775,
      "step": 3690
    },
    {
      "epoch": 0.23701090442591405,
      "grad_norm": 0.8236472390358622,
      "learning_rate": 0.0001887797867620279,
      "loss": 0.5783,
      "step": 3695
    },
    {
      "epoch": 0.23733162283515075,
      "grad_norm": 1.1046319576589374,
      "learning_rate": 0.00018872820714014828,
      "loss": 0.7178,
      "step": 3700
    },
    {
      "epoch": 0.23765234124438742,
      "grad_norm": 0.8687058181792315,
      "learning_rate": 0.0001886765163133068,
      "loss": 0.7188,
      "step": 3705
    },
    {
      "epoch": 0.2379730596536241,
      "grad_norm": 0.8074055463421766,
      "learning_rate": 0.0001886247143462886,
      "loss": 0.6839,
      "step": 3710
    },
    {
      "epoch": 0.2382937780628608,
      "grad_norm": 0.9477091526553252,
      "learning_rate": 0.0001885728013040183,
      "loss": 0.694,
      "step": 3715
    },
    {
      "epoch": 0.2386144964720975,
      "grad_norm": 1.4070444194213776,
      "learning_rate": 0.00018852077725155955,
      "loss": 0.6443,
      "step": 3720
    },
    {
      "epoch": 0.23893521488133418,
      "grad_norm": 0.7885481772614231,
      "learning_rate": 0.00018846864225411522,
      "loss": 0.6975,
      "step": 3725
    },
    {
      "epoch": 0.23925593329057088,
      "grad_norm": 1.416662073982706,
      "learning_rate": 0.0001884163963770272,
      "loss": 0.5101,
      "step": 3730
    },
    {
      "epoch": 0.23957665169980757,
      "grad_norm": 1.1458969994696415,
      "learning_rate": 0.00018836403968577642,
      "loss": 0.6615,
      "step": 3735
    },
    {
      "epoch": 0.23989737010904427,
      "grad_norm": 0.8353107592687541,
      "learning_rate": 0.00018831157224598265,
      "loss": 0.6361,
      "step": 3740
    },
    {
      "epoch": 0.24021808851828094,
      "grad_norm": 0.9588837283118316,
      "learning_rate": 0.0001882589941234044,
      "loss": 0.6013,
      "step": 3745
    },
    {
      "epoch": 0.24053880692751764,
      "grad_norm": 0.9378372320194371,
      "learning_rate": 0.00018820630538393896,
      "loss": 0.6638,
      "step": 3750
    },
    {
      "epoch": 0.24085952533675434,
      "grad_norm": 0.657630819098,
      "learning_rate": 0.0001881535060936223,
      "loss": 0.6291,
      "step": 3755
    },
    {
      "epoch": 0.241180243745991,
      "grad_norm": 0.8483718480641205,
      "learning_rate": 0.00018810059631862885,
      "loss": 0.7489,
      "step": 3760
    },
    {
      "epoch": 0.2415009621552277,
      "grad_norm": 0.6502718844446955,
      "learning_rate": 0.0001880475761252716,
      "loss": 0.7414,
      "step": 3765
    },
    {
      "epoch": 0.2418216805644644,
      "grad_norm": 1.1168778404379636,
      "learning_rate": 0.00018799444558000188,
      "loss": 0.5148,
      "step": 3770
    },
    {
      "epoch": 0.2421423989737011,
      "grad_norm": 0.7913864245267141,
      "learning_rate": 0.00018794120474940936,
      "loss": 0.7854,
      "step": 3775
    },
    {
      "epoch": 0.24246311738293777,
      "grad_norm": 0.6448828952136001,
      "learning_rate": 0.00018788785370022187,
      "loss": 0.7078,
      "step": 3780
    },
    {
      "epoch": 0.24278383579217447,
      "grad_norm": 1.5060141096885609,
      "learning_rate": 0.00018783439249930544,
      "loss": 0.6149,
      "step": 3785
    },
    {
      "epoch": 0.24310455420141117,
      "grad_norm": 1.1449759900992198,
      "learning_rate": 0.00018778082121366415,
      "loss": 0.6848,
      "step": 3790
    },
    {
      "epoch": 0.24342527261064786,
      "grad_norm": 0.8978384550293506,
      "learning_rate": 0.00018772713991044006,
      "loss": 0.5786,
      "step": 3795
    },
    {
      "epoch": 0.24374599101988453,
      "grad_norm": 1.0307173194583823,
      "learning_rate": 0.0001876733486569131,
      "loss": 0.6089,
      "step": 3800
    },
    {
      "epoch": 0.24406670942912123,
      "grad_norm": 1.0460496173819018,
      "learning_rate": 0.00018761944752050092,
      "loss": 0.7205,
      "step": 3805
    },
    {
      "epoch": 0.24438742783835793,
      "grad_norm": 0.7905784500183457,
      "learning_rate": 0.00018756543656875903,
      "loss": 0.6866,
      "step": 3810
    },
    {
      "epoch": 0.2447081462475946,
      "grad_norm": 0.8146037687112702,
      "learning_rate": 0.0001875113158693805,
      "loss": 0.6722,
      "step": 3815
    },
    {
      "epoch": 0.2450288646568313,
      "grad_norm": 0.6700527883378358,
      "learning_rate": 0.00018745708549019598,
      "loss": 0.69,
      "step": 3820
    },
    {
      "epoch": 0.245349583066068,
      "grad_norm": 0.86059539710882,
      "learning_rate": 0.00018740274549917355,
      "loss": 0.6951,
      "step": 3825
    },
    {
      "epoch": 0.2456703014753047,
      "grad_norm": 0.754486021920581,
      "learning_rate": 0.00018734829596441869,
      "loss": 0.669,
      "step": 3830
    },
    {
      "epoch": 0.24599101988454136,
      "grad_norm": 1.2671234138000913,
      "learning_rate": 0.00018729373695417411,
      "loss": 0.53,
      "step": 3835
    },
    {
      "epoch": 0.24631173829377806,
      "grad_norm": 0.6932982987761634,
      "learning_rate": 0.0001872390685368199,
      "loss": 0.6588,
      "step": 3840
    },
    {
      "epoch": 0.24663245670301476,
      "grad_norm": 0.8973942648351731,
      "learning_rate": 0.00018718429078087306,
      "loss": 0.759,
      "step": 3845
    },
    {
      "epoch": 0.24695317511225146,
      "grad_norm": 0.8232879633687452,
      "learning_rate": 0.00018712940375498777,
      "loss": 0.7228,
      "step": 3850
    },
    {
      "epoch": 0.24727389352148813,
      "grad_norm": 0.6326649992249508,
      "learning_rate": 0.0001870744075279551,
      "loss": 0.7392,
      "step": 3855
    },
    {
      "epoch": 0.24759461193072482,
      "grad_norm": 1.097141467166474,
      "learning_rate": 0.000187019302168703,
      "loss": 0.6787,
      "step": 3860
    },
    {
      "epoch": 0.24791533033996152,
      "grad_norm": 0.3009107744843191,
      "learning_rate": 0.00018696408774629623,
      "loss": 0.5101,
      "step": 3865
    },
    {
      "epoch": 0.2482360487491982,
      "grad_norm": 0.8763665765416497,
      "learning_rate": 0.00018690876432993616,
      "loss": 0.6693,
      "step": 3870
    },
    {
      "epoch": 0.2485567671584349,
      "grad_norm": 0.8358957515633696,
      "learning_rate": 0.00018685333198896085,
      "loss": 0.4624,
      "step": 3875
    },
    {
      "epoch": 0.24887748556767159,
      "grad_norm": 0.7954157351888587,
      "learning_rate": 0.00018679779079284478,
      "loss": 0.6448,
      "step": 3880
    },
    {
      "epoch": 0.24919820397690828,
      "grad_norm": 0.8015671945298257,
      "learning_rate": 0.00018674214081119899,
      "loss": 0.7378,
      "step": 3885
    },
    {
      "epoch": 0.24951892238614495,
      "grad_norm": 0.4176253877935304,
      "learning_rate": 0.00018668638211377075,
      "loss": 0.6243,
      "step": 3890
    },
    {
      "epoch": 0.24983964079538165,
      "grad_norm": 0.9442754652275936,
      "learning_rate": 0.00018663051477044363,
      "loss": 0.7179,
      "step": 3895
    },
    {
      "epoch": 0.2501603592046183,
      "grad_norm": 0.4823245844586911,
      "learning_rate": 0.00018657453885123743,
      "loss": 0.6911,
      "step": 3900
    },
    {
      "epoch": 0.250481077613855,
      "grad_norm": 1.2379921804802545,
      "learning_rate": 0.00018651845442630788,
      "loss": 0.7287,
      "step": 3905
    },
    {
      "epoch": 0.2508017960230917,
      "grad_norm": 0.8025900155844875,
      "learning_rate": 0.00018646226156594683,
      "loss": 0.6996,
      "step": 3910
    },
    {
      "epoch": 0.2511225144323284,
      "grad_norm": 0.7107570481507937,
      "learning_rate": 0.00018640596034058202,
      "loss": 0.6547,
      "step": 3915
    },
    {
      "epoch": 0.2514432328415651,
      "grad_norm": 1.0641358272949475,
      "learning_rate": 0.00018634955082077694,
      "loss": 0.6644,
      "step": 3920
    },
    {
      "epoch": 0.2517639512508018,
      "grad_norm": 0.47480734009901776,
      "learning_rate": 0.00018629303307723087,
      "loss": 0.573,
      "step": 3925
    },
    {
      "epoch": 0.2520846696600385,
      "grad_norm": 0.793188561410365,
      "learning_rate": 0.0001862364071807787,
      "loss": 0.5214,
      "step": 3930
    },
    {
      "epoch": 0.25240538806927515,
      "grad_norm": 1.0592935580458442,
      "learning_rate": 0.00018617967320239088,
      "loss": 0.7271,
      "step": 3935
    },
    {
      "epoch": 0.25272610647851185,
      "grad_norm": 1.2256726599433683,
      "learning_rate": 0.00018612283121317334,
      "loss": 0.6422,
      "step": 3940
    },
    {
      "epoch": 0.25304682488774854,
      "grad_norm": 0.7519903384129473,
      "learning_rate": 0.00018606588128436733,
      "loss": 0.5867,
      "step": 3945
    },
    {
      "epoch": 0.25336754329698524,
      "grad_norm": 0.7245403184900441,
      "learning_rate": 0.00018600882348734942,
      "loss": 0.595,
      "step": 3950
    },
    {
      "epoch": 0.25368826170622194,
      "grad_norm": 0.8118238034713691,
      "learning_rate": 0.0001859516578936314,
      "loss": 0.6789,
      "step": 3955
    },
    {
      "epoch": 0.25400898011545864,
      "grad_norm": 0.94671989401086,
      "learning_rate": 0.0001858943845748601,
      "loss": 0.5563,
      "step": 3960
    },
    {
      "epoch": 0.25432969852469534,
      "grad_norm": 1.2366250568429358,
      "learning_rate": 0.00018583700360281743,
      "loss": 0.7508,
      "step": 3965
    },
    {
      "epoch": 0.25465041693393203,
      "grad_norm": 0.79253106009907,
      "learning_rate": 0.00018577951504942014,
      "loss": 0.8067,
      "step": 3970
    },
    {
      "epoch": 0.2549711353431687,
      "grad_norm": 0.8702530726486416,
      "learning_rate": 0.0001857219189867199,
      "loss": 0.617,
      "step": 3975
    },
    {
      "epoch": 0.2552918537524054,
      "grad_norm": 1.0941049074741396,
      "learning_rate": 0.0001856642154869031,
      "loss": 0.6722,
      "step": 3980
    },
    {
      "epoch": 0.25561257216164207,
      "grad_norm": 0.8439431895631772,
      "learning_rate": 0.00018560640462229072,
      "loss": 0.4939,
      "step": 3985
    },
    {
      "epoch": 0.25593329057087877,
      "grad_norm": 0.6351905484581176,
      "learning_rate": 0.00018554848646533842,
      "loss": 0.6447,
      "step": 3990
    },
    {
      "epoch": 0.25625400898011547,
      "grad_norm": 0.5405523691592523,
      "learning_rate": 0.00018549046108863623,
      "loss": 0.619,
      "step": 3995
    },
    {
      "epoch": 0.25657472738935216,
      "grad_norm": 0.9663208760661458,
      "learning_rate": 0.00018543232856490857,
      "loss": 0.7077,
      "step": 4000
    },
    {
      "epoch": 0.25689544579858886,
      "grad_norm": 1.1847646315539586,
      "learning_rate": 0.00018537408896701426,
      "loss": 0.645,
      "step": 4005
    },
    {
      "epoch": 0.2572161642078255,
      "grad_norm": 0.9615403982388305,
      "learning_rate": 0.00018531574236794614,
      "loss": 0.6811,
      "step": 4010
    },
    {
      "epoch": 0.2575368826170622,
      "grad_norm": 0.8358212875135942,
      "learning_rate": 0.0001852572888408313,
      "loss": 0.7614,
      "step": 4015
    },
    {
      "epoch": 0.2578576010262989,
      "grad_norm": 0.654849517944886,
      "learning_rate": 0.00018519872845893084,
      "loss": 0.7217,
      "step": 4020
    },
    {
      "epoch": 0.2581783194355356,
      "grad_norm": 1.2575079996892056,
      "learning_rate": 0.00018514006129563966,
      "loss": 0.6607,
      "step": 4025
    },
    {
      "epoch": 0.2584990378447723,
      "grad_norm": 0.9922068320402926,
      "learning_rate": 0.00018508128742448664,
      "loss": 0.837,
      "step": 4030
    },
    {
      "epoch": 0.258819756254009,
      "grad_norm": 0.6769732353504583,
      "learning_rate": 0.00018502240691913423,
      "loss": 0.5391,
      "step": 4035
    },
    {
      "epoch": 0.2591404746632457,
      "grad_norm": 1.0085400425349142,
      "learning_rate": 0.00018496341985337872,
      "loss": 0.6348,
      "step": 4040
    },
    {
      "epoch": 0.2594611930724824,
      "grad_norm": 1.0848700957447277,
      "learning_rate": 0.00018490432630114987,
      "loss": 0.6778,
      "step": 4045
    },
    {
      "epoch": 0.25978191148171903,
      "grad_norm": 2.0271957707532953,
      "learning_rate": 0.00018484512633651083,
      "loss": 0.654,
      "step": 4050
    },
    {
      "epoch": 0.2601026298909557,
      "grad_norm": 0.7805695373329654,
      "learning_rate": 0.00018478582003365822,
      "loss": 0.7096,
      "step": 4055
    },
    {
      "epoch": 0.2604233483001924,
      "grad_norm": 0.9870035129297559,
      "learning_rate": 0.0001847264074669219,
      "loss": 0.6384,
      "step": 4060
    },
    {
      "epoch": 0.2607440667094291,
      "grad_norm": 1.4231275295206969,
      "learning_rate": 0.00018466688871076492,
      "loss": 0.7516,
      "step": 4065
    },
    {
      "epoch": 0.2610647851186658,
      "grad_norm": 0.9526984436593213,
      "learning_rate": 0.00018460726383978337,
      "loss": 0.7593,
      "step": 4070
    },
    {
      "epoch": 0.2613855035279025,
      "grad_norm": 0.8092373561884175,
      "learning_rate": 0.00018454753292870645,
      "loss": 0.7056,
      "step": 4075
    },
    {
      "epoch": 0.2617062219371392,
      "grad_norm": 1.0372403017182314,
      "learning_rate": 0.0001844876960523961,
      "loss": 0.7301,
      "step": 4080
    },
    {
      "epoch": 0.26202694034637586,
      "grad_norm": 1.0864230414581424,
      "learning_rate": 0.0001844277532858472,
      "loss": 0.7108,
      "step": 4085
    },
    {
      "epoch": 0.26234765875561256,
      "grad_norm": 1.1180610427980169,
      "learning_rate": 0.00018436770470418734,
      "loss": 0.6945,
      "step": 4090
    },
    {
      "epoch": 0.26266837716484925,
      "grad_norm": 0.7213205274182185,
      "learning_rate": 0.00018430755038267664,
      "loss": 0.5532,
      "step": 4095
    },
    {
      "epoch": 0.26298909557408595,
      "grad_norm": 1.1163686122257008,
      "learning_rate": 0.00018424729039670786,
      "loss": 0.6516,
      "step": 4100
    },
    {
      "epoch": 0.26330981398332265,
      "grad_norm": 1.2583036183921432,
      "learning_rate": 0.00018418692482180605,
      "loss": 0.6414,
      "step": 4105
    },
    {
      "epoch": 0.26363053239255935,
      "grad_norm": 0.9930140372439703,
      "learning_rate": 0.0001841264537336287,
      "loss": 0.6207,
      "step": 4110
    },
    {
      "epoch": 0.26395125080179604,
      "grad_norm": 1.0089622154428168,
      "learning_rate": 0.00018406587720796555,
      "loss": 0.584,
      "step": 4115
    },
    {
      "epoch": 0.2642719692110327,
      "grad_norm": 0.7458841041229098,
      "learning_rate": 0.00018400519532073845,
      "loss": 0.5883,
      "step": 4120
    },
    {
      "epoch": 0.2645926876202694,
      "grad_norm": 0.8089823917563255,
      "learning_rate": 0.0001839444081480013,
      "loss": 0.7034,
      "step": 4125
    },
    {
      "epoch": 0.2649134060295061,
      "grad_norm": 0.6692062310802624,
      "learning_rate": 0.00018388351576594,
      "loss": 0.6344,
      "step": 4130
    },
    {
      "epoch": 0.2652341244387428,
      "grad_norm": 1.1933403776576017,
      "learning_rate": 0.0001838225182508722,
      "loss": 0.6661,
      "step": 4135
    },
    {
      "epoch": 0.2655548428479795,
      "grad_norm": 0.8440572180162611,
      "learning_rate": 0.00018376141567924746,
      "loss": 0.748,
      "step": 4140
    },
    {
      "epoch": 0.2658755612572162,
      "grad_norm": 0.8186841087339073,
      "learning_rate": 0.0001837002081276469,
      "loss": 0.7713,
      "step": 4145
    },
    {
      "epoch": 0.2661962796664529,
      "grad_norm": 1.0666433490645642,
      "learning_rate": 0.0001836388956727833,
      "loss": 0.8609,
      "step": 4150
    },
    {
      "epoch": 0.26651699807568957,
      "grad_norm": 1.1355241254608384,
      "learning_rate": 0.00018357747839150082,
      "loss": 0.6469,
      "step": 4155
    },
    {
      "epoch": 0.2668377164849262,
      "grad_norm": 0.7464964673319473,
      "learning_rate": 0.00018351595636077509,
      "loss": 0.5979,
      "step": 4160
    },
    {
      "epoch": 0.2671584348941629,
      "grad_norm": 0.8983502422541593,
      "learning_rate": 0.00018345432965771296,
      "loss": 0.6956,
      "step": 4165
    },
    {
      "epoch": 0.2674791533033996,
      "grad_norm": 1.0667530685360391,
      "learning_rate": 0.00018339259835955252,
      "loss": 0.613,
      "step": 4170
    },
    {
      "epoch": 0.2677998717126363,
      "grad_norm": 0.9132017699113576,
      "learning_rate": 0.00018333076254366292,
      "loss": 0.7377,
      "step": 4175
    },
    {
      "epoch": 0.268120590121873,
      "grad_norm": 0.820877622590415,
      "learning_rate": 0.0001832688222875443,
      "loss": 0.6287,
      "step": 4180
    },
    {
      "epoch": 0.2684413085311097,
      "grad_norm": 1.118619920969021,
      "learning_rate": 0.00018320677766882777,
      "loss": 0.6384,
      "step": 4185
    },
    {
      "epoch": 0.2687620269403464,
      "grad_norm": 1.4366554572404993,
      "learning_rate": 0.00018314462876527508,
      "loss": 0.6833,
      "step": 4190
    },
    {
      "epoch": 0.26908274534958304,
      "grad_norm": 1.0835964639148083,
      "learning_rate": 0.00018308237565477887,
      "loss": 0.5727,
      "step": 4195
    },
    {
      "epoch": 0.26940346375881974,
      "grad_norm": 0.9256686315486947,
      "learning_rate": 0.00018302001841536222,
      "loss": 0.6766,
      "step": 4200
    },
    {
      "epoch": 0.26972418216805644,
      "grad_norm": 0.9133924374197757,
      "learning_rate": 0.00018295755712517887,
      "loss": 0.6114,
      "step": 4205
    },
    {
      "epoch": 0.27004490057729313,
      "grad_norm": 0.9886601065235708,
      "learning_rate": 0.00018289499186251282,
      "loss": 0.6487,
      "step": 4210
    },
    {
      "epoch": 0.27036561898652983,
      "grad_norm": 0.7921503565458989,
      "learning_rate": 0.00018283232270577854,
      "loss": 0.5979,
      "step": 4215
    },
    {
      "epoch": 0.27068633739576653,
      "grad_norm": 0.6150099468882971,
      "learning_rate": 0.00018276954973352053,
      "loss": 0.6981,
      "step": 4220
    },
    {
      "epoch": 0.2710070558050032,
      "grad_norm": 1.0834800425960802,
      "learning_rate": 0.00018270667302441355,
      "loss": 0.5754,
      "step": 4225
    },
    {
      "epoch": 0.27132777421423987,
      "grad_norm": 1.6569395813805736,
      "learning_rate": 0.00018264369265726232,
      "loss": 0.6754,
      "step": 4230
    },
    {
      "epoch": 0.27164849262347657,
      "grad_norm": 1.1904706994873762,
      "learning_rate": 0.0001825806087110015,
      "loss": 0.6955,
      "step": 4235
    },
    {
      "epoch": 0.27196921103271327,
      "grad_norm": 0.9036845887010689,
      "learning_rate": 0.00018251742126469553,
      "loss": 0.6245,
      "step": 4240
    },
    {
      "epoch": 0.27228992944194996,
      "grad_norm": 1.2154289806047023,
      "learning_rate": 0.00018245413039753858,
      "loss": 0.6966,
      "step": 4245
    },
    {
      "epoch": 0.27261064785118666,
      "grad_norm": 0.7781670764658554,
      "learning_rate": 0.00018239073618885447,
      "loss": 0.5014,
      "step": 4250
    },
    {
      "epoch": 0.27293136626042336,
      "grad_norm": 0.9312674308580604,
      "learning_rate": 0.00018232723871809654,
      "loss": 0.7177,
      "step": 4255
    },
    {
      "epoch": 0.27325208466966006,
      "grad_norm": 0.7997579086131462,
      "learning_rate": 0.00018226363806484749,
      "loss": 0.6622,
      "step": 4260
    },
    {
      "epoch": 0.27357280307889675,
      "grad_norm": 1.1414064891921076,
      "learning_rate": 0.00018219993430881935,
      "loss": 0.7326,
      "step": 4265
    },
    {
      "epoch": 0.2738935214881334,
      "grad_norm": 0.8488220516302005,
      "learning_rate": 0.00018213612752985346,
      "loss": 0.6111,
      "step": 4270
    },
    {
      "epoch": 0.2742142398973701,
      "grad_norm": 0.6785943182404776,
      "learning_rate": 0.00018207221780792022,
      "loss": 0.568,
      "step": 4275
    },
    {
      "epoch": 0.2745349583066068,
      "grad_norm": 0.7407135493281501,
      "learning_rate": 0.00018200820522311907,
      "loss": 0.9428,
      "step": 4280
    },
    {
      "epoch": 0.2748556767158435,
      "grad_norm": 0.7785838981084623,
      "learning_rate": 0.00018194408985567826,
      "loss": 0.6602,
      "step": 4285
    },
    {
      "epoch": 0.2751763951250802,
      "grad_norm": 1.3274741440702664,
      "learning_rate": 0.00018187987178595506,
      "loss": 0.6326,
      "step": 4290
    },
    {
      "epoch": 0.2754971135343169,
      "grad_norm": 0.7698326162883183,
      "learning_rate": 0.00018181555109443527,
      "loss": 0.7828,
      "step": 4295
    },
    {
      "epoch": 0.2758178319435536,
      "grad_norm": 0.9874438661020553,
      "learning_rate": 0.00018175112786173345,
      "loss": 0.6177,
      "step": 4300
    },
    {
      "epoch": 0.2761385503527902,
      "grad_norm": 1.2983806783457539,
      "learning_rate": 0.0001816866021685926,
      "loss": 0.5931,
      "step": 4305
    },
    {
      "epoch": 0.2764592687620269,
      "grad_norm": 0.6650133276949847,
      "learning_rate": 0.00018162197409588414,
      "loss": 0.6065,
      "step": 4310
    },
    {
      "epoch": 0.2767799871712636,
      "grad_norm": 0.6615532414642794,
      "learning_rate": 0.0001815572437246078,
      "loss": 0.6777,
      "step": 4315
    },
    {
      "epoch": 0.2771007055805003,
      "grad_norm": 0.9856674878658384,
      "learning_rate": 0.00018149241113589158,
      "loss": 0.7992,
      "step": 4320
    },
    {
      "epoch": 0.277421423989737,
      "grad_norm": 0.9736624117716728,
      "learning_rate": 0.00018142747641099156,
      "loss": 0.6433,
      "step": 4325
    },
    {
      "epoch": 0.2777421423989737,
      "grad_norm": 0.6411826659070557,
      "learning_rate": 0.00018136243963129176,
      "loss": 0.6934,
      "step": 4330
    },
    {
      "epoch": 0.2780628608082104,
      "grad_norm": 1.1535749419623087,
      "learning_rate": 0.00018129730087830423,
      "loss": 0.6763,
      "step": 4335
    },
    {
      "epoch": 0.2783835792174471,
      "grad_norm": 0.9545043501616219,
      "learning_rate": 0.00018123206023366875,
      "loss": 0.6913,
      "step": 4340
    },
    {
      "epoch": 0.27870429762668375,
      "grad_norm": 0.8726709507710128,
      "learning_rate": 0.00018116671777915279,
      "loss": 0.6719,
      "step": 4345
    },
    {
      "epoch": 0.27902501603592045,
      "grad_norm": 0.8365717106126314,
      "learning_rate": 0.00018110127359665144,
      "loss": 0.8124,
      "step": 4350
    },
    {
      "epoch": 0.27934573444515715,
      "grad_norm": 1.2549482014888076,
      "learning_rate": 0.00018103572776818734,
      "loss": 0.6818,
      "step": 4355
    },
    {
      "epoch": 0.27966645285439384,
      "grad_norm": 1.0842835676700455,
      "learning_rate": 0.00018097008037591046,
      "loss": 0.6671,
      "step": 4360
    },
    {
      "epoch": 0.27998717126363054,
      "grad_norm": 0.9380406537541407,
      "learning_rate": 0.00018090433150209809,
      "loss": 0.6949,
      "step": 4365
    },
    {
      "epoch": 0.28030788967286724,
      "grad_norm": 1.150794578223368,
      "learning_rate": 0.00018083848122915468,
      "loss": 0.7515,
      "step": 4370
    },
    {
      "epoch": 0.28062860808210394,
      "grad_norm": 0.8083227750174746,
      "learning_rate": 0.0001807725296396118,
      "loss": 0.7616,
      "step": 4375
    },
    {
      "epoch": 0.2809493264913406,
      "grad_norm": 0.7534176713677331,
      "learning_rate": 0.000180706476816128,
      "loss": 0.7793,
      "step": 4380
    },
    {
      "epoch": 0.2812700449005773,
      "grad_norm": 0.8339195487244033,
      "learning_rate": 0.00018064032284148868,
      "loss": 0.6498,
      "step": 4385
    },
    {
      "epoch": 0.281590763309814,
      "grad_norm": 1.0737472499663367,
      "learning_rate": 0.00018057406779860603,
      "loss": 0.717,
      "step": 4390
    },
    {
      "epoch": 0.28191148171905067,
      "grad_norm": 0.9978477560799941,
      "learning_rate": 0.00018050771177051896,
      "loss": 0.5892,
      "step": 4395
    },
    {
      "epoch": 0.28223220012828737,
      "grad_norm": 1.3027101386742324,
      "learning_rate": 0.00018044125484039284,
      "loss": 0.7084,
      "step": 4400
    },
    {
      "epoch": 0.28255291853752407,
      "grad_norm": 0.930029771124351,
      "learning_rate": 0.0001803746970915196,
      "loss": 0.6916,
      "step": 4405
    },
    {
      "epoch": 0.28287363694676076,
      "grad_norm": 0.7778850969886842,
      "learning_rate": 0.00018030803860731744,
      "loss": 0.7685,
      "step": 4410
    },
    {
      "epoch": 0.2831943553559974,
      "grad_norm": 0.7650986542927773,
      "learning_rate": 0.00018024127947133096,
      "loss": 0.6537,
      "step": 4415
    },
    {
      "epoch": 0.2835150737652341,
      "grad_norm": 1.5408988991120984,
      "learning_rate": 0.00018017441976723073,
      "loss": 0.7775,
      "step": 4420
    },
    {
      "epoch": 0.2838357921744708,
      "grad_norm": 1.2912216339714508,
      "learning_rate": 0.0001801074595788135,
      "loss": 0.6968,
      "step": 4425
    },
    {
      "epoch": 0.2841565105837075,
      "grad_norm": 1.0528277674684878,
      "learning_rate": 0.00018004039899000186,
      "loss": 0.6352,
      "step": 4430
    },
    {
      "epoch": 0.2844772289929442,
      "grad_norm": 0.9968577641995723,
      "learning_rate": 0.00017997323808484434,
      "loss": 0.681,
      "step": 4435
    },
    {
      "epoch": 0.2847979474021809,
      "grad_norm": 0.7048566927661232,
      "learning_rate": 0.0001799059769475151,
      "loss": 0.589,
      "step": 4440
    },
    {
      "epoch": 0.2851186658114176,
      "grad_norm": 1.2752536855080614,
      "learning_rate": 0.00017983861566231397,
      "loss": 0.6021,
      "step": 4445
    },
    {
      "epoch": 0.2854393842206543,
      "grad_norm": 0.6838772733375945,
      "learning_rate": 0.0001797711543136663,
      "loss": 0.62,
      "step": 4450
    },
    {
      "epoch": 0.28576010262989093,
      "grad_norm": 1.0992940781905054,
      "learning_rate": 0.00017970359298612282,
      "loss": 0.7695,
      "step": 4455
    },
    {
      "epoch": 0.28608082103912763,
      "grad_norm": 0.9891320713998334,
      "learning_rate": 0.00017963593176435964,
      "loss": 0.7417,
      "step": 4460
    },
    {
      "epoch": 0.28640153944836433,
      "grad_norm": 1.0219509493165506,
      "learning_rate": 0.00017956817073317793,
      "loss": 0.8078,
      "step": 4465
    },
    {
      "epoch": 0.286722257857601,
      "grad_norm": 0.601838514745307,
      "learning_rate": 0.00017950030997750414,
      "loss": 0.6521,
      "step": 4470
    },
    {
      "epoch": 0.2870429762668377,
      "grad_norm": 0.6658616403524804,
      "learning_rate": 0.00017943234958238952,
      "loss": 0.4757,
      "step": 4475
    },
    {
      "epoch": 0.2873636946760744,
      "grad_norm": 1.007316511383742,
      "learning_rate": 0.00017936428963301036,
      "loss": 0.7311,
      "step": 4480
    },
    {
      "epoch": 0.2876844130853111,
      "grad_norm": 1.1189936485732135,
      "learning_rate": 0.00017929613021466765,
      "loss": 0.6303,
      "step": 4485
    },
    {
      "epoch": 0.28800513149454776,
      "grad_norm": 0.7720709103171642,
      "learning_rate": 0.000179227871412787,
      "loss": 0.5517,
      "step": 4490
    },
    {
      "epoch": 0.28832584990378446,
      "grad_norm": 0.840259961080622,
      "learning_rate": 0.00017915951331291864,
      "loss": 0.7003,
      "step": 4495
    },
    {
      "epoch": 0.28864656831302116,
      "grad_norm": 0.7950998217641071,
      "learning_rate": 0.00017909105600073726,
      "loss": 0.6693,
      "step": 4500
    },
    {
      "epoch": 0.28896728672225785,
      "grad_norm": 0.8828219239731676,
      "learning_rate": 0.00017902249956204183,
      "loss": 0.613,
      "step": 4505
    },
    {
      "epoch": 0.28928800513149455,
      "grad_norm": 0.8050366826668545,
      "learning_rate": 0.0001789538440827557,
      "loss": 0.5657,
      "step": 4510
    },
    {
      "epoch": 0.28960872354073125,
      "grad_norm": 1.0967164706749888,
      "learning_rate": 0.00017888508964892616,
      "loss": 0.8128,
      "step": 4515
    },
    {
      "epoch": 0.28992944194996795,
      "grad_norm": 0.9150715640614145,
      "learning_rate": 0.00017881623634672465,
      "loss": 0.7572,
      "step": 4520
    },
    {
      "epoch": 0.29025016035920465,
      "grad_norm": 1.2602671775870735,
      "learning_rate": 0.00017874728426244647,
      "loss": 0.6905,
      "step": 4525
    },
    {
      "epoch": 0.2905708787684413,
      "grad_norm": 0.9346668957570068,
      "learning_rate": 0.00017867823348251076,
      "loss": 0.7051,
      "step": 4530
    },
    {
      "epoch": 0.290891597177678,
      "grad_norm": 0.7910849436025686,
      "learning_rate": 0.00017860908409346034,
      "loss": 0.709,
      "step": 4535
    },
    {
      "epoch": 0.2912123155869147,
      "grad_norm": 0.8218374279342303,
      "learning_rate": 0.0001785398361819616,
      "loss": 0.5839,
      "step": 4540
    },
    {
      "epoch": 0.2915330339961514,
      "grad_norm": 0.8511332345341893,
      "learning_rate": 0.0001784704898348045,
      "loss": 0.7218,
      "step": 4545
    },
    {
      "epoch": 0.2918537524053881,
      "grad_norm": 1.2396495867604176,
      "learning_rate": 0.0001784010451389022,
      "loss": 0.5707,
      "step": 4550
    },
    {
      "epoch": 0.2921744708146248,
      "grad_norm": 0.5453795713818735,
      "learning_rate": 0.00017833150218129129,
      "loss": 0.7248,
      "step": 4555
    },
    {
      "epoch": 0.2924951892238615,
      "grad_norm": 0.8544441259057197,
      "learning_rate": 0.00017826186104913142,
      "loss": 0.6706,
      "step": 4560
    },
    {
      "epoch": 0.2928159076330981,
      "grad_norm": 0.7078874543955929,
      "learning_rate": 0.00017819212182970535,
      "loss": 0.6732,
      "step": 4565
    },
    {
      "epoch": 0.2931366260423348,
      "grad_norm": 1.1258864806353122,
      "learning_rate": 0.0001781222846104187,
      "loss": 0.696,
      "step": 4570
    },
    {
      "epoch": 0.2934573444515715,
      "grad_norm": 0.8952983146425741,
      "learning_rate": 0.00017805234947879993,
      "loss": 0.6778,
      "step": 4575
    },
    {
      "epoch": 0.2937780628608082,
      "grad_norm": 1.078013753440664,
      "learning_rate": 0.0001779823165225003,
      "loss": 0.6494,
      "step": 4580
    },
    {
      "epoch": 0.2940987812700449,
      "grad_norm": 1.2457998074637708,
      "learning_rate": 0.0001779121858292936,
      "loss": 0.6356,
      "step": 4585
    },
    {
      "epoch": 0.2944194996792816,
      "grad_norm": 0.9452414867290724,
      "learning_rate": 0.0001778419574870761,
      "loss": 0.7049,
      "step": 4590
    },
    {
      "epoch": 0.2947402180885183,
      "grad_norm": 1.0903318911783695,
      "learning_rate": 0.00017777163158386647,
      "loss": 0.653,
      "step": 4595
    },
    {
      "epoch": 0.29506093649775494,
      "grad_norm": 1.172298521370259,
      "learning_rate": 0.00017770120820780573,
      "loss": 0.7285,
      "step": 4600
    },
    {
      "epoch": 0.29538165490699164,
      "grad_norm": 0.6583420678299451,
      "learning_rate": 0.00017763068744715697,
      "loss": 0.6031,
      "step": 4605
    },
    {
      "epoch": 0.29570237331622834,
      "grad_norm": 0.8591774180151724,
      "learning_rate": 0.00017756006939030535,
      "loss": 0.7409,
      "step": 4610
    },
    {
      "epoch": 0.29602309172546504,
      "grad_norm": 0.6898541329818539,
      "learning_rate": 0.00017748935412575804,
      "loss": 0.589,
      "step": 4615
    },
    {
      "epoch": 0.29634381013470174,
      "grad_norm": 0.5395272492697519,
      "learning_rate": 0.000177418541742144,
      "loss": 0.708,
      "step": 4620
    },
    {
      "epoch": 0.29666452854393843,
      "grad_norm": 1.0169898045901036,
      "learning_rate": 0.0001773476323282138,
      "loss": 0.6948,
      "step": 4625
    },
    {
      "epoch": 0.29698524695317513,
      "grad_norm": 1.0000948614259928,
      "learning_rate": 0.00017727662597283986,
      "loss": 0.7215,
      "step": 4630
    },
    {
      "epoch": 0.29730596536241183,
      "grad_norm": 0.9689865733719959,
      "learning_rate": 0.00017720552276501592,
      "loss": 0.6701,
      "step": 4635
    },
    {
      "epoch": 0.29762668377164847,
      "grad_norm": 0.6557948134140331,
      "learning_rate": 0.00017713432279385712,
      "loss": 0.6235,
      "step": 4640
    },
    {
      "epoch": 0.29794740218088517,
      "grad_norm": 1.1877573091679572,
      "learning_rate": 0.00017706302614859992,
      "loss": 0.7863,
      "step": 4645
    },
    {
      "epoch": 0.29826812059012187,
      "grad_norm": 0.8462973100804213,
      "learning_rate": 0.00017699163291860198,
      "loss": 0.5724,
      "step": 4650
    },
    {
      "epoch": 0.29858883899935856,
      "grad_norm": 0.9236445624740109,
      "learning_rate": 0.0001769201431933419,
      "loss": 0.5787,
      "step": 4655
    },
    {
      "epoch": 0.29890955740859526,
      "grad_norm": 1.0716376234952218,
      "learning_rate": 0.00017684855706241934,
      "loss": 0.7401,
      "step": 4660
    },
    {
      "epoch": 0.29923027581783196,
      "grad_norm": 1.1600311786248418,
      "learning_rate": 0.00017677687461555467,
      "loss": 0.708,
      "step": 4665
    },
    {
      "epoch": 0.29955099422706866,
      "grad_norm": 0.7413385734559219,
      "learning_rate": 0.00017670509594258912,
      "loss": 0.5718,
      "step": 4670
    },
    {
      "epoch": 0.2998717126363053,
      "grad_norm": 0.9348593211146833,
      "learning_rate": 0.00017663322113348434,
      "loss": 0.7492,
      "step": 4675
    },
    {
      "epoch": 0.300192431045542,
      "grad_norm": 1.5696315279326167,
      "learning_rate": 0.0001765612502783226,
      "loss": 0.6552,
      "step": 4680
    },
    {
      "epoch": 0.3005131494547787,
      "grad_norm": 1.0990775256909542,
      "learning_rate": 0.00017648918346730653,
      "loss": 0.582,
      "step": 4685
    },
    {
      "epoch": 0.3008338678640154,
      "grad_norm": 0.7467674097224691,
      "learning_rate": 0.00017641702079075904,
      "loss": 0.6326,
      "step": 4690
    },
    {
      "epoch": 0.3011545862732521,
      "grad_norm": 0.7256436706311058,
      "learning_rate": 0.00017634476233912308,
      "loss": 0.7717,
      "step": 4695
    },
    {
      "epoch": 0.3014753046824888,
      "grad_norm": 0.754840650778496,
      "learning_rate": 0.00017627240820296177,
      "loss": 0.6896,
      "step": 4700
    },
    {
      "epoch": 0.3017960230917255,
      "grad_norm": 0.7072150395545665,
      "learning_rate": 0.0001761999584729581,
      "loss": 0.6332,
      "step": 4705
    },
    {
      "epoch": 0.3021167415009622,
      "grad_norm": 1.2009873604762311,
      "learning_rate": 0.00017612741323991488,
      "loss": 0.6393,
      "step": 4710
    },
    {
      "epoch": 0.3024374599101988,
      "grad_norm": 0.6086745243060716,
      "learning_rate": 0.0001760547725947545,
      "loss": 0.6681,
      "step": 4715
    },
    {
      "epoch": 0.3027581783194355,
      "grad_norm": 0.9853085984018423,
      "learning_rate": 0.0001759820366285192,
      "loss": 0.5961,
      "step": 4720
    },
    {
      "epoch": 0.3030788967286722,
      "grad_norm": 1.0109466174974706,
      "learning_rate": 0.00017590920543237036,
      "loss": 0.7225,
      "step": 4725
    },
    {
      "epoch": 0.3033996151379089,
      "grad_norm": 1.2139597067132748,
      "learning_rate": 0.00017583627909758902,
      "loss": 0.6542,
      "step": 4730
    },
    {
      "epoch": 0.3037203335471456,
      "grad_norm": 0.9478885183065455,
      "learning_rate": 0.00017576325771557518,
      "loss": 0.6881,
      "step": 4735
    },
    {
      "epoch": 0.3040410519563823,
      "grad_norm": 0.8539507613861936,
      "learning_rate": 0.00017569014137784822,
      "loss": 0.6331,
      "step": 4740
    },
    {
      "epoch": 0.304361770365619,
      "grad_norm": 0.9679885840401695,
      "learning_rate": 0.00017561693017604637,
      "loss": 0.7997,
      "step": 4745
    },
    {
      "epoch": 0.30468248877485565,
      "grad_norm": 0.9422216475894025,
      "learning_rate": 0.00017554362420192676,
      "loss": 0.6769,
      "step": 4750
    },
    {
      "epoch": 0.30500320718409235,
      "grad_norm": 1.0998446041770769,
      "learning_rate": 0.00017547022354736538,
      "loss": 0.6072,
      "step": 4755
    },
    {
      "epoch": 0.30532392559332905,
      "grad_norm": 1.0857238442878236,
      "learning_rate": 0.00017539672830435682,
      "loss": 0.7689,
      "step": 4760
    },
    {
      "epoch": 0.30564464400256575,
      "grad_norm": 0.7440444931879342,
      "learning_rate": 0.00017532313856501427,
      "loss": 0.5841,
      "step": 4765
    },
    {
      "epoch": 0.30596536241180244,
      "grad_norm": 0.7172978744287396,
      "learning_rate": 0.0001752494544215693,
      "loss": 0.6583,
      "step": 4770
    },
    {
      "epoch": 0.30628608082103914,
      "grad_norm": 1.2045039512423583,
      "learning_rate": 0.00017517567596637184,
      "loss": 0.6052,
      "step": 4775
    },
    {
      "epoch": 0.30660679923027584,
      "grad_norm": 0.6334336485782317,
      "learning_rate": 0.00017510180329189,
      "loss": 0.6194,
      "step": 4780
    },
    {
      "epoch": 0.3069275176395125,
      "grad_norm": 1.3899325242838065,
      "learning_rate": 0.00017502783649070994,
      "loss": 0.7102,
      "step": 4785
    },
    {
      "epoch": 0.3072482360487492,
      "grad_norm": 1.1877009077958471,
      "learning_rate": 0.00017495377565553594,
      "loss": 0.683,
      "step": 4790
    },
    {
      "epoch": 0.3075689544579859,
      "grad_norm": 1.1043105680832985,
      "learning_rate": 0.00017487962087918993,
      "loss": 0.6165,
      "step": 4795
    },
    {
      "epoch": 0.3078896728672226,
      "grad_norm": 0.9571802341999754,
      "learning_rate": 0.00017480537225461178,
      "loss": 0.499,
      "step": 4800
    },
    {
      "epoch": 0.3082103912764593,
      "grad_norm": 1.0846077393930171,
      "learning_rate": 0.00017473102987485876,
      "loss": 0.7685,
      "step": 4805
    },
    {
      "epoch": 0.30853110968569597,
      "grad_norm": 0.9095961738585777,
      "learning_rate": 0.00017465659383310587,
      "loss": 0.6373,
      "step": 4810
    },
    {
      "epoch": 0.30885182809493267,
      "grad_norm": 1.1872255037042634,
      "learning_rate": 0.00017458206422264533,
      "loss": 0.6564,
      "step": 4815
    },
    {
      "epoch": 0.30917254650416937,
      "grad_norm": 1.0600317447426089,
      "learning_rate": 0.00017450744113688672,
      "loss": 0.6103,
      "step": 4820
    },
    {
      "epoch": 0.309493264913406,
      "grad_norm": 0.89956531270657,
      "learning_rate": 0.00017443272466935675,
      "loss": 0.7056,
      "step": 4825
    },
    {
      "epoch": 0.3098139833226427,
      "grad_norm": 0.6138048573378617,
      "learning_rate": 0.00017435791491369917,
      "loss": 0.6437,
      "step": 4830
    },
    {
      "epoch": 0.3101347017318794,
      "grad_norm": 0.6479672204769544,
      "learning_rate": 0.00017428301196367464,
      "loss": 0.7149,
      "step": 4835
    },
    {
      "epoch": 0.3104554201411161,
      "grad_norm": 0.9059240016877552,
      "learning_rate": 0.00017420801591316062,
      "loss": 0.6641,
      "step": 4840
    },
    {
      "epoch": 0.3107761385503528,
      "grad_norm": 0.7000331742442105,
      "learning_rate": 0.00017413292685615134,
      "loss": 0.6227,
      "step": 4845
    },
    {
      "epoch": 0.3110968569595895,
      "grad_norm": 0.8706735159170973,
      "learning_rate": 0.00017405774488675742,
      "loss": 0.6191,
      "step": 4850
    },
    {
      "epoch": 0.3114175753688262,
      "grad_norm": 0.9657278531523165,
      "learning_rate": 0.0001739824700992061,
      "loss": 0.5956,
      "step": 4855
    },
    {
      "epoch": 0.31173829377806284,
      "grad_norm": 0.9553637466697323,
      "learning_rate": 0.0001739071025878409,
      "loss": 0.7627,
      "step": 4860
    },
    {
      "epoch": 0.31205901218729953,
      "grad_norm": 1.1595347795694808,
      "learning_rate": 0.00017383164244712146,
      "loss": 0.6432,
      "step": 4865
    },
    {
      "epoch": 0.31237973059653623,
      "grad_norm": 1.3557930665103466,
      "learning_rate": 0.0001737560897716236,
      "loss": 0.6965,
      "step": 4870
    },
    {
      "epoch": 0.31270044900577293,
      "grad_norm": 0.919377290874929,
      "learning_rate": 0.00017368044465603915,
      "loss": 0.6913,
      "step": 4875
    },
    {
      "epoch": 0.3130211674150096,
      "grad_norm": 0.9179711638304333,
      "learning_rate": 0.00017360470719517577,
      "loss": 0.5516,
      "step": 4880
    },
    {
      "epoch": 0.3133418858242463,
      "grad_norm": 0.8074363475177312,
      "learning_rate": 0.00017352887748395678,
      "loss": 0.6421,
      "step": 4885
    },
    {
      "epoch": 0.313662604233483,
      "grad_norm": 1.3217851235374773,
      "learning_rate": 0.00017345295561742123,
      "loss": 0.7387,
      "step": 4890
    },
    {
      "epoch": 0.31398332264271966,
      "grad_norm": 0.8100107368582629,
      "learning_rate": 0.0001733769416907236,
      "loss": 0.6104,
      "step": 4895
    },
    {
      "epoch": 0.31430404105195636,
      "grad_norm": 1.0974582938152775,
      "learning_rate": 0.0001733008357991338,
      "loss": 0.649,
      "step": 4900
    },
    {
      "epoch": 0.31462475946119306,
      "grad_norm": 1.233711986487123,
      "learning_rate": 0.00017322463803803688,
      "loss": 0.5448,
      "step": 4905
    },
    {
      "epoch": 0.31494547787042976,
      "grad_norm": 0.8777266459889339,
      "learning_rate": 0.00017314834850293325,
      "loss": 0.7512,
      "step": 4910
    },
    {
      "epoch": 0.31526619627966646,
      "grad_norm": 0.8794148401176598,
      "learning_rate": 0.00017307196728943812,
      "loss": 0.6314,
      "step": 4915
    },
    {
      "epoch": 0.31558691468890315,
      "grad_norm": 0.7021113325319495,
      "learning_rate": 0.00017299549449328175,
      "loss": 0.5404,
      "step": 4920
    },
    {
      "epoch": 0.31590763309813985,
      "grad_norm": 0.76819009517203,
      "learning_rate": 0.00017291893021030913,
      "loss": 0.7646,
      "step": 4925
    },
    {
      "epoch": 0.31622835150737655,
      "grad_norm": 1.3281150753972946,
      "learning_rate": 0.00017284227453647993,
      "loss": 0.6404,
      "step": 4930
    },
    {
      "epoch": 0.3165490699166132,
      "grad_norm": 0.8777792257027988,
      "learning_rate": 0.00017276552756786831,
      "loss": 0.7211,
      "step": 4935
    },
    {
      "epoch": 0.3168697883258499,
      "grad_norm": 0.9522765071117524,
      "learning_rate": 0.00017268868940066288,
      "loss": 0.7659,
      "step": 4940
    },
    {
      "epoch": 0.3171905067350866,
      "grad_norm": 0.7347381221386469,
      "learning_rate": 0.0001726117601311666,
      "loss": 0.7521,
      "step": 4945
    },
    {
      "epoch": 0.3175112251443233,
      "grad_norm": 0.947686463596072,
      "learning_rate": 0.00017253473985579657,
      "loss": 0.6981,
      "step": 4950
    },
    {
      "epoch": 0.31783194355356,
      "grad_norm": 0.9948270615790568,
      "learning_rate": 0.0001724576286710839,
      "loss": 0.5347,
      "step": 4955
    },
    {
      "epoch": 0.3181526619627967,
      "grad_norm": 0.7412951434019396,
      "learning_rate": 0.00017238042667367377,
      "loss": 0.6563,
      "step": 4960
    },
    {
      "epoch": 0.3184733803720334,
      "grad_norm": 0.9060455966464537,
      "learning_rate": 0.00017230313396032504,
      "loss": 0.8452,
      "step": 4965
    },
    {
      "epoch": 0.31879409878127,
      "grad_norm": 0.7926379737171755,
      "learning_rate": 0.00017222575062791033,
      "loss": 0.6834,
      "step": 4970
    },
    {
      "epoch": 0.3191148171905067,
      "grad_norm": 1.1978749811848812,
      "learning_rate": 0.00017214827677341582,
      "loss": 0.5959,
      "step": 4975
    },
    {
      "epoch": 0.3194355355997434,
      "grad_norm": 1.1382243993856835,
      "learning_rate": 0.00017207071249394118,
      "loss": 0.8144,
      "step": 4980
    },
    {
      "epoch": 0.3197562540089801,
      "grad_norm": 0.9207041310652729,
      "learning_rate": 0.00017199305788669937,
      "loss": 0.7515,
      "step": 4985
    },
    {
      "epoch": 0.3200769724182168,
      "grad_norm": 0.7762438521118743,
      "learning_rate": 0.00017191531304901653,
      "loss": 0.7128,
      "step": 4990
    },
    {
      "epoch": 0.3203976908274535,
      "grad_norm": 1.0657161158728048,
      "learning_rate": 0.000171837478078332,
      "loss": 0.7206,
      "step": 4995
    },
    {
      "epoch": 0.3207184092366902,
      "grad_norm": 0.8853471042976426,
      "learning_rate": 0.00017175955307219796,
      "loss": 0.6661,
      "step": 5000
    },
    {
      "epoch": 0.3210391276459269,
      "grad_norm": 0.730931049927295,
      "learning_rate": 0.00017168153812827957,
      "loss": 0.7177,
      "step": 5005
    },
    {
      "epoch": 0.32135984605516354,
      "grad_norm": 1.24238938271146,
      "learning_rate": 0.0001716034333443545,
      "loss": 0.7264,
      "step": 5010
    },
    {
      "epoch": 0.32168056446440024,
      "grad_norm": 1.0598509644567646,
      "learning_rate": 0.00017152523881831325,
      "loss": 0.5868,
      "step": 5015
    },
    {
      "epoch": 0.32200128287363694,
      "grad_norm": 1.142674205123222,
      "learning_rate": 0.00017144695464815866,
      "loss": 0.7652,
      "step": 5020
    },
    {
      "epoch": 0.32232200128287364,
      "grad_norm": 1.2248444413302872,
      "learning_rate": 0.00017136858093200593,
      "loss": 0.6078,
      "step": 5025
    },
    {
      "epoch": 0.32264271969211034,
      "grad_norm": 0.9090404485944782,
      "learning_rate": 0.00017129011776808258,
      "loss": 0.6921,
      "step": 5030
    },
    {
      "epoch": 0.32296343810134703,
      "grad_norm": 1.0978730524660503,
      "learning_rate": 0.00017121156525472814,
      "loss": 0.7593,
      "step": 5035
    },
    {
      "epoch": 0.32328415651058373,
      "grad_norm": 1.8023280272488704,
      "learning_rate": 0.00017113292349039413,
      "loss": 0.7583,
      "step": 5040
    },
    {
      "epoch": 0.3236048749198204,
      "grad_norm": 1.0487723489551213,
      "learning_rate": 0.000171054192573644,
      "loss": 0.7754,
      "step": 5045
    },
    {
      "epoch": 0.32392559332905707,
      "grad_norm": 0.7931120571928945,
      "learning_rate": 0.0001709753726031529,
      "loss": 0.7182,
      "step": 5050
    },
    {
      "epoch": 0.32424631173829377,
      "grad_norm": 1.3448284362405596,
      "learning_rate": 0.00017089646367770756,
      "loss": 0.6391,
      "step": 5055
    },
    {
      "epoch": 0.32456703014753047,
      "grad_norm": 0.9771883061194023,
      "learning_rate": 0.0001708174658962062,
      "loss": 0.632,
      "step": 5060
    },
    {
      "epoch": 0.32488774855676716,
      "grad_norm": 0.944625885099161,
      "learning_rate": 0.00017073837935765846,
      "loss": 0.6235,
      "step": 5065
    },
    {
      "epoch": 0.32520846696600386,
      "grad_norm": 0.9899695819556337,
      "learning_rate": 0.00017065920416118522,
      "loss": 0.7345,
      "step": 5070
    },
    {
      "epoch": 0.32552918537524056,
      "grad_norm": 0.5815153267452241,
      "learning_rate": 0.00017057994040601838,
      "loss": 0.5988,
      "step": 5075
    },
    {
      "epoch": 0.3258499037844772,
      "grad_norm": 0.7182304509869034,
      "learning_rate": 0.00017050058819150098,
      "loss": 0.5962,
      "step": 5080
    },
    {
      "epoch": 0.3261706221937139,
      "grad_norm": 0.7916342652857238,
      "learning_rate": 0.0001704211476170868,
      "loss": 0.5903,
      "step": 5085
    },
    {
      "epoch": 0.3264913406029506,
      "grad_norm": 1.186592480709318,
      "learning_rate": 0.00017034161878234043,
      "loss": 0.7071,
      "step": 5090
    },
    {
      "epoch": 0.3268120590121873,
      "grad_norm": 1.4501384859209354,
      "learning_rate": 0.00017026200178693704,
      "loss": 0.5699,
      "step": 5095
    },
    {
      "epoch": 0.327132777421424,
      "grad_norm": 0.4770414244602479,
      "learning_rate": 0.0001701822967306624,
      "loss": 0.6942,
      "step": 5100
    },
    {
      "epoch": 0.3274534958306607,
      "grad_norm": 1.2188679878291713,
      "learning_rate": 0.00017010250371341244,
      "loss": 0.6633,
      "step": 5105
    },
    {
      "epoch": 0.3277742142398974,
      "grad_norm": 1.0813857287425748,
      "learning_rate": 0.0001700226228351935,
      "loss": 0.6257,
      "step": 5110
    },
    {
      "epoch": 0.3280949326491341,
      "grad_norm": 0.8540165463861037,
      "learning_rate": 0.00016994265419612205,
      "loss": 0.5918,
      "step": 5115
    },
    {
      "epoch": 0.32841565105837073,
      "grad_norm": 1.1642007608342173,
      "learning_rate": 0.00016986259789642444,
      "loss": 0.6911,
      "step": 5120
    },
    {
      "epoch": 0.3287363694676074,
      "grad_norm": 0.8539433327300491,
      "learning_rate": 0.00016978245403643694,
      "loss": 0.7732,
      "step": 5125
    },
    {
      "epoch": 0.3290570878768441,
      "grad_norm": 1.0202618411725253,
      "learning_rate": 0.0001697022227166056,
      "loss": 0.7798,
      "step": 5130
    },
    {
      "epoch": 0.3293778062860808,
      "grad_norm": 0.8876324268732894,
      "learning_rate": 0.00016962190403748605,
      "loss": 0.714,
      "step": 5135
    },
    {
      "epoch": 0.3296985246953175,
      "grad_norm": 0.7783501191713772,
      "learning_rate": 0.0001695414980997434,
      "loss": 0.7987,
      "step": 5140
    },
    {
      "epoch": 0.3300192431045542,
      "grad_norm": 1.204240570280653,
      "learning_rate": 0.00016946100500415213,
      "loss": 0.6914,
      "step": 5145
    },
    {
      "epoch": 0.3303399615137909,
      "grad_norm": 0.7152048301163425,
      "learning_rate": 0.00016938042485159594,
      "loss": 0.6703,
      "step": 5150
    },
    {
      "epoch": 0.33066067992302756,
      "grad_norm": 1.191922058294469,
      "learning_rate": 0.0001692997577430677,
      "loss": 0.6539,
      "step": 5155
    },
    {
      "epoch": 0.33098139833226425,
      "grad_norm": 0.8187793173057333,
      "learning_rate": 0.00016921900377966923,
      "loss": 0.7468,
      "step": 5160
    },
    {
      "epoch": 0.33130211674150095,
      "grad_norm": 0.9381392106872509,
      "learning_rate": 0.00016913816306261112,
      "loss": 0.766,
      "step": 5165
    },
    {
      "epoch": 0.33162283515073765,
      "grad_norm": 0.7128118797176758,
      "learning_rate": 0.00016905723569321288,
      "loss": 0.6719,
      "step": 5170
    },
    {
      "epoch": 0.33194355355997435,
      "grad_norm": 1.500297575057347,
      "learning_rate": 0.00016897622177290244,
      "loss": 0.7072,
      "step": 5175
    },
    {
      "epoch": 0.33226427196921104,
      "grad_norm": 0.9800774031498481,
      "learning_rate": 0.0001688951214032163,
      "loss": 0.6549,
      "step": 5180
    },
    {
      "epoch": 0.33258499037844774,
      "grad_norm": 0.8808790723791357,
      "learning_rate": 0.00016881393468579932,
      "loss": 0.6955,
      "step": 5185
    },
    {
      "epoch": 0.33290570878768444,
      "grad_norm": 0.8920914860291771,
      "learning_rate": 0.00016873266172240452,
      "loss": 0.5649,
      "step": 5190
    },
    {
      "epoch": 0.3332264271969211,
      "grad_norm": 0.6851960157071083,
      "learning_rate": 0.00016865130261489305,
      "loss": 0.6897,
      "step": 5195
    },
    {
      "epoch": 0.3335471456061578,
      "grad_norm": 0.8407283098592762,
      "learning_rate": 0.00016856985746523405,
      "loss": 0.6559,
      "step": 5200
    },
    {
      "epoch": 0.3338678640153945,
      "grad_norm": 0.9215186470532375,
      "learning_rate": 0.00016848832637550437,
      "loss": 0.7664,
      "step": 5205
    },
    {
      "epoch": 0.3341885824246312,
      "grad_norm": 0.7299164606010856,
      "learning_rate": 0.00016840670944788882,
      "loss": 0.5981,
      "step": 5210
    },
    {
      "epoch": 0.3345093008338679,
      "grad_norm": 0.8732424966610127,
      "learning_rate": 0.00016832500678467952,
      "loss": 0.7035,
      "step": 5215
    },
    {
      "epoch": 0.33483001924310457,
      "grad_norm": 0.9750167638289885,
      "learning_rate": 0.00016824321848827624,
      "loss": 0.5995,
      "step": 5220
    },
    {
      "epoch": 0.33515073765234127,
      "grad_norm": 1.0976388995980935,
      "learning_rate": 0.00016816134466118596,
      "loss": 0.7107,
      "step": 5225
    },
    {
      "epoch": 0.3354714560615779,
      "grad_norm": 1.0135781126967063,
      "learning_rate": 0.00016807938540602292,
      "loss": 0.7174,
      "step": 5230
    },
    {
      "epoch": 0.3357921744708146,
      "grad_norm": 0.8189118457664761,
      "learning_rate": 0.00016799734082550844,
      "loss": 0.6645,
      "step": 5235
    },
    {
      "epoch": 0.3361128928800513,
      "grad_norm": 0.6996919391876488,
      "learning_rate": 0.0001679152110224707,
      "loss": 0.6629,
      "step": 5240
    },
    {
      "epoch": 0.336433611289288,
      "grad_norm": 0.7381428623848976,
      "learning_rate": 0.00016783299609984478,
      "loss": 0.6016,
      "step": 5245
    },
    {
      "epoch": 0.3367543296985247,
      "grad_norm": 0.9095764087290898,
      "learning_rate": 0.00016775069616067233,
      "loss": 0.8577,
      "step": 5250
    },
    {
      "epoch": 0.3370750481077614,
      "grad_norm": 0.7032412366347235,
      "learning_rate": 0.00016766831130810171,
      "loss": 0.7342,
      "step": 5255
    },
    {
      "epoch": 0.3373957665169981,
      "grad_norm": 0.9697869860649856,
      "learning_rate": 0.00016758584164538757,
      "loss": 0.6338,
      "step": 5260
    },
    {
      "epoch": 0.33771648492623474,
      "grad_norm": 0.7784503288752077,
      "learning_rate": 0.00016750328727589095,
      "loss": 0.6666,
      "step": 5265
    },
    {
      "epoch": 0.33803720333547144,
      "grad_norm": 0.5156266401874552,
      "learning_rate": 0.00016742064830307897,
      "loss": 0.7699,
      "step": 5270
    },
    {
      "epoch": 0.33835792174470813,
      "grad_norm": 1.0003590365934907,
      "learning_rate": 0.0001673379248305248,
      "loss": 0.6751,
      "step": 5275
    },
    {
      "epoch": 0.33867864015394483,
      "grad_norm": 0.8026066074245787,
      "learning_rate": 0.0001672551169619076,
      "loss": 0.7573,
      "step": 5280
    },
    {
      "epoch": 0.33899935856318153,
      "grad_norm": 1.0369937352211243,
      "learning_rate": 0.00016717222480101221,
      "loss": 0.667,
      "step": 5285
    },
    {
      "epoch": 0.3393200769724182,
      "grad_norm": 0.9644006720446381,
      "learning_rate": 0.0001670892484517292,
      "loss": 0.6383,
      "step": 5290
    },
    {
      "epoch": 0.3396407953816549,
      "grad_norm": 1.0076204289252497,
      "learning_rate": 0.00016700618801805453,
      "loss": 0.7178,
      "step": 5295
    },
    {
      "epoch": 0.3399615137908916,
      "grad_norm": 0.5579579624666732,
      "learning_rate": 0.00016692304360408966,
      "loss": 0.6665,
      "step": 5300
    },
    {
      "epoch": 0.34028223220012827,
      "grad_norm": 0.8064350566112853,
      "learning_rate": 0.00016683981531404125,
      "loss": 0.5122,
      "step": 5305
    },
    {
      "epoch": 0.34060295060936496,
      "grad_norm": 0.9816255727453933,
      "learning_rate": 0.0001667565032522211,
      "loss": 0.6926,
      "step": 5310
    },
    {
      "epoch": 0.34092366901860166,
      "grad_norm": 0.817929460216783,
      "learning_rate": 0.00016667310752304602,
      "loss": 0.5491,
      "step": 5315
    },
    {
      "epoch": 0.34124438742783836,
      "grad_norm": 0.9215347160545883,
      "learning_rate": 0.00016658962823103764,
      "loss": 0.6835,
      "step": 5320
    },
    {
      "epoch": 0.34156510583707506,
      "grad_norm": 1.1290419292904414,
      "learning_rate": 0.00016650606548082236,
      "loss": 0.735,
      "step": 5325
    },
    {
      "epoch": 0.34188582424631175,
      "grad_norm": 1.1930691902617288,
      "learning_rate": 0.0001664224193771312,
      "loss": 0.5138,
      "step": 5330
    },
    {
      "epoch": 0.34220654265554845,
      "grad_norm": 0.8088938421114102,
      "learning_rate": 0.0001663386900247995,
      "loss": 0.6654,
      "step": 5335
    },
    {
      "epoch": 0.3425272610647851,
      "grad_norm": 0.5514542526950761,
      "learning_rate": 0.0001662548775287672,
      "loss": 0.6456,
      "step": 5340
    },
    {
      "epoch": 0.3428479794740218,
      "grad_norm": 0.8205842308107273,
      "learning_rate": 0.00016617098199407814,
      "loss": 0.7144,
      "step": 5345
    },
    {
      "epoch": 0.3431686978832585,
      "grad_norm": 0.9295493105678805,
      "learning_rate": 0.00016608700352588053,
      "loss": 0.6876,
      "step": 5350
    },
    {
      "epoch": 0.3434894162924952,
      "grad_norm": 0.7296614219020304,
      "learning_rate": 0.00016600294222942626,
      "loss": 0.6785,
      "step": 5355
    },
    {
      "epoch": 0.3438101347017319,
      "grad_norm": 0.6002339895362847,
      "learning_rate": 0.00016591879821007126,
      "loss": 0.5796,
      "step": 5360
    },
    {
      "epoch": 0.3441308531109686,
      "grad_norm": 1.6160052086574104,
      "learning_rate": 0.00016583457157327497,
      "loss": 0.7118,
      "step": 5365
    },
    {
      "epoch": 0.3444515715202053,
      "grad_norm": 1.2282552121625845,
      "learning_rate": 0.00016575026242460046,
      "loss": 0.6564,
      "step": 5370
    },
    {
      "epoch": 0.344772289929442,
      "grad_norm": 0.9643175110463178,
      "learning_rate": 0.00016566587086971416,
      "loss": 0.669,
      "step": 5375
    },
    {
      "epoch": 0.3450930083386786,
      "grad_norm": 0.9607772443483632,
      "learning_rate": 0.00016558139701438584,
      "loss": 0.6276,
      "step": 5380
    },
    {
      "epoch": 0.3454137267479153,
      "grad_norm": 0.9147875672042459,
      "learning_rate": 0.0001654968409644884,
      "loss": 0.5905,
      "step": 5385
    },
    {
      "epoch": 0.345734445157152,
      "grad_norm": 0.7334238812099275,
      "learning_rate": 0.00016541220282599773,
      "loss": 0.6261,
      "step": 5390
    },
    {
      "epoch": 0.3460551635663887,
      "grad_norm": 1.1742953273617749,
      "learning_rate": 0.00016532748270499262,
      "loss": 0.7,
      "step": 5395
    },
    {
      "epoch": 0.3463758819756254,
      "grad_norm": 1.1387016781633938,
      "learning_rate": 0.00016524268070765465,
      "loss": 0.7061,
      "step": 5400
    },
    {
      "epoch": 0.3466966003848621,
      "grad_norm": 0.9794060869341327,
      "learning_rate": 0.0001651577969402679,
      "loss": 0.7031,
      "step": 5405
    },
    {
      "epoch": 0.3470173187940988,
      "grad_norm": 0.9732807122694793,
      "learning_rate": 0.0001650728315092191,
      "loss": 0.6588,
      "step": 5410
    },
    {
      "epoch": 0.34733803720333545,
      "grad_norm": 1.2045887990242425,
      "learning_rate": 0.0001649877845209972,
      "loss": 0.5635,
      "step": 5415
    },
    {
      "epoch": 0.34765875561257215,
      "grad_norm": 0.9098967972234847,
      "learning_rate": 0.0001649026560821934,
      "loss": 0.6877,
      "step": 5420
    },
    {
      "epoch": 0.34797947402180884,
      "grad_norm": 0.8919518792507914,
      "learning_rate": 0.000164817446299501,
      "loss": 0.852,
      "step": 5425
    },
    {
      "epoch": 0.34830019243104554,
      "grad_norm": 1.082286394388753,
      "learning_rate": 0.00016473215527971528,
      "loss": 0.6497,
      "step": 5430
    },
    {
      "epoch": 0.34862091084028224,
      "grad_norm": 0.7681820908697059,
      "learning_rate": 0.00016464678312973327,
      "loss": 0.7075,
      "step": 5435
    },
    {
      "epoch": 0.34894162924951894,
      "grad_norm": 0.8577629521944062,
      "learning_rate": 0.00016456132995655372,
      "loss": 0.6942,
      "step": 5440
    },
    {
      "epoch": 0.34926234765875563,
      "grad_norm": 0.7981749008936162,
      "learning_rate": 0.00016447579586727692,
      "loss": 0.6658,
      "step": 5445
    },
    {
      "epoch": 0.3495830660679923,
      "grad_norm": 0.6566080494812765,
      "learning_rate": 0.0001643901809691046,
      "loss": 0.6325,
      "step": 5450
    },
    {
      "epoch": 0.349903784477229,
      "grad_norm": 0.7729498372329889,
      "learning_rate": 0.00016430448536933965,
      "loss": 0.5609,
      "step": 5455
    },
    {
      "epoch": 0.35022450288646567,
      "grad_norm": 1.0464507162443157,
      "learning_rate": 0.00016421870917538635,
      "loss": 0.6353,
      "step": 5460
    },
    {
      "epoch": 0.35054522129570237,
      "grad_norm": 1.3013839685098925,
      "learning_rate": 0.00016413285249474975,
      "loss": 0.5724,
      "step": 5465
    },
    {
      "epoch": 0.35086593970493907,
      "grad_norm": 0.813558813259816,
      "learning_rate": 0.00016404691543503588,
      "loss": 0.7074,
      "step": 5470
    },
    {
      "epoch": 0.35118665811417576,
      "grad_norm": 1.001748370098994,
      "learning_rate": 0.0001639608981039515,
      "loss": 0.7945,
      "step": 5475
    },
    {
      "epoch": 0.35150737652341246,
      "grad_norm": 0.870149957049954,
      "learning_rate": 0.00016387480060930395,
      "loss": 0.689,
      "step": 5480
    },
    {
      "epoch": 0.35182809493264916,
      "grad_norm": 0.8680578535676656,
      "learning_rate": 0.00016378862305900112,
      "loss": 0.6239,
      "step": 5485
    },
    {
      "epoch": 0.3521488133418858,
      "grad_norm": 0.8274627515878666,
      "learning_rate": 0.0001637023655610511,
      "loss": 0.6437,
      "step": 5490
    },
    {
      "epoch": 0.3524695317511225,
      "grad_norm": 0.8836905220838523,
      "learning_rate": 0.00016361602822356232,
      "loss": 0.581,
      "step": 5495
    },
    {
      "epoch": 0.3527902501603592,
      "grad_norm": 0.645087928333498,
      "learning_rate": 0.0001635296111547432,
      "loss": 0.65,
      "step": 5500
    },
    {
      "epoch": 0.3531109685695959,
      "grad_norm": 0.9138176884852274,
      "learning_rate": 0.00016344311446290212,
      "loss": 0.6039,
      "step": 5505
    },
    {
      "epoch": 0.3534316869788326,
      "grad_norm": 0.8932196439321753,
      "learning_rate": 0.00016335653825644717,
      "loss": 0.6447,
      "step": 5510
    },
    {
      "epoch": 0.3537524053880693,
      "grad_norm": 0.700814257534255,
      "learning_rate": 0.00016326988264388624,
      "loss": 0.634,
      "step": 5515
    },
    {
      "epoch": 0.354073123797306,
      "grad_norm": 0.8079984489578869,
      "learning_rate": 0.0001631831477338266,
      "loss": 0.5378,
      "step": 5520
    },
    {
      "epoch": 0.35439384220654263,
      "grad_norm": 1.0368102707808613,
      "learning_rate": 0.00016309633363497503,
      "loss": 0.6121,
      "step": 5525
    },
    {
      "epoch": 0.35471456061577933,
      "grad_norm": 1.0720279870828384,
      "learning_rate": 0.00016300944045613745,
      "loss": 0.615,
      "step": 5530
    },
    {
      "epoch": 0.355035279025016,
      "grad_norm": 0.6936759908598535,
      "learning_rate": 0.00016292246830621897,
      "loss": 0.7186,
      "step": 5535
    },
    {
      "epoch": 0.3553559974342527,
      "grad_norm": 0.8578757956070833,
      "learning_rate": 0.00016283541729422368,
      "loss": 0.6859,
      "step": 5540
    },
    {
      "epoch": 0.3556767158434894,
      "grad_norm": 0.6299846194893505,
      "learning_rate": 0.0001627482875292544,
      "loss": 0.7011,
      "step": 5545
    },
    {
      "epoch": 0.3559974342527261,
      "grad_norm": 2.8465820906119697,
      "learning_rate": 0.00016266107912051275,
      "loss": 0.6824,
      "step": 5550
    },
    {
      "epoch": 0.3563181526619628,
      "grad_norm": 0.8212652492805361,
      "learning_rate": 0.00016257379217729897,
      "loss": 0.7353,
      "step": 5555
    },
    {
      "epoch": 0.35663887107119946,
      "grad_norm": 0.8592127708286107,
      "learning_rate": 0.00016248642680901157,
      "loss": 0.7493,
      "step": 5560
    },
    {
      "epoch": 0.35695958948043616,
      "grad_norm": 1.5401896960046906,
      "learning_rate": 0.00016239898312514747,
      "loss": 0.6233,
      "step": 5565
    },
    {
      "epoch": 0.35728030788967285,
      "grad_norm": 0.9880669672357292,
      "learning_rate": 0.00016231146123530169,
      "loss": 0.7483,
      "step": 5570
    },
    {
      "epoch": 0.35760102629890955,
      "grad_norm": 1.0054106975653296,
      "learning_rate": 0.00016222386124916733,
      "loss": 0.7477,
      "step": 5575
    },
    {
      "epoch": 0.35792174470814625,
      "grad_norm": 0.8851121102484797,
      "learning_rate": 0.0001621361832765353,
      "loss": 0.7338,
      "step": 5580
    },
    {
      "epoch": 0.35824246311738295,
      "grad_norm": 0.7868381457390292,
      "learning_rate": 0.0001620484274272943,
      "loss": 0.8315,
      "step": 5585
    },
    {
      "epoch": 0.35856318152661965,
      "grad_norm": 2.2302567668996907,
      "learning_rate": 0.00016196059381143056,
      "loss": 0.6057,
      "step": 5590
    },
    {
      "epoch": 0.35888389993585634,
      "grad_norm": 0.8632558537630518,
      "learning_rate": 0.0001618726825390279,
      "loss": 0.6017,
      "step": 5595
    },
    {
      "epoch": 0.359204618345093,
      "grad_norm": 0.9301897471057365,
      "learning_rate": 0.0001617846937202674,
      "loss": 0.7127,
      "step": 5600
    },
    {
      "epoch": 0.3595253367543297,
      "grad_norm": 1.0314386924705863,
      "learning_rate": 0.00016169662746542724,
      "loss": 0.6471,
      "step": 5605
    },
    {
      "epoch": 0.3598460551635664,
      "grad_norm": 0.7527220509268685,
      "learning_rate": 0.00016160848388488283,
      "loss": 0.5149,
      "step": 5610
    },
    {
      "epoch": 0.3601667735728031,
      "grad_norm": 0.9964259981347259,
      "learning_rate": 0.0001615202630891064,
      "loss": 0.7551,
      "step": 5615
    },
    {
      "epoch": 0.3604874919820398,
      "grad_norm": 0.9534877288363439,
      "learning_rate": 0.0001614319651886669,
      "loss": 0.7869,
      "step": 5620
    },
    {
      "epoch": 0.3608082103912765,
      "grad_norm": 0.6624325233415048,
      "learning_rate": 0.00016134359029423004,
      "loss": 0.6187,
      "step": 5625
    },
    {
      "epoch": 0.36112892880051317,
      "grad_norm": 1.1438885759745019,
      "learning_rate": 0.000161255138516558,
      "loss": 0.6818,
      "step": 5630
    },
    {
      "epoch": 0.3614496472097498,
      "grad_norm": 1.0060076302436596,
      "learning_rate": 0.00016116660996650918,
      "loss": 0.7134,
      "step": 5635
    },
    {
      "epoch": 0.3617703656189865,
      "grad_norm": 0.824054815580278,
      "learning_rate": 0.0001610780047550384,
      "loss": 0.6322,
      "step": 5640
    },
    {
      "epoch": 0.3620910840282232,
      "grad_norm": 1.1593592610393137,
      "learning_rate": 0.00016098932299319642,
      "loss": 0.6549,
      "step": 5645
    },
    {
      "epoch": 0.3624118024374599,
      "grad_norm": 1.3453462014445998,
      "learning_rate": 0.00016090056479213,
      "loss": 0.6626,
      "step": 5650
    },
    {
      "epoch": 0.3627325208466966,
      "grad_norm": 0.6303823430985745,
      "learning_rate": 0.00016081173026308168,
      "loss": 0.6129,
      "step": 5655
    },
    {
      "epoch": 0.3630532392559333,
      "grad_norm": 0.9682139214042652,
      "learning_rate": 0.00016072281951738974,
      "loss": 0.5327,
      "step": 5660
    },
    {
      "epoch": 0.36337395766517,
      "grad_norm": 0.6265113009833752,
      "learning_rate": 0.00016063383266648788,
      "loss": 0.7972,
      "step": 5665
    },
    {
      "epoch": 0.3636946760744067,
      "grad_norm": 1.0602611989591288,
      "learning_rate": 0.0001605447698219052,
      "loss": 0.7568,
      "step": 5670
    },
    {
      "epoch": 0.36401539448364334,
      "grad_norm": 0.8085898565934937,
      "learning_rate": 0.0001604556310952661,
      "loss": 0.7088,
      "step": 5675
    },
    {
      "epoch": 0.36433611289288004,
      "grad_norm": 0.9259612439090465,
      "learning_rate": 0.00016036641659829005,
      "loss": 0.6433,
      "step": 5680
    },
    {
      "epoch": 0.36465683130211674,
      "grad_norm": 1.0560925548902709,
      "learning_rate": 0.00016027712644279147,
      "loss": 0.6389,
      "step": 5685
    },
    {
      "epoch": 0.36497754971135343,
      "grad_norm": 0.9202003497456687,
      "learning_rate": 0.00016018776074067965,
      "loss": 0.6588,
      "step": 5690
    },
    {
      "epoch": 0.36529826812059013,
      "grad_norm": 0.7606894269431724,
      "learning_rate": 0.00016009831960395854,
      "loss": 0.6249,
      "step": 5695
    },
    {
      "epoch": 0.36561898652982683,
      "grad_norm": 1.0194051743569745,
      "learning_rate": 0.00016000880314472662,
      "loss": 0.7063,
      "step": 5700
    },
    {
      "epoch": 0.3659397049390635,
      "grad_norm": 0.8971345599358044,
      "learning_rate": 0.0001599192114751768,
      "loss": 0.7758,
      "step": 5705
    },
    {
      "epoch": 0.36626042334830017,
      "grad_norm": 0.8114509690004853,
      "learning_rate": 0.0001598295447075962,
      "loss": 0.687,
      "step": 5710
    },
    {
      "epoch": 0.36658114175753687,
      "grad_norm": 1.1086821486366683,
      "learning_rate": 0.00015973980295436613,
      "loss": 0.7663,
      "step": 5715
    },
    {
      "epoch": 0.36690186016677356,
      "grad_norm": 0.8305079494288046,
      "learning_rate": 0.00015964998632796187,
      "loss": 0.7841,
      "step": 5720
    },
    {
      "epoch": 0.36722257857601026,
      "grad_norm": 0.9332565471912556,
      "learning_rate": 0.00015956009494095245,
      "loss": 0.7629,
      "step": 5725
    },
    {
      "epoch": 0.36754329698524696,
      "grad_norm": 1.2026329331281138,
      "learning_rate": 0.00015947012890600072,
      "loss": 0.6034,
      "step": 5730
    },
    {
      "epoch": 0.36786401539448366,
      "grad_norm": 0.8890367793012931,
      "learning_rate": 0.00015938008833586307,
      "loss": 0.673,
      "step": 5735
    },
    {
      "epoch": 0.36818473380372035,
      "grad_norm": 1.1168519576569294,
      "learning_rate": 0.00015928997334338924,
      "loss": 0.7265,
      "step": 5740
    },
    {
      "epoch": 0.368505452212957,
      "grad_norm": 0.7323689106049717,
      "learning_rate": 0.00015919978404152225,
      "loss": 0.5286,
      "step": 5745
    },
    {
      "epoch": 0.3688261706221937,
      "grad_norm": 0.7491408637491445,
      "learning_rate": 0.00015910952054329832,
      "loss": 0.6603,
      "step": 5750
    },
    {
      "epoch": 0.3691468890314304,
      "grad_norm": 0.5720787370255552,
      "learning_rate": 0.00015901918296184664,
      "loss": 0.7637,
      "step": 5755
    },
    {
      "epoch": 0.3694676074406671,
      "grad_norm": 1.247050118094861,
      "learning_rate": 0.00015892877141038917,
      "loss": 0.6643,
      "step": 5760
    },
    {
      "epoch": 0.3697883258499038,
      "grad_norm": 0.8428619170851901,
      "learning_rate": 0.00015883828600224073,
      "loss": 0.603,
      "step": 5765
    },
    {
      "epoch": 0.3701090442591405,
      "grad_norm": 0.6414166600611392,
      "learning_rate": 0.00015874772685080853,
      "loss": 0.6775,
      "step": 5770
    },
    {
      "epoch": 0.3704297626683772,
      "grad_norm": 1.39629472630112,
      "learning_rate": 0.0001586570940695924,
      "loss": 0.7512,
      "step": 5775
    },
    {
      "epoch": 0.3707504810776139,
      "grad_norm": 1.0547557813661854,
      "learning_rate": 0.00015856638777218422,
      "loss": 0.7574,
      "step": 5780
    },
    {
      "epoch": 0.3710711994868505,
      "grad_norm": 0.8689805862522758,
      "learning_rate": 0.00015847560807226823,
      "loss": 0.6427,
      "step": 5785
    },
    {
      "epoch": 0.3713919178960872,
      "grad_norm": 1.068120678282078,
      "learning_rate": 0.00015838475508362051,
      "loss": 0.7343,
      "step": 5790
    },
    {
      "epoch": 0.3717126363053239,
      "grad_norm": 0.8164191154263224,
      "learning_rate": 0.00015829382892010912,
      "loss": 0.7685,
      "step": 5795
    },
    {
      "epoch": 0.3720333547145606,
      "grad_norm": 0.9769245060606544,
      "learning_rate": 0.00015820282969569374,
      "loss": 0.6804,
      "step": 5800
    },
    {
      "epoch": 0.3723540731237973,
      "grad_norm": 0.676619842133273,
      "learning_rate": 0.00015811175752442562,
      "loss": 0.7244,
      "step": 5805
    },
    {
      "epoch": 0.372674791533034,
      "grad_norm": 3.577185251797483,
      "learning_rate": 0.00015802061252044748,
      "loss": 0.7426,
      "step": 5810
    },
    {
      "epoch": 0.3729955099422707,
      "grad_norm": 0.5176738358349613,
      "learning_rate": 0.00015792939479799333,
      "loss": 0.6545,
      "step": 5815
    },
    {
      "epoch": 0.37331622835150735,
      "grad_norm": 0.9510093482774353,
      "learning_rate": 0.00015783810447138826,
      "loss": 0.6358,
      "step": 5820
    },
    {
      "epoch": 0.37363694676074405,
      "grad_norm": 0.8940071414235186,
      "learning_rate": 0.0001577467416550484,
      "loss": 0.7573,
      "step": 5825
    },
    {
      "epoch": 0.37395766516998075,
      "grad_norm": 0.8502887517010003,
      "learning_rate": 0.0001576553064634807,
      "loss": 0.6371,
      "step": 5830
    },
    {
      "epoch": 0.37427838357921744,
      "grad_norm": 0.7260357322627535,
      "learning_rate": 0.00015756379901128294,
      "loss": 0.6106,
      "step": 5835
    },
    {
      "epoch": 0.37459910198845414,
      "grad_norm": 0.5018237254264993,
      "learning_rate": 0.00015747221941314325,
      "loss": 0.6329,
      "step": 5840
    },
    {
      "epoch": 0.37491982039769084,
      "grad_norm": 0.9130075924966622,
      "learning_rate": 0.00015738056778384038,
      "loss": 0.6868,
      "step": 5845
    },
    {
      "epoch": 0.37524053880692754,
      "grad_norm": 0.803836499340597,
      "learning_rate": 0.00015728884423824323,
      "loss": 0.5845,
      "step": 5850
    },
    {
      "epoch": 0.37556125721616423,
      "grad_norm": 0.7604942646833414,
      "learning_rate": 0.0001571970488913109,
      "loss": 0.6911,
      "step": 5855
    },
    {
      "epoch": 0.3758819756254009,
      "grad_norm": 0.6458258737911328,
      "learning_rate": 0.00015710518185809246,
      "loss": 0.5681,
      "step": 5860
    },
    {
      "epoch": 0.3762026940346376,
      "grad_norm": 1.4247194077938075,
      "learning_rate": 0.00015701324325372688,
      "loss": 0.7889,
      "step": 5865
    },
    {
      "epoch": 0.3765234124438743,
      "grad_norm": 0.9972586435085499,
      "learning_rate": 0.00015692123319344272,
      "loss": 0.5962,
      "step": 5870
    },
    {
      "epoch": 0.37684413085311097,
      "grad_norm": 0.8022131053222762,
      "learning_rate": 0.0001568291517925582,
      "loss": 0.7065,
      "step": 5875
    },
    {
      "epoch": 0.37716484926234767,
      "grad_norm": 1.0767684802416355,
      "learning_rate": 0.00015673699916648085,
      "loss": 0.5781,
      "step": 5880
    },
    {
      "epoch": 0.37748556767158437,
      "grad_norm": 0.9496229847137114,
      "learning_rate": 0.00015664477543070757,
      "loss": 0.7056,
      "step": 5885
    },
    {
      "epoch": 0.37780628608082106,
      "grad_norm": 0.9633177655644503,
      "learning_rate": 0.00015655248070082438,
      "loss": 0.6939,
      "step": 5890
    },
    {
      "epoch": 0.3781270044900577,
      "grad_norm": 1.115479048562113,
      "learning_rate": 0.00015646011509250617,
      "loss": 0.7378,
      "step": 5895
    },
    {
      "epoch": 0.3784477228992944,
      "grad_norm": 0.7941062299537334,
      "learning_rate": 0.0001563676787215168,
      "loss": 0.5145,
      "step": 5900
    },
    {
      "epoch": 0.3787684413085311,
      "grad_norm": 0.7169731124858206,
      "learning_rate": 0.0001562751717037087,
      "loss": 0.5164,
      "step": 5905
    },
    {
      "epoch": 0.3790891597177678,
      "grad_norm": 0.844339179385339,
      "learning_rate": 0.00015618259415502291,
      "loss": 0.7001,
      "step": 5910
    },
    {
      "epoch": 0.3794098781270045,
      "grad_norm": 0.8954099088632127,
      "learning_rate": 0.00015608994619148886,
      "loss": 0.7601,
      "step": 5915
    },
    {
      "epoch": 0.3797305965362412,
      "grad_norm": 0.9177657222066289,
      "learning_rate": 0.00015599722792922425,
      "loss": 0.6568,
      "step": 5920
    },
    {
      "epoch": 0.3800513149454779,
      "grad_norm": 0.6243318997688123,
      "learning_rate": 0.00015590443948443482,
      "loss": 0.696,
      "step": 5925
    },
    {
      "epoch": 0.38037203335471453,
      "grad_norm": 1.4596235194468596,
      "learning_rate": 0.00015581158097341435,
      "loss": 0.5778,
      "step": 5930
    },
    {
      "epoch": 0.38069275176395123,
      "grad_norm": 1.2871148477384242,
      "learning_rate": 0.0001557186525125444,
      "loss": 0.6818,
      "step": 5935
    },
    {
      "epoch": 0.38101347017318793,
      "grad_norm": 0.999208910914117,
      "learning_rate": 0.00015562565421829415,
      "loss": 0.763,
      "step": 5940
    },
    {
      "epoch": 0.3813341885824246,
      "grad_norm": 0.7364320418930576,
      "learning_rate": 0.0001555325862072204,
      "loss": 0.5347,
      "step": 5945
    },
    {
      "epoch": 0.3816549069916613,
      "grad_norm": 1.2608428725949008,
      "learning_rate": 0.0001554394485959673,
      "loss": 0.7863,
      "step": 5950
    },
    {
      "epoch": 0.381975625400898,
      "grad_norm": 1.1072984033964586,
      "learning_rate": 0.00015534624150126617,
      "loss": 0.6498,
      "step": 5955
    },
    {
      "epoch": 0.3822963438101347,
      "grad_norm": 1.058590608018293,
      "learning_rate": 0.00015525296503993548,
      "loss": 0.5703,
      "step": 5960
    },
    {
      "epoch": 0.3826170622193714,
      "grad_norm": 1.0908171799744935,
      "learning_rate": 0.0001551596193288806,
      "loss": 0.7091,
      "step": 5965
    },
    {
      "epoch": 0.38293778062860806,
      "grad_norm": 0.947173201252904,
      "learning_rate": 0.0001550662044850937,
      "loss": 0.7283,
      "step": 5970
    },
    {
      "epoch": 0.38325849903784476,
      "grad_norm": 1.3607073347278296,
      "learning_rate": 0.00015497272062565362,
      "loss": 0.6388,
      "step": 5975
    },
    {
      "epoch": 0.38357921744708146,
      "grad_norm": 0.6419239829629664,
      "learning_rate": 0.0001548791678677257,
      "loss": 0.6622,
      "step": 5980
    },
    {
      "epoch": 0.38389993585631815,
      "grad_norm": 0.7871877023929343,
      "learning_rate": 0.0001547855463285616,
      "loss": 0.6371,
      "step": 5985
    },
    {
      "epoch": 0.38422065426555485,
      "grad_norm": 0.8230439216037291,
      "learning_rate": 0.00015469185612549917,
      "loss": 0.6582,
      "step": 5990
    },
    {
      "epoch": 0.38454137267479155,
      "grad_norm": 0.9173584864551694,
      "learning_rate": 0.00015459809737596237,
      "loss": 0.6135,
      "step": 5995
    },
    {
      "epoch": 0.38486209108402825,
      "grad_norm": 0.8582018157822806,
      "learning_rate": 0.0001545042701974611,
      "loss": 0.7084,
      "step": 6000
    },
    {
      "epoch": 0.3851828094932649,
      "grad_norm": 1.2042820074202123,
      "learning_rate": 0.0001544103747075909,
      "loss": 0.8395,
      "step": 6005
    },
    {
      "epoch": 0.3855035279025016,
      "grad_norm": 1.1522620963550567,
      "learning_rate": 0.00015431641102403302,
      "loss": 0.7,
      "step": 6010
    },
    {
      "epoch": 0.3858242463117383,
      "grad_norm": 1.2293665111038254,
      "learning_rate": 0.00015422237926455417,
      "loss": 0.8011,
      "step": 6015
    },
    {
      "epoch": 0.386144964720975,
      "grad_norm": 0.8550912373038257,
      "learning_rate": 0.00015412827954700632,
      "loss": 0.7712,
      "step": 6020
    },
    {
      "epoch": 0.3864656831302117,
      "grad_norm": 1.216484260092555,
      "learning_rate": 0.00015403411198932672,
      "loss": 0.5951,
      "step": 6025
    },
    {
      "epoch": 0.3867864015394484,
      "grad_norm": 1.0566678632706732,
      "learning_rate": 0.00015393987670953756,
      "loss": 0.6986,
      "step": 6030
    },
    {
      "epoch": 0.3871071199486851,
      "grad_norm": 0.9868957913863856,
      "learning_rate": 0.00015384557382574595,
      "loss": 0.583,
      "step": 6035
    },
    {
      "epoch": 0.38742783835792177,
      "grad_norm": 0.7263178308133398,
      "learning_rate": 0.0001537512034561437,
      "loss": 0.7377,
      "step": 6040
    },
    {
      "epoch": 0.3877485567671584,
      "grad_norm": 0.8464297973296825,
      "learning_rate": 0.00015365676571900725,
      "loss": 0.6738,
      "step": 6045
    },
    {
      "epoch": 0.3880692751763951,
      "grad_norm": 0.754402075351536,
      "learning_rate": 0.00015356226073269736,
      "loss": 0.8025,
      "step": 6050
    },
    {
      "epoch": 0.3883899935856318,
      "grad_norm": 1.0080496408230515,
      "learning_rate": 0.0001534676886156592,
      "loss": 0.6925,
      "step": 6055
    },
    {
      "epoch": 0.3887107119948685,
      "grad_norm": 0.859538871785963,
      "learning_rate": 0.000153373049486422,
      "loss": 0.6198,
      "step": 6060
    },
    {
      "epoch": 0.3890314304041052,
      "grad_norm": 0.617907781420839,
      "learning_rate": 0.0001532783434635991,
      "loss": 0.708,
      "step": 6065
    },
    {
      "epoch": 0.3893521488133419,
      "grad_norm": 0.9321179061358089,
      "learning_rate": 0.00015318357066588747,
      "loss": 0.8021,
      "step": 6070
    },
    {
      "epoch": 0.3896728672225786,
      "grad_norm": 1.0543925706918078,
      "learning_rate": 0.00015308873121206798,
      "loss": 0.6394,
      "step": 6075
    },
    {
      "epoch": 0.38999358563181524,
      "grad_norm": 0.7635204958993133,
      "learning_rate": 0.00015299382522100484,
      "loss": 0.7279,
      "step": 6080
    },
    {
      "epoch": 0.39031430404105194,
      "grad_norm": 0.4808058715738424,
      "learning_rate": 0.00015289885281164587,
      "loss": 0.6074,
      "step": 6085
    },
    {
      "epoch": 0.39063502245028864,
      "grad_norm": 0.8001578622671749,
      "learning_rate": 0.00015280381410302197,
      "loss": 0.7391,
      "step": 6090
    },
    {
      "epoch": 0.39095574085952534,
      "grad_norm": 0.6800874640636567,
      "learning_rate": 0.00015270870921424721,
      "loss": 0.6633,
      "step": 6095
    },
    {
      "epoch": 0.39127645926876203,
      "grad_norm": 1.6563190981003801,
      "learning_rate": 0.00015261353826451858,
      "loss": 0.5687,
      "step": 6100
    },
    {
      "epoch": 0.39159717767799873,
      "grad_norm": 1.541052532833452,
      "learning_rate": 0.00015251830137311587,
      "loss": 0.7656,
      "step": 6105
    },
    {
      "epoch": 0.39191789608723543,
      "grad_norm": 0.8841300600956734,
      "learning_rate": 0.00015242299865940147,
      "loss": 0.5984,
      "step": 6110
    },
    {
      "epoch": 0.39223861449647207,
      "grad_norm": 1.2069800426391173,
      "learning_rate": 0.00015232763024282034,
      "loss": 0.8064,
      "step": 6115
    },
    {
      "epoch": 0.39255933290570877,
      "grad_norm": 1.12582638671757,
      "learning_rate": 0.00015223219624289978,
      "loss": 0.7329,
      "step": 6120
    },
    {
      "epoch": 0.39288005131494547,
      "grad_norm": 0.8200206186838468,
      "learning_rate": 0.0001521366967792493,
      "loss": 0.5894,
      "step": 6125
    },
    {
      "epoch": 0.39320076972418216,
      "grad_norm": 0.8420632536848158,
      "learning_rate": 0.0001520411319715603,
      "loss": 0.7387,
      "step": 6130
    },
    {
      "epoch": 0.39352148813341886,
      "grad_norm": 0.8067132371420835,
      "learning_rate": 0.00015194550193960632,
      "loss": 0.682,
      "step": 6135
    },
    {
      "epoch": 0.39384220654265556,
      "grad_norm": 0.7708975305048692,
      "learning_rate": 0.00015184980680324248,
      "loss": 0.68,
      "step": 6140
    },
    {
      "epoch": 0.39416292495189226,
      "grad_norm": 1.0673984272805985,
      "learning_rate": 0.00015175404668240554,
      "loss": 0.765,
      "step": 6145
    },
    {
      "epoch": 0.39448364336112896,
      "grad_norm": 1.3041455682451786,
      "learning_rate": 0.00015165822169711373,
      "loss": 0.6576,
      "step": 6150
    },
    {
      "epoch": 0.3948043617703656,
      "grad_norm": 0.6831544367344609,
      "learning_rate": 0.00015156233196746653,
      "loss": 0.7366,
      "step": 6155
    },
    {
      "epoch": 0.3951250801796023,
      "grad_norm": 0.9906492347644728,
      "learning_rate": 0.00015146637761364457,
      "loss": 0.7104,
      "step": 6160
    },
    {
      "epoch": 0.395445798588839,
      "grad_norm": 0.8542271989350849,
      "learning_rate": 0.00015137035875590956,
      "loss": 0.6678,
      "step": 6165
    },
    {
      "epoch": 0.3957665169980757,
      "grad_norm": 1.542102394105923,
      "learning_rate": 0.00015127427551460396,
      "loss": 0.665,
      "step": 6170
    },
    {
      "epoch": 0.3960872354073124,
      "grad_norm": 0.8016623705576872,
      "learning_rate": 0.00015117812801015095,
      "loss": 0.5812,
      "step": 6175
    },
    {
      "epoch": 0.3964079538165491,
      "grad_norm": 1.2073109631978751,
      "learning_rate": 0.00015108191636305427,
      "loss": 0.7527,
      "step": 6180
    },
    {
      "epoch": 0.3967286722257858,
      "grad_norm": 0.8328169453200382,
      "learning_rate": 0.000150985640693898,
      "loss": 0.6733,
      "step": 6185
    },
    {
      "epoch": 0.3970493906350224,
      "grad_norm": 0.9951192780366616,
      "learning_rate": 0.00015088930112334653,
      "loss": 0.733,
      "step": 6190
    },
    {
      "epoch": 0.3973701090442591,
      "grad_norm": 0.7405889864532202,
      "learning_rate": 0.0001507928977721443,
      "loss": 0.5478,
      "step": 6195
    },
    {
      "epoch": 0.3976908274534958,
      "grad_norm": 1.080626962102723,
      "learning_rate": 0.0001506964307611157,
      "loss": 0.6115,
      "step": 6200
    },
    {
      "epoch": 0.3980115458627325,
      "grad_norm": 0.7995884570597525,
      "learning_rate": 0.0001505999002111649,
      "loss": 0.5829,
      "step": 6205
    },
    {
      "epoch": 0.3983322642719692,
      "grad_norm": 0.4992231946350308,
      "learning_rate": 0.0001505033062432757,
      "loss": 0.5649,
      "step": 6210
    },
    {
      "epoch": 0.3986529826812059,
      "grad_norm": 0.8489355096183382,
      "learning_rate": 0.00015040664897851138,
      "loss": 0.7291,
      "step": 6215
    },
    {
      "epoch": 0.3989737010904426,
      "grad_norm": 1.136002981763331,
      "learning_rate": 0.00015030992853801454,
      "loss": 0.7918,
      "step": 6220
    },
    {
      "epoch": 0.39929441949967925,
      "grad_norm": 0.895880595156802,
      "learning_rate": 0.00015021314504300704,
      "loss": 0.5635,
      "step": 6225
    },
    {
      "epoch": 0.39961513790891595,
      "grad_norm": 0.8226243605298355,
      "learning_rate": 0.0001501162986147897,
      "loss": 0.815,
      "step": 6230
    },
    {
      "epoch": 0.39993585631815265,
      "grad_norm": 0.9921294907910895,
      "learning_rate": 0.00015001938937474218,
      "loss": 0.7156,
      "step": 6235
    },
    {
      "epoch": 0.40025657472738935,
      "grad_norm": 0.9510451771447491,
      "learning_rate": 0.0001499224174443229,
      "loss": 0.681,
      "step": 6240
    },
    {
      "epoch": 0.40057729313662604,
      "grad_norm": 0.9952627757450367,
      "learning_rate": 0.0001498253829450689,
      "loss": 0.712,
      "step": 6245
    },
    {
      "epoch": 0.40089801154586274,
      "grad_norm": 0.6514927458391138,
      "learning_rate": 0.00014972828599859556,
      "loss": 0.633,
      "step": 6250
    },
    {
      "epoch": 0.40121872995509944,
      "grad_norm": 0.9621219480196492,
      "learning_rate": 0.0001496311267265966,
      "loss": 0.6988,
      "step": 6255
    },
    {
      "epoch": 0.40153944836433614,
      "grad_norm": 1.0155290688557055,
      "learning_rate": 0.00014953390525084377,
      "loss": 0.7093,
      "step": 6260
    },
    {
      "epoch": 0.4018601667735728,
      "grad_norm": 0.6507458129551235,
      "learning_rate": 0.00014943662169318686,
      "loss": 0.6781,
      "step": 6265
    },
    {
      "epoch": 0.4021808851828095,
      "grad_norm": 0.8206284722853324,
      "learning_rate": 0.00014933927617555342,
      "loss": 0.6472,
      "step": 6270
    },
    {
      "epoch": 0.4025016035920462,
      "grad_norm": 0.969513442832448,
      "learning_rate": 0.00014924186881994867,
      "loss": 0.6322,
      "step": 6275
    },
    {
      "epoch": 0.4028223220012829,
      "grad_norm": 1.0110426378326145,
      "learning_rate": 0.00014914439974845532,
      "loss": 0.6192,
      "step": 6280
    },
    {
      "epoch": 0.40314304041051957,
      "grad_norm": 0.9182180122329154,
      "learning_rate": 0.0001490468690832335,
      "loss": 0.7624,
      "step": 6285
    },
    {
      "epoch": 0.40346375881975627,
      "grad_norm": 1.0221754081762093,
      "learning_rate": 0.00014894927694652046,
      "loss": 0.5685,
      "step": 6290
    },
    {
      "epoch": 0.40378447722899297,
      "grad_norm": 0.7951566985169003,
      "learning_rate": 0.00014885162346063048,
      "loss": 0.6114,
      "step": 6295
    },
    {
      "epoch": 0.4041051956382296,
      "grad_norm": 0.9205666830852229,
      "learning_rate": 0.00014875390874795482,
      "loss": 0.6126,
      "step": 6300
    },
    {
      "epoch": 0.4044259140474663,
      "grad_norm": 0.8495232187331296,
      "learning_rate": 0.00014865613293096132,
      "loss": 0.6743,
      "step": 6305
    },
    {
      "epoch": 0.404746632456703,
      "grad_norm": 0.5863050150246784,
      "learning_rate": 0.0001485582961321946,
      "loss": 0.5965,
      "step": 6310
    },
    {
      "epoch": 0.4050673508659397,
      "grad_norm": 0.732145223215556,
      "learning_rate": 0.00014846039847427563,
      "loss": 0.6549,
      "step": 6315
    },
    {
      "epoch": 0.4053880692751764,
      "grad_norm": 0.7872248738108987,
      "learning_rate": 0.00014836244007990156,
      "loss": 0.675,
      "step": 6320
    },
    {
      "epoch": 0.4057087876844131,
      "grad_norm": 0.6983906622235366,
      "learning_rate": 0.0001482644210718458,
      "loss": 0.6684,
      "step": 6325
    },
    {
      "epoch": 0.4060295060936498,
      "grad_norm": 1.036082877660743,
      "learning_rate": 0.0001481663415729576,
      "loss": 0.6682,
      "step": 6330
    },
    {
      "epoch": 0.4063502245028865,
      "grad_norm": 0.8176112608665335,
      "learning_rate": 0.00014806820170616222,
      "loss": 0.8555,
      "step": 6335
    },
    {
      "epoch": 0.40667094291212313,
      "grad_norm": 0.7770154320072936,
      "learning_rate": 0.00014797000159446038,
      "loss": 0.557,
      "step": 6340
    },
    {
      "epoch": 0.40699166132135983,
      "grad_norm": 1.5604043527138882,
      "learning_rate": 0.00014787174136092837,
      "loss": 0.5678,
      "step": 6345
    },
    {
      "epoch": 0.40731237973059653,
      "grad_norm": 0.5000651713384456,
      "learning_rate": 0.00014777342112871786,
      "loss": 0.6323,
      "step": 6350
    },
    {
      "epoch": 0.4076330981398332,
      "grad_norm": 0.7129539414804645,
      "learning_rate": 0.0001476750410210557,
      "loss": 0.6531,
      "step": 6355
    },
    {
      "epoch": 0.4079538165490699,
      "grad_norm": 0.6838741535402209,
      "learning_rate": 0.0001475766011612438,
      "loss": 0.6734,
      "step": 6360
    },
    {
      "epoch": 0.4082745349583066,
      "grad_norm": 0.6003288340459018,
      "learning_rate": 0.00014747810167265894,
      "loss": 0.5793,
      "step": 6365
    },
    {
      "epoch": 0.4085952533675433,
      "grad_norm": 1.5754948140455838,
      "learning_rate": 0.00014737954267875263,
      "loss": 0.702,
      "step": 6370
    },
    {
      "epoch": 0.40891597177677996,
      "grad_norm": 1.0150345516766142,
      "learning_rate": 0.000147280924303051,
      "loss": 0.8569,
      "step": 6375
    },
    {
      "epoch": 0.40923669018601666,
      "grad_norm": 1.0034479899495579,
      "learning_rate": 0.0001471822466691545,
      "loss": 0.8446,
      "step": 6380
    },
    {
      "epoch": 0.40955740859525336,
      "grad_norm": 0.9184425443953635,
      "learning_rate": 0.00014708350990073798,
      "loss": 0.6602,
      "step": 6385
    },
    {
      "epoch": 0.40987812700449006,
      "grad_norm": 0.6284500041695303,
      "learning_rate": 0.0001469847141215503,
      "loss": 0.7291,
      "step": 6390
    },
    {
      "epoch": 0.41019884541372675,
      "grad_norm": 0.9039981636058719,
      "learning_rate": 0.0001468858594554144,
      "loss": 0.8008,
      "step": 6395
    },
    {
      "epoch": 0.41051956382296345,
      "grad_norm": 0.9662431864347534,
      "learning_rate": 0.0001467869460262269,
      "loss": 0.5989,
      "step": 6400
    },
    {
      "epoch": 0.41084028223220015,
      "grad_norm": 0.6824016883811361,
      "learning_rate": 0.00014668797395795812,
      "loss": 0.7651,
      "step": 6405
    },
    {
      "epoch": 0.4111610006414368,
      "grad_norm": 0.8325307304433841,
      "learning_rate": 0.00014658894337465187,
      "loss": 0.762,
      "step": 6410
    },
    {
      "epoch": 0.4114817190506735,
      "grad_norm": 0.6873637896445222,
      "learning_rate": 0.00014648985440042533,
      "loss": 0.6868,
      "step": 6415
    },
    {
      "epoch": 0.4118024374599102,
      "grad_norm": 0.8851369890257763,
      "learning_rate": 0.0001463907071594688,
      "loss": 0.719,
      "step": 6420
    },
    {
      "epoch": 0.4121231558691469,
      "grad_norm": 0.8755806997147045,
      "learning_rate": 0.00014629150177604565,
      "loss": 0.6161,
      "step": 6425
    },
    {
      "epoch": 0.4124438742783836,
      "grad_norm": 0.9956221559599793,
      "learning_rate": 0.00014619223837449211,
      "loss": 0.6246,
      "step": 6430
    },
    {
      "epoch": 0.4127645926876203,
      "grad_norm": 0.9146462627716199,
      "learning_rate": 0.00014609291707921713,
      "loss": 0.665,
      "step": 6435
    },
    {
      "epoch": 0.413085311096857,
      "grad_norm": 0.7096303973864491,
      "learning_rate": 0.0001459935380147022,
      "loss": 0.7379,
      "step": 6440
    },
    {
      "epoch": 0.4134060295060937,
      "grad_norm": 0.8414445373385668,
      "learning_rate": 0.00014589410130550124,
      "loss": 0.7533,
      "step": 6445
    },
    {
      "epoch": 0.4137267479153303,
      "grad_norm": 1.1009718984925583,
      "learning_rate": 0.0001457946070762404,
      "loss": 0.673,
      "step": 6450
    },
    {
      "epoch": 0.414047466324567,
      "grad_norm": 0.9982085240192685,
      "learning_rate": 0.000145695055451618,
      "loss": 0.6951,
      "step": 6455
    },
    {
      "epoch": 0.4143681847338037,
      "grad_norm": 0.7828125692520432,
      "learning_rate": 0.00014559544655640412,
      "loss": 0.7779,
      "step": 6460
    },
    {
      "epoch": 0.4146889031430404,
      "grad_norm": 1.0323696606312884,
      "learning_rate": 0.0001454957805154408,
      "loss": 0.666,
      "step": 6465
    },
    {
      "epoch": 0.4150096215522771,
      "grad_norm": 0.6618186643447491,
      "learning_rate": 0.00014539605745364156,
      "loss": 0.7354,
      "step": 6470
    },
    {
      "epoch": 0.4153303399615138,
      "grad_norm": 1.3747337158411725,
      "learning_rate": 0.00014529627749599146,
      "loss": 0.7191,
      "step": 6475
    },
    {
      "epoch": 0.4156510583707505,
      "grad_norm": 0.6208342823219867,
      "learning_rate": 0.0001451964407675469,
      "loss": 0.648,
      "step": 6480
    },
    {
      "epoch": 0.41597177677998715,
      "grad_norm": 1.0319199547152835,
      "learning_rate": 0.00014509654739343534,
      "loss": 0.7808,
      "step": 6485
    },
    {
      "epoch": 0.41629249518922384,
      "grad_norm": 1.1954322026767323,
      "learning_rate": 0.0001449965974988553,
      "loss": 0.7695,
      "step": 6490
    },
    {
      "epoch": 0.41661321359846054,
      "grad_norm": 1.2496250479975701,
      "learning_rate": 0.00014489659120907615,
      "loss": 0.6214,
      "step": 6495
    },
    {
      "epoch": 0.41693393200769724,
      "grad_norm": 0.6983577410015246,
      "learning_rate": 0.00014479652864943788,
      "loss": 0.6312,
      "step": 6500
    },
    {
      "epoch": 0.41725465041693394,
      "grad_norm": 0.8629680447857923,
      "learning_rate": 0.0001446964099453511,
      "loss": 0.7508,
      "step": 6505
    },
    {
      "epoch": 0.41757536882617063,
      "grad_norm": 1.0627571850045838,
      "learning_rate": 0.00014459623522229662,
      "loss": 0.7044,
      "step": 6510
    },
    {
      "epoch": 0.41789608723540733,
      "grad_norm": 0.900748883113857,
      "learning_rate": 0.00014449600460582563,
      "loss": 0.7454,
      "step": 6515
    },
    {
      "epoch": 0.41821680564464403,
      "grad_norm": 0.9690669274483888,
      "learning_rate": 0.00014439571822155934,
      "loss": 0.5726,
      "step": 6520
    },
    {
      "epoch": 0.41853752405388067,
      "grad_norm": 1.0487104357417287,
      "learning_rate": 0.00014429537619518873,
      "loss": 0.799,
      "step": 6525
    },
    {
      "epoch": 0.41885824246311737,
      "grad_norm": 1.2154258394073059,
      "learning_rate": 0.0001441949786524747,
      "loss": 0.5219,
      "step": 6530
    },
    {
      "epoch": 0.41917896087235407,
      "grad_norm": 0.7794877423395151,
      "learning_rate": 0.0001440945257192476,
      "loss": 0.5707,
      "step": 6535
    },
    {
      "epoch": 0.41949967928159076,
      "grad_norm": 0.5347206233594262,
      "learning_rate": 0.00014399401752140728,
      "loss": 0.55,
      "step": 6540
    },
    {
      "epoch": 0.41982039769082746,
      "grad_norm": 0.6862664210890991,
      "learning_rate": 0.00014389345418492272,
      "loss": 0.7803,
      "step": 6545
    },
    {
      "epoch": 0.42014111610006416,
      "grad_norm": 1.1652827614213763,
      "learning_rate": 0.0001437928358358322,
      "loss": 0.6907,
      "step": 6550
    },
    {
      "epoch": 0.42046183450930086,
      "grad_norm": 1.0771142450313498,
      "learning_rate": 0.00014369216260024282,
      "loss": 0.5868,
      "step": 6555
    },
    {
      "epoch": 0.4207825529185375,
      "grad_norm": 0.7384688516596317,
      "learning_rate": 0.00014359143460433046,
      "loss": 0.5754,
      "step": 6560
    },
    {
      "epoch": 0.4211032713277742,
      "grad_norm": 0.7961839635706309,
      "learning_rate": 0.00014349065197433977,
      "loss": 0.6247,
      "step": 6565
    },
    {
      "epoch": 0.4214239897370109,
      "grad_norm": 0.9321579239285109,
      "learning_rate": 0.0001433898148365837,
      "loss": 0.6856,
      "step": 6570
    },
    {
      "epoch": 0.4217447081462476,
      "grad_norm": 0.7081449574427117,
      "learning_rate": 0.00014328892331744362,
      "loss": 0.5893,
      "step": 6575
    },
    {
      "epoch": 0.4220654265554843,
      "grad_norm": 0.9200227580239932,
      "learning_rate": 0.000143187977543369,
      "loss": 0.661,
      "step": 6580
    },
    {
      "epoch": 0.422386144964721,
      "grad_norm": 1.1330174896855054,
      "learning_rate": 0.00014308697764087738,
      "loss": 0.8342,
      "step": 6585
    },
    {
      "epoch": 0.4227068633739577,
      "grad_norm": 0.851200673216541,
      "learning_rate": 0.00014298592373655414,
      "loss": 0.8357,
      "step": 6590
    },
    {
      "epoch": 0.42302758178319433,
      "grad_norm": 0.6342829663427049,
      "learning_rate": 0.00014288481595705217,
      "loss": 0.4643,
      "step": 6595
    },
    {
      "epoch": 0.423348300192431,
      "grad_norm": 0.5823246535632486,
      "learning_rate": 0.00014278365442909214,
      "loss": 0.6472,
      "step": 6600
    },
    {
      "epoch": 0.4236690186016677,
      "grad_norm": 1.0907798326084035,
      "learning_rate": 0.0001426824392794619,
      "loss": 0.5667,
      "step": 6605
    },
    {
      "epoch": 0.4239897370109044,
      "grad_norm": 0.6171485537285203,
      "learning_rate": 0.00014258117063501658,
      "loss": 0.7975,
      "step": 6610
    },
    {
      "epoch": 0.4243104554201411,
      "grad_norm": 0.920203490173087,
      "learning_rate": 0.00014247984862267833,
      "loss": 0.5432,
      "step": 6615
    },
    {
      "epoch": 0.4246311738293778,
      "grad_norm": 0.6838928556102262,
      "learning_rate": 0.0001423784733694362,
      "loss": 0.5982,
      "step": 6620
    },
    {
      "epoch": 0.4249518922386145,
      "grad_norm": 1.2229051146263923,
      "learning_rate": 0.00014227704500234599,
      "loss": 0.8164,
      "step": 6625
    },
    {
      "epoch": 0.4252726106478512,
      "grad_norm": 0.7990664572540562,
      "learning_rate": 0.00014217556364853006,
      "loss": 0.7974,
      "step": 6630
    },
    {
      "epoch": 0.42559332905708785,
      "grad_norm": 1.439913710180236,
      "learning_rate": 0.00014207402943517707,
      "loss": 0.6574,
      "step": 6635
    },
    {
      "epoch": 0.42591404746632455,
      "grad_norm": 1.5833188763841297,
      "learning_rate": 0.0001419724424895421,
      "loss": 0.6127,
      "step": 6640
    },
    {
      "epoch": 0.42623476587556125,
      "grad_norm": 1.0972694183324532,
      "learning_rate": 0.00014187080293894623,
      "loss": 0.6384,
      "step": 6645
    },
    {
      "epoch": 0.42655548428479795,
      "grad_norm": 0.7755437327444886,
      "learning_rate": 0.0001417691109107765,
      "loss": 0.6467,
      "step": 6650
    },
    {
      "epoch": 0.42687620269403465,
      "grad_norm": 0.7307053147732903,
      "learning_rate": 0.00014166736653248568,
      "loss": 0.6857,
      "step": 6655
    },
    {
      "epoch": 0.42719692110327134,
      "grad_norm": 1.1129466425839534,
      "learning_rate": 0.00014156556993159215,
      "loss": 0.6325,
      "step": 6660
    },
    {
      "epoch": 0.42751763951250804,
      "grad_norm": 1.0829046562773215,
      "learning_rate": 0.00014146372123567986,
      "loss": 0.4627,
      "step": 6665
    },
    {
      "epoch": 0.4278383579217447,
      "grad_norm": 0.7286117691400992,
      "learning_rate": 0.00014136182057239788,
      "loss": 0.7129,
      "step": 6670
    },
    {
      "epoch": 0.4281590763309814,
      "grad_norm": 0.9030468850448815,
      "learning_rate": 0.00014125986806946052,
      "loss": 0.6249,
      "step": 6675
    },
    {
      "epoch": 0.4284797947402181,
      "grad_norm": 1.0569038979072376,
      "learning_rate": 0.00014115786385464704,
      "loss": 0.5753,
      "step": 6680
    },
    {
      "epoch": 0.4288005131494548,
      "grad_norm": 1.9939364349504531,
      "learning_rate": 0.0001410558080558015,
      "loss": 0.6928,
      "step": 6685
    },
    {
      "epoch": 0.4291212315586915,
      "grad_norm": 0.7638304398434881,
      "learning_rate": 0.00014095370080083262,
      "loss": 0.7665,
      "step": 6690
    },
    {
      "epoch": 0.42944194996792817,
      "grad_norm": 1.0470546825430735,
      "learning_rate": 0.00014085154221771362,
      "loss": 0.5786,
      "step": 6695
    },
    {
      "epoch": 0.42976266837716487,
      "grad_norm": 1.122127513476166,
      "learning_rate": 0.00014074933243448203,
      "loss": 0.5162,
      "step": 6700
    },
    {
      "epoch": 0.43008338678640157,
      "grad_norm": 0.9808616961072774,
      "learning_rate": 0.00014064707157923956,
      "loss": 0.5722,
      "step": 6705
    },
    {
      "epoch": 0.4304041051956382,
      "grad_norm": 0.8653107924861354,
      "learning_rate": 0.00014054475978015192,
      "loss": 0.6378,
      "step": 6710
    },
    {
      "epoch": 0.4307248236048749,
      "grad_norm": 0.8962127595984706,
      "learning_rate": 0.00014044239716544868,
      "loss": 0.6408,
      "step": 6715
    },
    {
      "epoch": 0.4310455420141116,
      "grad_norm": 0.8365357084309853,
      "learning_rate": 0.00014033998386342312,
      "loss": 0.6256,
      "step": 6720
    },
    {
      "epoch": 0.4313662604233483,
      "grad_norm": 1.0863245013957081,
      "learning_rate": 0.000140237520002432,
      "loss": 0.7068,
      "step": 6725
    },
    {
      "epoch": 0.431686978832585,
      "grad_norm": 0.662268709969254,
      "learning_rate": 0.0001401350057108955,
      "loss": 0.7573,
      "step": 6730
    },
    {
      "epoch": 0.4320076972418217,
      "grad_norm": 1.1715222453521494,
      "learning_rate": 0.0001400324411172969,
      "loss": 0.7574,
      "step": 6735
    },
    {
      "epoch": 0.4323284156510584,
      "grad_norm": 0.9424425544184805,
      "learning_rate": 0.0001399298263501827,
      "loss": 0.8143,
      "step": 6740
    },
    {
      "epoch": 0.43264913406029504,
      "grad_norm": 0.7955537139368879,
      "learning_rate": 0.00013982716153816213,
      "loss": 0.5263,
      "step": 6745
    },
    {
      "epoch": 0.43296985246953174,
      "grad_norm": 0.9554382885880205,
      "learning_rate": 0.00013972444680990722,
      "loss": 0.6976,
      "step": 6750
    },
    {
      "epoch": 0.43329057087876843,
      "grad_norm": 1.5328515613064213,
      "learning_rate": 0.00013962168229415253,
      "loss": 0.627,
      "step": 6755
    },
    {
      "epoch": 0.43361128928800513,
      "grad_norm": 1.096222900496091,
      "learning_rate": 0.00013951886811969501,
      "loss": 0.8235,
      "step": 6760
    },
    {
      "epoch": 0.43393200769724183,
      "grad_norm": 1.3093624744883847,
      "learning_rate": 0.00013941600441539392,
      "loss": 0.5996,
      "step": 6765
    },
    {
      "epoch": 0.4342527261064785,
      "grad_norm": 0.9375701783813489,
      "learning_rate": 0.00013931309131017046,
      "loss": 0.8571,
      "step": 6770
    },
    {
      "epoch": 0.4345734445157152,
      "grad_norm": 0.8981486814038466,
      "learning_rate": 0.0001392101289330079,
      "loss": 0.7036,
      "step": 6775
    },
    {
      "epoch": 0.43489416292495187,
      "grad_norm": 1.030290589948871,
      "learning_rate": 0.00013910711741295113,
      "loss": 0.5523,
      "step": 6780
    },
    {
      "epoch": 0.43521488133418856,
      "grad_norm": 0.7783354813811616,
      "learning_rate": 0.00013900405687910676,
      "loss": 0.6957,
      "step": 6785
    },
    {
      "epoch": 0.43553559974342526,
      "grad_norm": 0.7762966668183622,
      "learning_rate": 0.00013890094746064273,
      "loss": 0.7249,
      "step": 6790
    },
    {
      "epoch": 0.43585631815266196,
      "grad_norm": 1.0757163547744426,
      "learning_rate": 0.0001387977892867883,
      "loss": 0.7033,
      "step": 6795
    },
    {
      "epoch": 0.43617703656189866,
      "grad_norm": 0.9414991837160046,
      "learning_rate": 0.00013869458248683377,
      "loss": 0.6503,
      "step": 6800
    },
    {
      "epoch": 0.43649775497113535,
      "grad_norm": 1.1367581767585646,
      "learning_rate": 0.0001385913271901305,
      "loss": 0.6653,
      "step": 6805
    },
    {
      "epoch": 0.43681847338037205,
      "grad_norm": 0.9718072928244804,
      "learning_rate": 0.0001384880235260905,
      "loss": 0.6126,
      "step": 6810
    },
    {
      "epoch": 0.43713919178960875,
      "grad_norm": 1.051631260475179,
      "learning_rate": 0.00013838467162418652,
      "loss": 0.7529,
      "step": 6815
    },
    {
      "epoch": 0.4374599101988454,
      "grad_norm": 1.1255123924704187,
      "learning_rate": 0.00013828127161395165,
      "loss": 0.7,
      "step": 6820
    },
    {
      "epoch": 0.4377806286080821,
      "grad_norm": 0.6159074752294377,
      "learning_rate": 0.00013817782362497938,
      "loss": 0.7815,
      "step": 6825
    },
    {
      "epoch": 0.4381013470173188,
      "grad_norm": 0.7651323158439101,
      "learning_rate": 0.00013807432778692333,
      "loss": 0.6508,
      "step": 6830
    },
    {
      "epoch": 0.4384220654265555,
      "grad_norm": 1.49661820735196,
      "learning_rate": 0.00013797078422949697,
      "loss": 0.6949,
      "step": 6835
    },
    {
      "epoch": 0.4387427838357922,
      "grad_norm": 0.9888853439915466,
      "learning_rate": 0.0001378671930824737,
      "loss": 0.6223,
      "step": 6840
    },
    {
      "epoch": 0.4390635022450289,
      "grad_norm": 1.248537199848208,
      "learning_rate": 0.00013776355447568648,
      "loss": 0.8024,
      "step": 6845
    },
    {
      "epoch": 0.4393842206542656,
      "grad_norm": 0.9575631631075234,
      "learning_rate": 0.00013765986853902783,
      "loss": 0.6739,
      "step": 6850
    },
    {
      "epoch": 0.4397049390635022,
      "grad_norm": 0.9680343975909423,
      "learning_rate": 0.00013755613540244958,
      "loss": 0.6917,
      "step": 6855
    },
    {
      "epoch": 0.4400256574727389,
      "grad_norm": 1.117269566951374,
      "learning_rate": 0.00013745235519596263,
      "loss": 0.7042,
      "step": 6860
    },
    {
      "epoch": 0.4403463758819756,
      "grad_norm": 0.8619372703069825,
      "learning_rate": 0.00013734852804963703,
      "loss": 0.609,
      "step": 6865
    },
    {
      "epoch": 0.4406670942912123,
      "grad_norm": 0.8117525588458958,
      "learning_rate": 0.00013724465409360148,
      "loss": 0.6981,
      "step": 6870
    },
    {
      "epoch": 0.440987812700449,
      "grad_norm": 1.01398403519154,
      "learning_rate": 0.0001371407334580434,
      "loss": 0.6151,
      "step": 6875
    },
    {
      "epoch": 0.4413085311096857,
      "grad_norm": 0.834092658374222,
      "learning_rate": 0.00013703676627320886,
      "loss": 0.7673,
      "step": 6880
    },
    {
      "epoch": 0.4416292495189224,
      "grad_norm": 1.5311945048848135,
      "learning_rate": 0.00013693275266940207,
      "loss": 0.7119,
      "step": 6885
    },
    {
      "epoch": 0.44194996792815905,
      "grad_norm": 1.527540376439275,
      "learning_rate": 0.00013682869277698557,
      "loss": 0.6265,
      "step": 6890
    },
    {
      "epoch": 0.44227068633739575,
      "grad_norm": 0.7951368893260018,
      "learning_rate": 0.00013672458672637984,
      "loss": 0.8016,
      "step": 6895
    },
    {
      "epoch": 0.44259140474663244,
      "grad_norm": 1.2763559389048758,
      "learning_rate": 0.0001366204346480632,
      "loss": 0.7206,
      "step": 6900
    },
    {
      "epoch": 0.44291212315586914,
      "grad_norm": 0.8023255338282319,
      "learning_rate": 0.00013651623667257164,
      "loss": 0.7554,
      "step": 6905
    },
    {
      "epoch": 0.44323284156510584,
      "grad_norm": 0.8695350841504818,
      "learning_rate": 0.00013641199293049877,
      "loss": 0.8358,
      "step": 6910
    },
    {
      "epoch": 0.44355355997434254,
      "grad_norm": 0.9044131348318595,
      "learning_rate": 0.0001363077035524955,
      "loss": 0.6412,
      "step": 6915
    },
    {
      "epoch": 0.44387427838357923,
      "grad_norm": 0.8127899752297872,
      "learning_rate": 0.00013620336866926997,
      "loss": 0.6957,
      "step": 6920
    },
    {
      "epoch": 0.44419499679281593,
      "grad_norm": 0.8688512997555105,
      "learning_rate": 0.00013609898841158725,
      "loss": 0.724,
      "step": 6925
    },
    {
      "epoch": 0.4445157152020526,
      "grad_norm": 0.8760877608220616,
      "learning_rate": 0.0001359945629102694,
      "loss": 0.5738,
      "step": 6930
    },
    {
      "epoch": 0.4448364336112893,
      "grad_norm": 1.0325674004426306,
      "learning_rate": 0.0001358900922961951,
      "loss": 0.5873,
      "step": 6935
    },
    {
      "epoch": 0.44515715202052597,
      "grad_norm": 0.8467908302129974,
      "learning_rate": 0.00013578557670029966,
      "loss": 0.7058,
      "step": 6940
    },
    {
      "epoch": 0.44547787042976267,
      "grad_norm": 0.8131400613232301,
      "learning_rate": 0.00013568101625357465,
      "loss": 0.7422,
      "step": 6945
    },
    {
      "epoch": 0.44579858883899937,
      "grad_norm": 0.724722516850653,
      "learning_rate": 0.000135576411087068,
      "loss": 0.6638,
      "step": 6950
    },
    {
      "epoch": 0.44611930724823606,
      "grad_norm": 0.8948898208956525,
      "learning_rate": 0.00013547176133188354,
      "loss": 0.7129,
      "step": 6955
    },
    {
      "epoch": 0.44644002565747276,
      "grad_norm": 1.0104789290655904,
      "learning_rate": 0.00013536706711918107,
      "loss": 0.7032,
      "step": 6960
    },
    {
      "epoch": 0.4467607440667094,
      "grad_norm": 0.8414717932992289,
      "learning_rate": 0.0001352623285801761,
      "loss": 0.6836,
      "step": 6965
    },
    {
      "epoch": 0.4470814624759461,
      "grad_norm": 1.1406826410807314,
      "learning_rate": 0.00013515754584613962,
      "loss": 0.6053,
      "step": 6970
    },
    {
      "epoch": 0.4474021808851828,
      "grad_norm": 0.8742591243812547,
      "learning_rate": 0.00013505271904839817,
      "loss": 0.7431,
      "step": 6975
    },
    {
      "epoch": 0.4477228992944195,
      "grad_norm": 0.6939509932441673,
      "learning_rate": 0.00013494784831833337,
      "loss": 0.6291,
      "step": 6980
    },
    {
      "epoch": 0.4480436177036562,
      "grad_norm": 1.1945030623029917,
      "learning_rate": 0.00013484293378738193,
      "loss": 0.6403,
      "step": 6985
    },
    {
      "epoch": 0.4483643361128929,
      "grad_norm": 1.2041604733537394,
      "learning_rate": 0.0001347379755870355,
      "loss": 0.7259,
      "step": 6990
    },
    {
      "epoch": 0.4486850545221296,
      "grad_norm": 1.2915007724773113,
      "learning_rate": 0.00013463297384884047,
      "loss": 0.659,
      "step": 6995
    },
    {
      "epoch": 0.4490057729313663,
      "grad_norm": 0.9604685032866782,
      "learning_rate": 0.00013452792870439774,
      "loss": 0.7607,
      "step": 7000
    },
    {
      "epoch": 0.44932649134060293,
      "grad_norm": 0.683575690655945,
      "learning_rate": 0.00013442284028536265,
      "loss": 0.6597,
      "step": 7005
    },
    {
      "epoch": 0.4496472097498396,
      "grad_norm": 0.8599337861042293,
      "learning_rate": 0.0001343177087234447,
      "loss": 0.6324,
      "step": 7010
    },
    {
      "epoch": 0.4499679281590763,
      "grad_norm": 1.0590394622444155,
      "learning_rate": 0.00013421253415040764,
      "loss": 0.7187,
      "step": 7015
    },
    {
      "epoch": 0.450288646568313,
      "grad_norm": 0.7304239044871675,
      "learning_rate": 0.00013410731669806893,
      "loss": 0.6951,
      "step": 7020
    },
    {
      "epoch": 0.4506093649775497,
      "grad_norm": 0.6027716061436601,
      "learning_rate": 0.00013400205649829986,
      "loss": 0.6254,
      "step": 7025
    },
    {
      "epoch": 0.4509300833867864,
      "grad_norm": 0.9290585913030099,
      "learning_rate": 0.00013389675368302538,
      "loss": 0.6395,
      "step": 7030
    },
    {
      "epoch": 0.4512508017960231,
      "grad_norm": 0.6100444770178587,
      "learning_rate": 0.00013379140838422368,
      "loss": 0.6956,
      "step": 7035
    },
    {
      "epoch": 0.45157152020525976,
      "grad_norm": 1.0560462270870308,
      "learning_rate": 0.00013368602073392626,
      "loss": 0.7217,
      "step": 7040
    },
    {
      "epoch": 0.45189223861449646,
      "grad_norm": 0.9506970796048375,
      "learning_rate": 0.00013358059086421777,
      "loss": 0.7538,
      "step": 7045
    },
    {
      "epoch": 0.45221295702373315,
      "grad_norm": 0.8472683366273123,
      "learning_rate": 0.0001334751189072357,
      "loss": 0.7699,
      "step": 7050
    },
    {
      "epoch": 0.45253367543296985,
      "grad_norm": 0.8123297983190807,
      "learning_rate": 0.00013336960499517035,
      "loss": 0.7617,
      "step": 7055
    },
    {
      "epoch": 0.45285439384220655,
      "grad_norm": 0.7432610908008688,
      "learning_rate": 0.00013326404926026453,
      "loss": 0.4966,
      "step": 7060
    },
    {
      "epoch": 0.45317511225144325,
      "grad_norm": 1.9038556869996193,
      "learning_rate": 0.00013315845183481352,
      "loss": 0.7716,
      "step": 7065
    },
    {
      "epoch": 0.45349583066067994,
      "grad_norm": 1.517420207283064,
      "learning_rate": 0.0001330528128511648,
      "loss": 0.7335,
      "step": 7070
    },
    {
      "epoch": 0.4538165490699166,
      "grad_norm": 0.8901376925504432,
      "learning_rate": 0.00013294713244171798,
      "loss": 0.6803,
      "step": 7075
    },
    {
      "epoch": 0.4541372674791533,
      "grad_norm": 0.9458291501306725,
      "learning_rate": 0.0001328414107389246,
      "loss": 0.8463,
      "step": 7080
    },
    {
      "epoch": 0.45445798588839,
      "grad_norm": 0.771925264607674,
      "learning_rate": 0.00013273564787528796,
      "loss": 0.6271,
      "step": 7085
    },
    {
      "epoch": 0.4547787042976267,
      "grad_norm": 0.9552006861914584,
      "learning_rate": 0.00013262984398336287,
      "loss": 0.6903,
      "step": 7090
    },
    {
      "epoch": 0.4550994227068634,
      "grad_norm": 0.7912142730312611,
      "learning_rate": 0.00013252399919575565,
      "loss": 0.7355,
      "step": 7095
    },
    {
      "epoch": 0.4554201411161001,
      "grad_norm": 0.8790500769675236,
      "learning_rate": 0.0001324181136451238,
      "loss": 0.6732,
      "step": 7100
    },
    {
      "epoch": 0.45574085952533677,
      "grad_norm": 1.2386079454717946,
      "learning_rate": 0.00013231218746417595,
      "loss": 0.7522,
      "step": 7105
    },
    {
      "epoch": 0.45606157793457347,
      "grad_norm": 0.7962051132713993,
      "learning_rate": 0.0001322062207856717,
      "loss": 0.8145,
      "step": 7110
    },
    {
      "epoch": 0.4563822963438101,
      "grad_norm": 1.0329953407444796,
      "learning_rate": 0.00013210021374242134,
      "loss": 0.7769,
      "step": 7115
    },
    {
      "epoch": 0.4567030147530468,
      "grad_norm": 0.9259650281367799,
      "learning_rate": 0.00013199416646728573,
      "loss": 0.6457,
      "step": 7120
    },
    {
      "epoch": 0.4570237331622835,
      "grad_norm": 0.9088503892075743,
      "learning_rate": 0.0001318880790931762,
      "loss": 0.6294,
      "step": 7125
    },
    {
      "epoch": 0.4573444515715202,
      "grad_norm": 0.8985892524046365,
      "learning_rate": 0.00013178195175305438,
      "loss": 0.6828,
      "step": 7130
    },
    {
      "epoch": 0.4576651699807569,
      "grad_norm": 0.912515537663532,
      "learning_rate": 0.00013167578457993188,
      "loss": 0.7064,
      "step": 7135
    },
    {
      "epoch": 0.4579858883899936,
      "grad_norm": 0.9729614181574077,
      "learning_rate": 0.0001315695777068703,
      "loss": 0.7272,
      "step": 7140
    },
    {
      "epoch": 0.4583066067992303,
      "grad_norm": 0.6424734919666812,
      "learning_rate": 0.00013146333126698103,
      "loss": 0.6299,
      "step": 7145
    },
    {
      "epoch": 0.45862732520846694,
      "grad_norm": 0.9359545383993509,
      "learning_rate": 0.00013135704539342494,
      "loss": 0.6424,
      "step": 7150
    },
    {
      "epoch": 0.45894804361770364,
      "grad_norm": 0.7928212174336042,
      "learning_rate": 0.00013125072021941248,
      "loss": 0.6982,
      "step": 7155
    },
    {
      "epoch": 0.45926876202694034,
      "grad_norm": 0.5352504172374731,
      "learning_rate": 0.00013114435587820316,
      "loss": 0.5291,
      "step": 7160
    },
    {
      "epoch": 0.45958948043617703,
      "grad_norm": 0.7128732592198029,
      "learning_rate": 0.00013103795250310577,
      "loss": 0.7029,
      "step": 7165
    },
    {
      "epoch": 0.45991019884541373,
      "grad_norm": 1.0850764381783637,
      "learning_rate": 0.00013093151022747793,
      "loss": 0.7707,
      "step": 7170
    },
    {
      "epoch": 0.46023091725465043,
      "grad_norm": 1.0237223555264552,
      "learning_rate": 0.000130825029184726,
      "loss": 0.6769,
      "step": 7175
    },
    {
      "epoch": 0.4605516356638871,
      "grad_norm": 1.1136242211182483,
      "learning_rate": 0.00013071850950830492,
      "loss": 0.5703,
      "step": 7180
    },
    {
      "epoch": 0.4608723540731238,
      "grad_norm": 0.8143443059526504,
      "learning_rate": 0.00013061195133171814,
      "loss": 0.6334,
      "step": 7185
    },
    {
      "epoch": 0.46119307248236047,
      "grad_norm": 0.9509973045912795,
      "learning_rate": 0.00013050535478851728,
      "loss": 0.6757,
      "step": 7190
    },
    {
      "epoch": 0.46151379089159716,
      "grad_norm": 0.6191444236173257,
      "learning_rate": 0.00013039872001230208,
      "loss": 0.6217,
      "step": 7195
    },
    {
      "epoch": 0.46183450930083386,
      "grad_norm": 0.7788953363838352,
      "learning_rate": 0.00013029204713672015,
      "loss": 0.7384,
      "step": 7200
    },
    {
      "epoch": 0.46215522771007056,
      "grad_norm": 0.8450930304171778,
      "learning_rate": 0.00013018533629546695,
      "loss": 0.7298,
      "step": 7205
    },
    {
      "epoch": 0.46247594611930726,
      "grad_norm": 1.0385186485500146,
      "learning_rate": 0.0001300785876222854,
      "loss": 0.6529,
      "step": 7210
    },
    {
      "epoch": 0.46279666452854396,
      "grad_norm": 0.9152190048763487,
      "learning_rate": 0.00012997180125096596,
      "loss": 0.4276,
      "step": 7215
    },
    {
      "epoch": 0.46311738293778065,
      "grad_norm": 0.9787836443016305,
      "learning_rate": 0.00012986497731534618,
      "loss": 0.63,
      "step": 7220
    },
    {
      "epoch": 0.4634381013470173,
      "grad_norm": 0.9734043537474775,
      "learning_rate": 0.00012975811594931094,
      "loss": 0.7634,
      "step": 7225
    },
    {
      "epoch": 0.463758819756254,
      "grad_norm": 0.9713910942202003,
      "learning_rate": 0.00012965121728679175,
      "loss": 0.757,
      "step": 7230
    },
    {
      "epoch": 0.4640795381654907,
      "grad_norm": 0.9081157831943877,
      "learning_rate": 0.00012954428146176703,
      "loss": 0.7426,
      "step": 7235
    },
    {
      "epoch": 0.4644002565747274,
      "grad_norm": 0.7116758820381245,
      "learning_rate": 0.00012943730860826174,
      "loss": 0.8052,
      "step": 7240
    },
    {
      "epoch": 0.4647209749839641,
      "grad_norm": 0.8501864866133851,
      "learning_rate": 0.00012933029886034723,
      "loss": 0.7407,
      "step": 7245
    },
    {
      "epoch": 0.4650416933932008,
      "grad_norm": 0.9701598818030126,
      "learning_rate": 0.00012922325235214114,
      "loss": 0.672,
      "step": 7250
    },
    {
      "epoch": 0.4653624118024375,
      "grad_norm": 0.7147413441513334,
      "learning_rate": 0.00012911616921780717,
      "loss": 0.572,
      "step": 7255
    },
    {
      "epoch": 0.4656831302116741,
      "grad_norm": 1.1031756310087157,
      "learning_rate": 0.00012900904959155482,
      "loss": 0.502,
      "step": 7260
    },
    {
      "epoch": 0.4660038486209108,
      "grad_norm": 0.9549539883250536,
      "learning_rate": 0.0001289018936076395,
      "loss": 0.7697,
      "step": 7265
    },
    {
      "epoch": 0.4663245670301475,
      "grad_norm": 0.7061368474604979,
      "learning_rate": 0.00012879470140036205,
      "loss": 0.77,
      "step": 7270
    },
    {
      "epoch": 0.4666452854393842,
      "grad_norm": 0.8174054654625066,
      "learning_rate": 0.00012868747310406875,
      "loss": 0.644,
      "step": 7275
    },
    {
      "epoch": 0.4669660038486209,
      "grad_norm": 1.0847763653058102,
      "learning_rate": 0.00012858020885315118,
      "loss": 0.6265,
      "step": 7280
    },
    {
      "epoch": 0.4672867222578576,
      "grad_norm": 0.7498493863919715,
      "learning_rate": 0.00012847290878204584,
      "loss": 0.6246,
      "step": 7285
    },
    {
      "epoch": 0.4676074406670943,
      "grad_norm": 0.981941482754815,
      "learning_rate": 0.0001283655730252343,
      "loss": 0.6622,
      "step": 7290
    },
    {
      "epoch": 0.467928159076331,
      "grad_norm": 0.9518018861299121,
      "learning_rate": 0.00012825820171724267,
      "loss": 0.6284,
      "step": 7295
    },
    {
      "epoch": 0.46824887748556765,
      "grad_norm": 0.8663834243061985,
      "learning_rate": 0.00012815079499264178,
      "loss": 0.5667,
      "step": 7300
    },
    {
      "epoch": 0.46856959589480435,
      "grad_norm": 0.7672027770311252,
      "learning_rate": 0.00012804335298604672,
      "loss": 0.7221,
      "step": 7305
    },
    {
      "epoch": 0.46889031430404104,
      "grad_norm": 0.8035416637587046,
      "learning_rate": 0.00012793587583211693,
      "loss": 0.5737,
      "step": 7310
    },
    {
      "epoch": 0.46921103271327774,
      "grad_norm": 0.7309561664000054,
      "learning_rate": 0.00012782836366555578,
      "loss": 0.6313,
      "step": 7315
    },
    {
      "epoch": 0.46953175112251444,
      "grad_norm": 0.6252749910832299,
      "learning_rate": 0.00012772081662111053,
      "loss": 0.6736,
      "step": 7320
    },
    {
      "epoch": 0.46985246953175114,
      "grad_norm": 1.025835083057594,
      "learning_rate": 0.00012761323483357227,
      "loss": 0.5665,
      "step": 7325
    },
    {
      "epoch": 0.47017318794098784,
      "grad_norm": 0.6525095712503345,
      "learning_rate": 0.00012750561843777552,
      "loss": 0.6443,
      "step": 7330
    },
    {
      "epoch": 0.4704939063502245,
      "grad_norm": 0.7418969128305869,
      "learning_rate": 0.00012739796756859825,
      "loss": 0.8236,
      "step": 7335
    },
    {
      "epoch": 0.4708146247594612,
      "grad_norm": 1.0413884397203683,
      "learning_rate": 0.00012729028236096155,
      "loss": 0.6624,
      "step": 7340
    },
    {
      "epoch": 0.4711353431686979,
      "grad_norm": 0.9159067009468284,
      "learning_rate": 0.0001271825629498296,
      "loss": 0.6376,
      "step": 7345
    },
    {
      "epoch": 0.47145606157793457,
      "grad_norm": 0.5992387879000995,
      "learning_rate": 0.0001270748094702095,
      "loss": 0.5685,
      "step": 7350
    },
    {
      "epoch": 0.47177677998717127,
      "grad_norm": 1.7163402868588182,
      "learning_rate": 0.00012696702205715088,
      "loss": 0.5311,
      "step": 7355
    },
    {
      "epoch": 0.47209749839640797,
      "grad_norm": 0.7926851445802399,
      "learning_rate": 0.00012685920084574618,
      "loss": 0.7548,
      "step": 7360
    },
    {
      "epoch": 0.47241821680564466,
      "grad_norm": 0.9751658539863987,
      "learning_rate": 0.0001267513459711299,
      "loss": 0.6665,
      "step": 7365
    },
    {
      "epoch": 0.47273893521488136,
      "grad_norm": 1.0752483823874541,
      "learning_rate": 0.00012664345756847892,
      "loss": 0.583,
      "step": 7370
    },
    {
      "epoch": 0.473059653624118,
      "grad_norm": 1.0127918776763205,
      "learning_rate": 0.00012653553577301202,
      "loss": 0.749,
      "step": 7375
    },
    {
      "epoch": 0.4733803720333547,
      "grad_norm": 0.9059323990908674,
      "learning_rate": 0.00012642758071999,
      "loss": 0.7049,
      "step": 7380
    },
    {
      "epoch": 0.4737010904425914,
      "grad_norm": 0.8259800182390388,
      "learning_rate": 0.00012631959254471515,
      "loss": 0.6771,
      "step": 7385
    },
    {
      "epoch": 0.4740218088518281,
      "grad_norm": 1.47432552983105,
      "learning_rate": 0.00012621157138253142,
      "loss": 0.5965,
      "step": 7390
    },
    {
      "epoch": 0.4743425272610648,
      "grad_norm": 0.9830245238116091,
      "learning_rate": 0.00012610351736882402,
      "loss": 0.7302,
      "step": 7395
    },
    {
      "epoch": 0.4746632456703015,
      "grad_norm": 0.9860227904680734,
      "learning_rate": 0.00012599543063901935,
      "loss": 0.6942,
      "step": 7400
    },
    {
      "epoch": 0.4749839640795382,
      "grad_norm": 0.9011424798066042,
      "learning_rate": 0.00012588731132858486,
      "loss": 0.6456,
      "step": 7405
    },
    {
      "epoch": 0.47530468248877483,
      "grad_norm": 0.9091580384346607,
      "learning_rate": 0.00012577915957302872,
      "loss": 0.6091,
      "step": 7410
    },
    {
      "epoch": 0.47562540089801153,
      "grad_norm": 0.9741008974793179,
      "learning_rate": 0.00012567097550789997,
      "loss": 0.6012,
      "step": 7415
    },
    {
      "epoch": 0.4759461193072482,
      "grad_norm": 0.9602884477063278,
      "learning_rate": 0.00012556275926878789,
      "loss": 0.6792,
      "step": 7420
    },
    {
      "epoch": 0.4762668377164849,
      "grad_norm": 0.6210052131474215,
      "learning_rate": 0.00012545451099132225,
      "loss": 0.6193,
      "step": 7425
    },
    {
      "epoch": 0.4765875561257216,
      "grad_norm": 0.8832670583789428,
      "learning_rate": 0.000125346230811173,
      "loss": 0.6106,
      "step": 7430
    },
    {
      "epoch": 0.4769082745349583,
      "grad_norm": 0.851189577398919,
      "learning_rate": 0.00012523791886404986,
      "loss": 0.8305,
      "step": 7435
    },
    {
      "epoch": 0.477228992944195,
      "grad_norm": 1.2879732211506167,
      "learning_rate": 0.00012512957528570265,
      "loss": 0.5887,
      "step": 7440
    },
    {
      "epoch": 0.47754971135343166,
      "grad_norm": 0.5699068076911031,
      "learning_rate": 0.0001250212002119207,
      "loss": 0.5558,
      "step": 7445
    },
    {
      "epoch": 0.47787042976266836,
      "grad_norm": 1.1918583269997756,
      "learning_rate": 0.00012491279377853268,
      "loss": 0.6408,
      "step": 7450
    },
    {
      "epoch": 0.47819114817190506,
      "grad_norm": 1.4317720523654553,
      "learning_rate": 0.0001248043561214068,
      "loss": 0.6172,
      "step": 7455
    },
    {
      "epoch": 0.47851186658114175,
      "grad_norm": 1.0666113380037154,
      "learning_rate": 0.0001246958873764503,
      "loss": 0.7485,
      "step": 7460
    },
    {
      "epoch": 0.47883258499037845,
      "grad_norm": 1.2123844625766853,
      "learning_rate": 0.00012458738767960937,
      "loss": 0.7277,
      "step": 7465
    },
    {
      "epoch": 0.47915330339961515,
      "grad_norm": 0.6850700187680755,
      "learning_rate": 0.00012447885716686892,
      "loss": 0.6412,
      "step": 7470
    },
    {
      "epoch": 0.47947402180885185,
      "grad_norm": 0.7818905955159324,
      "learning_rate": 0.00012437029597425268,
      "loss": 0.6845,
      "step": 7475
    },
    {
      "epoch": 0.47979474021808854,
      "grad_norm": 0.7985800895037933,
      "learning_rate": 0.00012426170423782265,
      "loss": 0.7376,
      "step": 7480
    },
    {
      "epoch": 0.4801154586273252,
      "grad_norm": 1.4988959271026578,
      "learning_rate": 0.0001241530820936792,
      "loss": 0.6025,
      "step": 7485
    },
    {
      "epoch": 0.4804361770365619,
      "grad_norm": 0.7532644364170019,
      "learning_rate": 0.00012404442967796077,
      "loss": 0.7597,
      "step": 7490
    },
    {
      "epoch": 0.4807568954457986,
      "grad_norm": 0.9781127180520404,
      "learning_rate": 0.0001239357471268438,
      "loss": 0.7113,
      "step": 7495
    },
    {
      "epoch": 0.4810776138550353,
      "grad_norm": 1.2808191157193494,
      "learning_rate": 0.00012382703457654247,
      "loss": 0.7197,
      "step": 7500
    },
    {
      "epoch": 0.481398332264272,
      "grad_norm": 0.9577008167614253,
      "learning_rate": 0.00012371829216330842,
      "loss": 0.6633,
      "step": 7505
    },
    {
      "epoch": 0.4817190506735087,
      "grad_norm": 0.9163574634981259,
      "learning_rate": 0.000123609520023431,
      "loss": 0.6577,
      "step": 7510
    },
    {
      "epoch": 0.4820397690827454,
      "grad_norm": 0.9436379402563304,
      "learning_rate": 0.00012350071829323657,
      "loss": 0.665,
      "step": 7515
    },
    {
      "epoch": 0.482360487491982,
      "grad_norm": 0.8955893724229462,
      "learning_rate": 0.0001233918871090887,
      "loss": 0.65,
      "step": 7520
    },
    {
      "epoch": 0.4826812059012187,
      "grad_norm": 1.1039069837177617,
      "learning_rate": 0.0001232830266073879,
      "loss": 0.6262,
      "step": 7525
    },
    {
      "epoch": 0.4830019243104554,
      "grad_norm": 0.8240710234420133,
      "learning_rate": 0.00012317413692457125,
      "loss": 0.7796,
      "step": 7530
    },
    {
      "epoch": 0.4833226427196921,
      "grad_norm": 0.5672101461672577,
      "learning_rate": 0.0001230652181971126,
      "loss": 0.6606,
      "step": 7535
    },
    {
      "epoch": 0.4836433611289288,
      "grad_norm": 0.6312799174708051,
      "learning_rate": 0.00012295627056152205,
      "loss": 0.6847,
      "step": 7540
    },
    {
      "epoch": 0.4839640795381655,
      "grad_norm": 0.9279904903302523,
      "learning_rate": 0.0001228472941543461,
      "loss": 0.7298,
      "step": 7545
    },
    {
      "epoch": 0.4842847979474022,
      "grad_norm": 1.0061624072103414,
      "learning_rate": 0.00012273828911216715,
      "loss": 0.688,
      "step": 7550
    },
    {
      "epoch": 0.48460551635663884,
      "grad_norm": 0.9531338313200752,
      "learning_rate": 0.00012262925557160362,
      "loss": 0.7381,
      "step": 7555
    },
    {
      "epoch": 0.48492623476587554,
      "grad_norm": 0.9084381778100004,
      "learning_rate": 0.0001225201936693095,
      "loss": 0.5676,
      "step": 7560
    },
    {
      "epoch": 0.48524695317511224,
      "grad_norm": 1.0203436397332364,
      "learning_rate": 0.00012241110354197448,
      "loss": 0.571,
      "step": 7565
    },
    {
      "epoch": 0.48556767158434894,
      "grad_norm": 0.9169062207127215,
      "learning_rate": 0.00012230198532632347,
      "loss": 0.6456,
      "step": 7570
    },
    {
      "epoch": 0.48588838999358563,
      "grad_norm": 0.6002350728637655,
      "learning_rate": 0.0001221928391591167,
      "loss": 0.6998,
      "step": 7575
    },
    {
      "epoch": 0.48620910840282233,
      "grad_norm": 0.5575094896397851,
      "learning_rate": 0.00012208366517714946,
      "loss": 0.6751,
      "step": 7580
    },
    {
      "epoch": 0.48652982681205903,
      "grad_norm": 0.7309868460212633,
      "learning_rate": 0.00012197446351725174,
      "loss": 0.6152,
      "step": 7585
    },
    {
      "epoch": 0.4868505452212957,
      "grad_norm": 0.9692168543018325,
      "learning_rate": 0.0001218652343162884,
      "loss": 0.6374,
      "step": 7590
    },
    {
      "epoch": 0.48717126363053237,
      "grad_norm": 0.7189150002506619,
      "learning_rate": 0.00012175597771115871,
      "loss": 0.7784,
      "step": 7595
    },
    {
      "epoch": 0.48749198203976907,
      "grad_norm": 0.8123916784425887,
      "learning_rate": 0.0001216466938387963,
      "loss": 0.5559,
      "step": 7600
    },
    {
      "epoch": 0.48781270044900576,
      "grad_norm": 0.903323959073406,
      "learning_rate": 0.00012153738283616897,
      "loss": 0.6245,
      "step": 7605
    },
    {
      "epoch": 0.48813341885824246,
      "grad_norm": 1.1841897784251287,
      "learning_rate": 0.00012142804484027862,
      "loss": 0.7076,
      "step": 7610
    },
    {
      "epoch": 0.48845413726747916,
      "grad_norm": 0.96970852663879,
      "learning_rate": 0.0001213186799881608,
      "loss": 0.6394,
      "step": 7615
    },
    {
      "epoch": 0.48877485567671586,
      "grad_norm": 0.9366182177279975,
      "learning_rate": 0.00012120928841688486,
      "loss": 0.6738,
      "step": 7620
    },
    {
      "epoch": 0.48909557408595256,
      "grad_norm": 0.6547998596688648,
      "learning_rate": 0.0001210998702635536,
      "loss": 0.5484,
      "step": 7625
    },
    {
      "epoch": 0.4894162924951892,
      "grad_norm": 0.61835825910844,
      "learning_rate": 0.00012099042566530318,
      "loss": 0.7106,
      "step": 7630
    },
    {
      "epoch": 0.4897370109044259,
      "grad_norm": 0.9889648893113016,
      "learning_rate": 0.00012088095475930281,
      "loss": 0.6665,
      "step": 7635
    },
    {
      "epoch": 0.4900577293136626,
      "grad_norm": 1.0009313158645148,
      "learning_rate": 0.00012077145768275473,
      "loss": 0.7342,
      "step": 7640
    },
    {
      "epoch": 0.4903784477228993,
      "grad_norm": 1.207980433506984,
      "learning_rate": 0.00012066193457289397,
      "loss": 0.797,
      "step": 7645
    },
    {
      "epoch": 0.490699166132136,
      "grad_norm": 0.7854979595695312,
      "learning_rate": 0.00012055238556698816,
      "loss": 0.6988,
      "step": 7650
    },
    {
      "epoch": 0.4910198845413727,
      "grad_norm": 0.7188797039130606,
      "learning_rate": 0.00012044281080233746,
      "loss": 0.7325,
      "step": 7655
    },
    {
      "epoch": 0.4913406029506094,
      "grad_norm": 0.9561317362271494,
      "learning_rate": 0.00012033321041627425,
      "loss": 0.6506,
      "step": 7660
    },
    {
      "epoch": 0.4916613213598461,
      "grad_norm": 0.7528076899928123,
      "learning_rate": 0.00012022358454616306,
      "loss": 0.5609,
      "step": 7665
    },
    {
      "epoch": 0.4919820397690827,
      "grad_norm": 0.8596601027470778,
      "learning_rate": 0.0001201139333294003,
      "loss": 0.6597,
      "step": 7670
    },
    {
      "epoch": 0.4923027581783194,
      "grad_norm": 0.6508137207715219,
      "learning_rate": 0.00012000425690341422,
      "loss": 0.4953,
      "step": 7675
    },
    {
      "epoch": 0.4926234765875561,
      "grad_norm": 0.8505276898684504,
      "learning_rate": 0.00011989455540566462,
      "loss": 0.6649,
      "step": 7680
    },
    {
      "epoch": 0.4929441949967928,
      "grad_norm": 0.758748378012195,
      "learning_rate": 0.00011978482897364273,
      "loss": 0.7204,
      "step": 7685
    },
    {
      "epoch": 0.4932649134060295,
      "grad_norm": 0.8242651845310669,
      "learning_rate": 0.00011967507774487108,
      "loss": 0.6598,
      "step": 7690
    },
    {
      "epoch": 0.4935856318152662,
      "grad_norm": 0.8816627197677691,
      "learning_rate": 0.0001195653018569032,
      "loss": 0.8369,
      "step": 7695
    },
    {
      "epoch": 0.4939063502245029,
      "grad_norm": 0.781020774879966,
      "learning_rate": 0.00011945550144732354,
      "loss": 0.7912,
      "step": 7700
    },
    {
      "epoch": 0.49422706863373955,
      "grad_norm": 0.5912028419510443,
      "learning_rate": 0.00011934567665374732,
      "loss": 0.673,
      "step": 7705
    },
    {
      "epoch": 0.49454778704297625,
      "grad_norm": 0.7852150600454825,
      "learning_rate": 0.00011923582761382031,
      "loss": 0.6989,
      "step": 7710
    },
    {
      "epoch": 0.49486850545221295,
      "grad_norm": 0.8345934386959575,
      "learning_rate": 0.00011912595446521868,
      "loss": 0.6319,
      "step": 7715
    },
    {
      "epoch": 0.49518922386144965,
      "grad_norm": 1.2815263854782484,
      "learning_rate": 0.0001190160573456488,
      "loss": 0.6247,
      "step": 7720
    },
    {
      "epoch": 0.49550994227068634,
      "grad_norm": 1.1234841964502218,
      "learning_rate": 0.00011890613639284704,
      "loss": 0.653,
      "step": 7725
    },
    {
      "epoch": 0.49583066067992304,
      "grad_norm": 0.9428012694473118,
      "learning_rate": 0.00011879619174457976,
      "loss": 0.9064,
      "step": 7730
    },
    {
      "epoch": 0.49615137908915974,
      "grad_norm": 0.7822481283735353,
      "learning_rate": 0.00011868622353864285,
      "loss": 0.5887,
      "step": 7735
    },
    {
      "epoch": 0.4964720974983964,
      "grad_norm": 0.6197300598147442,
      "learning_rate": 0.00011857623191286186,
      "loss": 0.5871,
      "step": 7740
    },
    {
      "epoch": 0.4967928159076331,
      "grad_norm": 0.6742268900193886,
      "learning_rate": 0.00011846621700509171,
      "loss": 0.6153,
      "step": 7745
    },
    {
      "epoch": 0.4971135343168698,
      "grad_norm": 1.0097074349573119,
      "learning_rate": 0.00011835617895321633,
      "loss": 0.726,
      "step": 7750
    },
    {
      "epoch": 0.4974342527261065,
      "grad_norm": 0.7938742619155006,
      "learning_rate": 0.00011824611789514881,
      "loss": 0.7576,
      "step": 7755
    },
    {
      "epoch": 0.49775497113534317,
      "grad_norm": 0.7594193522785816,
      "learning_rate": 0.00011813603396883108,
      "loss": 0.631,
      "step": 7760
    },
    {
      "epoch": 0.49807568954457987,
      "grad_norm": 1.1449681048330884,
      "learning_rate": 0.0001180259273122336,
      "loss": 0.8346,
      "step": 7765
    },
    {
      "epoch": 0.49839640795381657,
      "grad_norm": 0.6106704277152839,
      "learning_rate": 0.00011791579806335547,
      "loss": 0.7094,
      "step": 7770
    },
    {
      "epoch": 0.49871712636305326,
      "grad_norm": 0.9764152562715487,
      "learning_rate": 0.000117805646360224,
      "loss": 0.7922,
      "step": 7775
    },
    {
      "epoch": 0.4990378447722899,
      "grad_norm": 1.4581971435959649,
      "learning_rate": 0.00011769547234089469,
      "loss": 0.7598,
      "step": 7780
    },
    {
      "epoch": 0.4993585631815266,
      "grad_norm": 1.1726593622900077,
      "learning_rate": 0.00011758527614345097,
      "loss": 0.6934,
      "step": 7785
    },
    {
      "epoch": 0.4996792815907633,
      "grad_norm": 1.382229173196648,
      "learning_rate": 0.00011747505790600412,
      "loss": 0.6793,
      "step": 7790
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.7583044707535523,
      "learning_rate": 0.00011736481776669306,
      "loss": 0.7244,
      "step": 7795
    },
    {
      "epoch": 0.5003207184092366,
      "grad_norm": 1.0327502481504163,
      "learning_rate": 0.000117254555863684,
      "loss": 0.7023,
      "step": 7800
    },
    {
      "epoch": 0.5006414368184734,
      "grad_norm": 0.6928521319692996,
      "learning_rate": 0.00011714427233517069,
      "loss": 0.5508,
      "step": 7805
    },
    {
      "epoch": 0.50096215522771,
      "grad_norm": 0.6645980452165248,
      "learning_rate": 0.0001170339673193737,
      "loss": 0.7463,
      "step": 7810
    },
    {
      "epoch": 0.5012828736369468,
      "grad_norm": 0.6668044106727686,
      "learning_rate": 0.00011692364095454076,
      "loss": 0.6357,
      "step": 7815
    },
    {
      "epoch": 0.5016035920461834,
      "grad_norm": 0.9287710383565055,
      "learning_rate": 0.00011681329337894623,
      "loss": 0.6308,
      "step": 7820
    },
    {
      "epoch": 0.5019243104554202,
      "grad_norm": 1.3104043465513664,
      "learning_rate": 0.0001167029247308911,
      "loss": 0.5399,
      "step": 7825
    },
    {
      "epoch": 0.5022450288646568,
      "grad_norm": 1.428373507944948,
      "learning_rate": 0.00011659253514870276,
      "loss": 0.7011,
      "step": 7830
    },
    {
      "epoch": 0.5025657472738935,
      "grad_norm": 0.833100109623975,
      "learning_rate": 0.00011648212477073484,
      "loss": 0.7404,
      "step": 7835
    },
    {
      "epoch": 0.5028864656831302,
      "grad_norm": 1.0751700158927022,
      "learning_rate": 0.00011637169373536698,
      "loss": 0.6389,
      "step": 7840
    },
    {
      "epoch": 0.5032071840923669,
      "grad_norm": 0.9610389244865,
      "learning_rate": 0.00011626124218100483,
      "loss": 0.732,
      "step": 7845
    },
    {
      "epoch": 0.5035279025016036,
      "grad_norm": 1.4064338381179782,
      "learning_rate": 0.00011615077024607965,
      "loss": 0.7248,
      "step": 7850
    },
    {
      "epoch": 0.5038486209108403,
      "grad_norm": 1.0089167449788845,
      "learning_rate": 0.00011604027806904833,
      "loss": 0.6808,
      "step": 7855
    },
    {
      "epoch": 0.504169339320077,
      "grad_norm": 0.8297282225570892,
      "learning_rate": 0.00011592976578839303,
      "loss": 0.7505,
      "step": 7860
    },
    {
      "epoch": 0.5044900577293137,
      "grad_norm": 0.8562597418732677,
      "learning_rate": 0.00011581923354262117,
      "loss": 0.7069,
      "step": 7865
    },
    {
      "epoch": 0.5048107761385503,
      "grad_norm": 1.1555443138727173,
      "learning_rate": 0.00011570868147026517,
      "loss": 0.6213,
      "step": 7870
    },
    {
      "epoch": 0.505131494547787,
      "grad_norm": 1.4259877059174733,
      "learning_rate": 0.00011559810970988232,
      "loss": 0.6105,
      "step": 7875
    },
    {
      "epoch": 0.5054522129570237,
      "grad_norm": 0.6183735071336424,
      "learning_rate": 0.00011548751840005459,
      "loss": 0.4662,
      "step": 7880
    },
    {
      "epoch": 0.5057729313662604,
      "grad_norm": 0.9453435423443054,
      "learning_rate": 0.00011537690767938843,
      "loss": 0.6083,
      "step": 7885
    },
    {
      "epoch": 0.5060936497754971,
      "grad_norm": 0.6729282582317203,
      "learning_rate": 0.00011526627768651459,
      "loss": 0.7553,
      "step": 7890
    },
    {
      "epoch": 0.5064143681847338,
      "grad_norm": 0.8579324957843062,
      "learning_rate": 0.00011515562856008808,
      "loss": 0.7014,
      "step": 7895
    },
    {
      "epoch": 0.5067350865939705,
      "grad_norm": 0.9652710068101304,
      "learning_rate": 0.00011504496043878776,
      "loss": 0.7203,
      "step": 7900
    },
    {
      "epoch": 0.5070558050032072,
      "grad_norm": 1.3328325121052935,
      "learning_rate": 0.00011493427346131636,
      "loss": 0.7462,
      "step": 7905
    },
    {
      "epoch": 0.5073765234124439,
      "grad_norm": 0.7750774157499563,
      "learning_rate": 0.00011482356776640028,
      "loss": 0.7554,
      "step": 7910
    },
    {
      "epoch": 0.5076972418216805,
      "grad_norm": 0.7771858604565626,
      "learning_rate": 0.00011471284349278928,
      "loss": 0.7032,
      "step": 7915
    },
    {
      "epoch": 0.5080179602309173,
      "grad_norm": 0.9990707053591126,
      "learning_rate": 0.0001146021007792565,
      "loss": 0.5966,
      "step": 7920
    },
    {
      "epoch": 0.5083386786401539,
      "grad_norm": 0.9864579497103747,
      "learning_rate": 0.00011449133976459816,
      "loss": 0.701,
      "step": 7925
    },
    {
      "epoch": 0.5086593970493907,
      "grad_norm": 0.9752505086126679,
      "learning_rate": 0.0001143805605876334,
      "loss": 0.6502,
      "step": 7930
    },
    {
      "epoch": 0.5089801154586273,
      "grad_norm": 1.3306389404931571,
      "learning_rate": 0.00011426976338720412,
      "loss": 0.6592,
      "step": 7935
    },
    {
      "epoch": 0.5093008338678641,
      "grad_norm": 0.6705402480174242,
      "learning_rate": 0.00011415894830217486,
      "loss": 0.6531,
      "step": 7940
    },
    {
      "epoch": 0.5096215522771007,
      "grad_norm": 0.8130683741487627,
      "learning_rate": 0.00011404811547143251,
      "loss": 0.7333,
      "step": 7945
    },
    {
      "epoch": 0.5099422706863374,
      "grad_norm": 1.1664159763922086,
      "learning_rate": 0.0001139372650338862,
      "loss": 0.8146,
      "step": 7950
    },
    {
      "epoch": 0.5102629890955741,
      "grad_norm": 0.5999515830143689,
      "learning_rate": 0.00011382639712846721,
      "loss": 0.5825,
      "step": 7955
    },
    {
      "epoch": 0.5105837075048107,
      "grad_norm": 1.1054727651684402,
      "learning_rate": 0.00011371551189412868,
      "loss": 0.7374,
      "step": 7960
    },
    {
      "epoch": 0.5109044259140475,
      "grad_norm": 1.0319949146313503,
      "learning_rate": 0.00011360460946984537,
      "loss": 0.7562,
      "step": 7965
    },
    {
      "epoch": 0.5112251443232841,
      "grad_norm": 0.6047170156572763,
      "learning_rate": 0.00011349368999461374,
      "loss": 0.7588,
      "step": 7970
    },
    {
      "epoch": 0.5115458627325209,
      "grad_norm": 0.8725079332758466,
      "learning_rate": 0.00011338275360745147,
      "loss": 0.7421,
      "step": 7975
    },
    {
      "epoch": 0.5118665811417575,
      "grad_norm": 0.784376771151006,
      "learning_rate": 0.00011327180044739755,
      "loss": 0.5837,
      "step": 7980
    },
    {
      "epoch": 0.5121872995509942,
      "grad_norm": 0.8977359490481988,
      "learning_rate": 0.00011316083065351195,
      "loss": 0.7392,
      "step": 7985
    },
    {
      "epoch": 0.5125080179602309,
      "grad_norm": 0.653772242009018,
      "learning_rate": 0.00011304984436487551,
      "loss": 0.6166,
      "step": 7990
    },
    {
      "epoch": 0.5128287363694676,
      "grad_norm": 1.2310492343797879,
      "learning_rate": 0.00011293884172058971,
      "loss": 0.5507,
      "step": 7995
    },
    {
      "epoch": 0.5131494547787043,
      "grad_norm": 1.0077531207139014,
      "learning_rate": 0.00011282782285977649,
      "loss": 0.6358,
      "step": 8000
    },
    {
      "epoch": 0.513470173187941,
      "grad_norm": 1.19554249733326,
      "learning_rate": 0.00011271678792157823,
      "loss": 0.6614,
      "step": 8005
    },
    {
      "epoch": 0.5137908915971777,
      "grad_norm": 0.8654028252618859,
      "learning_rate": 0.00011260573704515734,
      "loss": 0.6444,
      "step": 8010
    },
    {
      "epoch": 0.5141116100064144,
      "grad_norm": 0.9637998906695273,
      "learning_rate": 0.00011249467036969632,
      "loss": 0.6859,
      "step": 8015
    },
    {
      "epoch": 0.514432328415651,
      "grad_norm": 1.2621981138132725,
      "learning_rate": 0.00011238358803439739,
      "loss": 0.7247,
      "step": 8020
    },
    {
      "epoch": 0.5147530468248878,
      "grad_norm": 0.6255230049474781,
      "learning_rate": 0.0001122724901784824,
      "loss": 0.7025,
      "step": 8025
    },
    {
      "epoch": 0.5150737652341244,
      "grad_norm": 0.8124027597004405,
      "learning_rate": 0.00011216137694119271,
      "loss": 0.6465,
      "step": 8030
    },
    {
      "epoch": 0.5153944836433612,
      "grad_norm": 0.7060753692578354,
      "learning_rate": 0.00011205024846178886,
      "loss": 0.5977,
      "step": 8035
    },
    {
      "epoch": 0.5157152020525978,
      "grad_norm": 0.9066775542047206,
      "learning_rate": 0.00011193910487955059,
      "loss": 0.6407,
      "step": 8040
    },
    {
      "epoch": 0.5160359204618346,
      "grad_norm": 0.6903326908804434,
      "learning_rate": 0.00011182794633377653,
      "loss": 0.6925,
      "step": 8045
    },
    {
      "epoch": 0.5163566388710712,
      "grad_norm": 0.9472934152436594,
      "learning_rate": 0.00011171677296378411,
      "loss": 0.7609,
      "step": 8050
    },
    {
      "epoch": 0.5166773572803078,
      "grad_norm": 1.0828907895794335,
      "learning_rate": 0.0001116055849089092,
      "loss": 0.7855,
      "step": 8055
    },
    {
      "epoch": 0.5169980756895446,
      "grad_norm": 1.3155495321215651,
      "learning_rate": 0.00011149438230850626,
      "loss": 0.6561,
      "step": 8060
    },
    {
      "epoch": 0.5173187940987812,
      "grad_norm": 0.7751536928800652,
      "learning_rate": 0.00011138316530194782,
      "loss": 0.6302,
      "step": 8065
    },
    {
      "epoch": 0.517639512508018,
      "grad_norm": 1.278374102598091,
      "learning_rate": 0.00011127193402862457,
      "loss": 0.6741,
      "step": 8070
    },
    {
      "epoch": 0.5179602309172546,
      "grad_norm": 0.7961067269873462,
      "learning_rate": 0.00011116068862794506,
      "loss": 0.7248,
      "step": 8075
    },
    {
      "epoch": 0.5182809493264914,
      "grad_norm": 0.9325619210714818,
      "learning_rate": 0.0001110494292393355,
      "loss": 0.6036,
      "step": 8080
    },
    {
      "epoch": 0.518601667735728,
      "grad_norm": 0.9427970552237784,
      "learning_rate": 0.00011093815600223966,
      "loss": 0.6906,
      "step": 8085
    },
    {
      "epoch": 0.5189223861449648,
      "grad_norm": 0.9820235565256558,
      "learning_rate": 0.00011082686905611872,
      "loss": 0.6996,
      "step": 8090
    },
    {
      "epoch": 0.5192431045542014,
      "grad_norm": 0.7847448260775505,
      "learning_rate": 0.00011071556854045098,
      "loss": 0.67,
      "step": 8095
    },
    {
      "epoch": 0.5195638229634381,
      "grad_norm": 0.7114519312016215,
      "learning_rate": 0.00011060425459473169,
      "loss": 0.6844,
      "step": 8100
    },
    {
      "epoch": 0.5198845413726748,
      "grad_norm": 0.6238373643554763,
      "learning_rate": 0.00011049292735847312,
      "loss": 0.5971,
      "step": 8105
    },
    {
      "epoch": 0.5202052597819115,
      "grad_norm": 0.9399929160198239,
      "learning_rate": 0.00011038158697120395,
      "loss": 0.6189,
      "step": 8110
    },
    {
      "epoch": 0.5205259781911482,
      "grad_norm": 1.1129758526237858,
      "learning_rate": 0.00011027023357246955,
      "loss": 0.7023,
      "step": 8115
    },
    {
      "epoch": 0.5208466966003849,
      "grad_norm": 1.049212324811729,
      "learning_rate": 0.00011015886730183152,
      "loss": 0.7014,
      "step": 8120
    },
    {
      "epoch": 0.5211674150096216,
      "grad_norm": 0.8599253114644705,
      "learning_rate": 0.00011004748829886755,
      "loss": 0.6835,
      "step": 8125
    },
    {
      "epoch": 0.5214881334188582,
      "grad_norm": 0.6066610732008468,
      "learning_rate": 0.0001099360967031714,
      "loss": 0.5214,
      "step": 8130
    },
    {
      "epoch": 0.5218088518280949,
      "grad_norm": 0.8343848602348406,
      "learning_rate": 0.00010982469265435249,
      "loss": 0.6169,
      "step": 8135
    },
    {
      "epoch": 0.5221295702373316,
      "grad_norm": 0.4237175002588996,
      "learning_rate": 0.00010971327629203587,
      "loss": 0.5628,
      "step": 8140
    },
    {
      "epoch": 0.5224502886465683,
      "grad_norm": 0.7612853893387608,
      "learning_rate": 0.00010960184775586209,
      "loss": 0.6496,
      "step": 8145
    },
    {
      "epoch": 0.522771007055805,
      "grad_norm": 0.7090497030288603,
      "learning_rate": 0.00010949040718548693,
      "loss": 0.6699,
      "step": 8150
    },
    {
      "epoch": 0.5230917254650417,
      "grad_norm": 0.8137233187040953,
      "learning_rate": 0.00010937895472058126,
      "loss": 0.7825,
      "step": 8155
    },
    {
      "epoch": 0.5234124438742784,
      "grad_norm": 1.106458178679526,
      "learning_rate": 0.0001092674905008308,
      "loss": 0.5917,
      "step": 8160
    },
    {
      "epoch": 0.5237331622835151,
      "grad_norm": 1.1023421333903827,
      "learning_rate": 0.00010915601466593604,
      "loss": 0.652,
      "step": 8165
    },
    {
      "epoch": 0.5240538806927517,
      "grad_norm": 1.2339053368878727,
      "learning_rate": 0.00010904452735561204,
      "loss": 0.7531,
      "step": 8170
    },
    {
      "epoch": 0.5243745991019885,
      "grad_norm": 0.8536672713520308,
      "learning_rate": 0.00010893302870958824,
      "loss": 0.6808,
      "step": 8175
    },
    {
      "epoch": 0.5246953175112251,
      "grad_norm": 0.9072452347961674,
      "learning_rate": 0.00010882151886760827,
      "loss": 0.7883,
      "step": 8180
    },
    {
      "epoch": 0.5250160359204619,
      "grad_norm": 0.705408047927468,
      "learning_rate": 0.00010870999796942986,
      "loss": 0.7448,
      "step": 8185
    },
    {
      "epoch": 0.5253367543296985,
      "grad_norm": 0.84842819642806,
      "learning_rate": 0.00010859846615482448,
      "loss": 0.7873,
      "step": 8190
    },
    {
      "epoch": 0.5256574727389353,
      "grad_norm": 0.9668127437981949,
      "learning_rate": 0.00010848692356357735,
      "loss": 0.6553,
      "step": 8195
    },
    {
      "epoch": 0.5259781911481719,
      "grad_norm": 1.3910270737631052,
      "learning_rate": 0.00010837537033548718,
      "loss": 0.551,
      "step": 8200
    },
    {
      "epoch": 0.5262989095574085,
      "grad_norm": 0.8934045053705592,
      "learning_rate": 0.00010826380661036601,
      "loss": 0.755,
      "step": 8205
    },
    {
      "epoch": 0.5266196279666453,
      "grad_norm": 0.7580165266865208,
      "learning_rate": 0.0001081522325280391,
      "loss": 0.6785,
      "step": 8210
    },
    {
      "epoch": 0.5269403463758819,
      "grad_norm": 0.895270436973056,
      "learning_rate": 0.00010804064822834461,
      "loss": 0.6188,
      "step": 8215
    },
    {
      "epoch": 0.5272610647851187,
      "grad_norm": 0.8349917473129711,
      "learning_rate": 0.0001079290538511335,
      "loss": 0.5295,
      "step": 8220
    },
    {
      "epoch": 0.5275817831943553,
      "grad_norm": 1.0937712586985149,
      "learning_rate": 0.00010781744953626944,
      "loss": 0.718,
      "step": 8225
    },
    {
      "epoch": 0.5279025016035921,
      "grad_norm": 0.9776711832493594,
      "learning_rate": 0.00010770583542362848,
      "loss": 0.7394,
      "step": 8230
    },
    {
      "epoch": 0.5282232200128287,
      "grad_norm": 0.9916244110681041,
      "learning_rate": 0.00010759421165309898,
      "loss": 0.6302,
      "step": 8235
    },
    {
      "epoch": 0.5285439384220654,
      "grad_norm": 0.7709724576720045,
      "learning_rate": 0.00010748257836458142,
      "loss": 0.4377,
      "step": 8240
    },
    {
      "epoch": 0.5288646568313021,
      "grad_norm": 0.9553016321868766,
      "learning_rate": 0.00010737093569798815,
      "loss": 0.5929,
      "step": 8245
    },
    {
      "epoch": 0.5291853752405388,
      "grad_norm": 0.5921375135170813,
      "learning_rate": 0.00010725928379324335,
      "loss": 0.6308,
      "step": 8250
    },
    {
      "epoch": 0.5295060936497755,
      "grad_norm": 0.9409908884682822,
      "learning_rate": 0.00010714762279028275,
      "loss": 0.6488,
      "step": 8255
    },
    {
      "epoch": 0.5298268120590122,
      "grad_norm": 0.9164401991956044,
      "learning_rate": 0.00010703595282905343,
      "loss": 0.7185,
      "step": 8260
    },
    {
      "epoch": 0.5301475304682489,
      "grad_norm": 0.7915811080548818,
      "learning_rate": 0.00010692427404951379,
      "loss": 0.7002,
      "step": 8265
    },
    {
      "epoch": 0.5304682488774856,
      "grad_norm": 1.1633281858494344,
      "learning_rate": 0.00010681258659163322,
      "loss": 0.7142,
      "step": 8270
    },
    {
      "epoch": 0.5307889672867223,
      "grad_norm": 1.1360488426032926,
      "learning_rate": 0.00010670089059539201,
      "loss": 0.6164,
      "step": 8275
    },
    {
      "epoch": 0.531109685695959,
      "grad_norm": 0.9950081272171089,
      "learning_rate": 0.0001065891862007811,
      "loss": 0.5403,
      "step": 8280
    },
    {
      "epoch": 0.5314304041051956,
      "grad_norm": 1.0499402732473173,
      "learning_rate": 0.00010647747354780206,
      "loss": 0.6409,
      "step": 8285
    },
    {
      "epoch": 0.5317511225144324,
      "grad_norm": 0.9441134224109928,
      "learning_rate": 0.00010636575277646672,
      "loss": 0.5947,
      "step": 8290
    },
    {
      "epoch": 0.532071840923669,
      "grad_norm": 1.3058395760608197,
      "learning_rate": 0.00010625402402679712,
      "loss": 0.6901,
      "step": 8295
    },
    {
      "epoch": 0.5323925593329057,
      "grad_norm": 0.8650565306977751,
      "learning_rate": 0.0001061422874388253,
      "loss": 0.6536,
      "step": 8300
    },
    {
      "epoch": 0.5327132777421424,
      "grad_norm": 1.1023501837328433,
      "learning_rate": 0.0001060305431525931,
      "loss": 0.7735,
      "step": 8305
    },
    {
      "epoch": 0.5330339961513791,
      "grad_norm": 0.7402707462941108,
      "learning_rate": 0.00010591879130815206,
      "loss": 0.7746,
      "step": 8310
    },
    {
      "epoch": 0.5333547145606158,
      "grad_norm": 1.0334014975634367,
      "learning_rate": 0.0001058070320455631,
      "loss": 0.6197,
      "step": 8315
    },
    {
      "epoch": 0.5336754329698524,
      "grad_norm": 0.8973174424463937,
      "learning_rate": 0.00010569526550489656,
      "loss": 0.6662,
      "step": 8320
    },
    {
      "epoch": 0.5339961513790892,
      "grad_norm": 1.1260137879030736,
      "learning_rate": 0.00010558349182623182,
      "loss": 0.7384,
      "step": 8325
    },
    {
      "epoch": 0.5343168697883258,
      "grad_norm": 1.0775603650728314,
      "learning_rate": 0.00010547171114965721,
      "loss": 0.53,
      "step": 8330
    },
    {
      "epoch": 0.5346375881975626,
      "grad_norm": 0.8657241626493881,
      "learning_rate": 0.00010535992361526986,
      "loss": 0.6597,
      "step": 8335
    },
    {
      "epoch": 0.5349583066067992,
      "grad_norm": 0.7754986474145258,
      "learning_rate": 0.00010524812936317545,
      "loss": 0.7155,
      "step": 8340
    },
    {
      "epoch": 0.535279025016036,
      "grad_norm": 0.7235913108295569,
      "learning_rate": 0.00010513632853348817,
      "loss": 0.63,
      "step": 8345
    },
    {
      "epoch": 0.5355997434252726,
      "grad_norm": 1.0376021153773205,
      "learning_rate": 0.00010502452126633033,
      "loss": 0.7389,
      "step": 8350
    },
    {
      "epoch": 0.5359204618345093,
      "grad_norm": 1.0736867388991156,
      "learning_rate": 0.00010491270770183241,
      "loss": 0.7524,
      "step": 8355
    },
    {
      "epoch": 0.536241180243746,
      "grad_norm": 1.2875466262160882,
      "learning_rate": 0.00010480088798013274,
      "loss": 0.7637,
      "step": 8360
    },
    {
      "epoch": 0.5365618986529826,
      "grad_norm": 1.0698179015991502,
      "learning_rate": 0.00010468906224137736,
      "loss": 0.7777,
      "step": 8365
    },
    {
      "epoch": 0.5368826170622194,
      "grad_norm": 0.715308845951178,
      "learning_rate": 0.00010457723062571984,
      "loss": 0.581,
      "step": 8370
    },
    {
      "epoch": 0.537203335471456,
      "grad_norm": 1.9992463200156003,
      "learning_rate": 0.00010446539327332121,
      "loss": 0.6813,
      "step": 8375
    },
    {
      "epoch": 0.5375240538806928,
      "grad_norm": 0.9082670120549011,
      "learning_rate": 0.00010435355032434958,
      "loss": 0.8172,
      "step": 8380
    },
    {
      "epoch": 0.5378447722899294,
      "grad_norm": 0.5039137526581597,
      "learning_rate": 0.00010424170191898006,
      "loss": 0.6443,
      "step": 8385
    },
    {
      "epoch": 0.5381654906991661,
      "grad_norm": 0.8357611125226391,
      "learning_rate": 0.00010412984819739473,
      "loss": 0.6672,
      "step": 8390
    },
    {
      "epoch": 0.5384862091084028,
      "grad_norm": 0.9107912987485977,
      "learning_rate": 0.00010401798929978224,
      "loss": 0.6107,
      "step": 8395
    },
    {
      "epoch": 0.5388069275176395,
      "grad_norm": 0.8281442376194428,
      "learning_rate": 0.0001039061253663377,
      "loss": 0.6075,
      "step": 8400
    },
    {
      "epoch": 0.5391276459268762,
      "grad_norm": 0.7249862380029812,
      "learning_rate": 0.00010379425653726263,
      "loss": 0.7265,
      "step": 8405
    },
    {
      "epoch": 0.5394483643361129,
      "grad_norm": 0.9092092180370709,
      "learning_rate": 0.00010368238295276455,
      "loss": 0.6893,
      "step": 8410
    },
    {
      "epoch": 0.5397690827453496,
      "grad_norm": 0.6540167568734936,
      "learning_rate": 0.0001035705047530571,
      "loss": 0.7305,
      "step": 8415
    },
    {
      "epoch": 0.5400898011545863,
      "grad_norm": 0.7981383776198956,
      "learning_rate": 0.00010345862207835957,
      "loss": 0.6453,
      "step": 8420
    },
    {
      "epoch": 0.5404105195638229,
      "grad_norm": 0.945104000015912,
      "learning_rate": 0.00010334673506889696,
      "loss": 0.7016,
      "step": 8425
    },
    {
      "epoch": 0.5407312379730597,
      "grad_norm": 1.0547131113611765,
      "learning_rate": 0.00010323484386489961,
      "loss": 0.7347,
      "step": 8430
    },
    {
      "epoch": 0.5410519563822963,
      "grad_norm": 0.8025281891388182,
      "learning_rate": 0.00010312294860660319,
      "loss": 0.5264,
      "step": 8435
    },
    {
      "epoch": 0.5413726747915331,
      "grad_norm": 0.9019250163215435,
      "learning_rate": 0.0001030110494342484,
      "loss": 0.5963,
      "step": 8440
    },
    {
      "epoch": 0.5416933932007697,
      "grad_norm": 0.6368675777184184,
      "learning_rate": 0.00010289914648808088,
      "loss": 0.5399,
      "step": 8445
    },
    {
      "epoch": 0.5420141116100065,
      "grad_norm": 0.8008826667949324,
      "learning_rate": 0.00010278723990835097,
      "loss": 0.7476,
      "step": 8450
    },
    {
      "epoch": 0.5423348300192431,
      "grad_norm": 0.7219125921723233,
      "learning_rate": 0.0001026753298353136,
      "loss": 0.5883,
      "step": 8455
    },
    {
      "epoch": 0.5426555484284797,
      "grad_norm": 0.6992313736984004,
      "learning_rate": 0.0001025634164092281,
      "loss": 0.5797,
      "step": 8460
    },
    {
      "epoch": 0.5429762668377165,
      "grad_norm": 0.44695714450265767,
      "learning_rate": 0.00010245149977035792,
      "loss": 0.6473,
      "step": 8465
    },
    {
      "epoch": 0.5432969852469531,
      "grad_norm": 1.248682759415961,
      "learning_rate": 0.00010233958005897058,
      "loss": 0.5812,
      "step": 8470
    },
    {
      "epoch": 0.5436177036561899,
      "grad_norm": 1.0568826134330056,
      "learning_rate": 0.00010222765741533744,
      "loss": 0.7862,
      "step": 8475
    },
    {
      "epoch": 0.5439384220654265,
      "grad_norm": 0.8116820280676993,
      "learning_rate": 0.00010211573197973356,
      "loss": 0.6353,
      "step": 8480
    },
    {
      "epoch": 0.5442591404746633,
      "grad_norm": 0.9997535811765578,
      "learning_rate": 0.00010200380389243753,
      "loss": 0.7229,
      "step": 8485
    },
    {
      "epoch": 0.5445798588838999,
      "grad_norm": 0.8261136419022004,
      "learning_rate": 0.00010189187329373113,
      "loss": 0.6919,
      "step": 8490
    },
    {
      "epoch": 0.5449005772931367,
      "grad_norm": 0.7977851457213406,
      "learning_rate": 0.00010177994032389946,
      "loss": 0.5777,
      "step": 8495
    },
    {
      "epoch": 0.5452212957023733,
      "grad_norm": 1.211421213402399,
      "learning_rate": 0.00010166800512323043,
      "loss": 0.6434,
      "step": 8500
    },
    {
      "epoch": 0.54554201411161,
      "grad_norm": 2.0722177427022244,
      "learning_rate": 0.00010155606783201488,
      "loss": 0.5933,
      "step": 8505
    },
    {
      "epoch": 0.5458627325208467,
      "grad_norm": 0.7874345109274467,
      "learning_rate": 0.00010144412859054617,
      "loss": 0.8209,
      "step": 8510
    },
    {
      "epoch": 0.5461834509300834,
      "grad_norm": 0.5164159774237933,
      "learning_rate": 0.00010133218753912023,
      "loss": 0.6337,
      "step": 8515
    },
    {
      "epoch": 0.5465041693393201,
      "grad_norm": 0.9997324723951748,
      "learning_rate": 0.00010122024481803509,
      "loss": 0.7799,
      "step": 8520
    },
    {
      "epoch": 0.5468248877485568,
      "grad_norm": 0.868379009704931,
      "learning_rate": 0.000101108300567591,
      "loss": 0.6205,
      "step": 8525
    },
    {
      "epoch": 0.5471456061577935,
      "grad_norm": 0.7487726179830052,
      "learning_rate": 0.00010099635492809007,
      "loss": 0.7024,
      "step": 8530
    },
    {
      "epoch": 0.5474663245670301,
      "grad_norm": 0.784320611343729,
      "learning_rate": 0.00010088440803983616,
      "loss": 0.765,
      "step": 8535
    },
    {
      "epoch": 0.5477870429762668,
      "grad_norm": 0.7657678123947386,
      "learning_rate": 0.00010077246004313472,
      "loss": 0.6496,
      "step": 8540
    },
    {
      "epoch": 0.5481077613855035,
      "grad_norm": 0.7225029829590283,
      "learning_rate": 0.00010066051107829259,
      "loss": 0.6885,
      "step": 8545
    },
    {
      "epoch": 0.5484284797947402,
      "grad_norm": 0.8979772778090884,
      "learning_rate": 0.00010054856128561778,
      "loss": 0.7111,
      "step": 8550
    },
    {
      "epoch": 0.5487491982039769,
      "grad_norm": 1.322201085524258,
      "learning_rate": 0.00010043661080541936,
      "loss": 0.6252,
      "step": 8555
    },
    {
      "epoch": 0.5490699166132136,
      "grad_norm": 0.6743113052462498,
      "learning_rate": 0.00010032465977800726,
      "loss": 0.5282,
      "step": 8560
    },
    {
      "epoch": 0.5493906350224503,
      "grad_norm": 0.8693068518513947,
      "learning_rate": 0.00010021270834369211,
      "loss": 0.6029,
      "step": 8565
    },
    {
      "epoch": 0.549711353431687,
      "grad_norm": 1.1870868813911406,
      "learning_rate": 0.00010010075664278507,
      "loss": 0.6264,
      "step": 8570
    },
    {
      "epoch": 0.5500320718409236,
      "grad_norm": 1.0567858782770287,
      "learning_rate": 9.998880481559755e-05,
      "loss": 0.8018,
      "step": 8575
    },
    {
      "epoch": 0.5503527902501604,
      "grad_norm": 0.8137731229847819,
      "learning_rate": 9.987685300244117e-05,
      "loss": 0.614,
      "step": 8580
    },
    {
      "epoch": 0.550673508659397,
      "grad_norm": 0.9599816781819811,
      "learning_rate": 9.976490134362759e-05,
      "loss": 0.687,
      "step": 8585
    },
    {
      "epoch": 0.5509942270686338,
      "grad_norm": 0.6181246421982609,
      "learning_rate": 9.965294997946815e-05,
      "loss": 0.6866,
      "step": 8590
    },
    {
      "epoch": 0.5513149454778704,
      "grad_norm": 1.1348648251746718,
      "learning_rate": 9.954099905027396e-05,
      "loss": 0.6416,
      "step": 8595
    },
    {
      "epoch": 0.5516356638871072,
      "grad_norm": 1.6639502602729528,
      "learning_rate": 9.94290486963555e-05,
      "loss": 0.6715,
      "step": 8600
    },
    {
      "epoch": 0.5519563822963438,
      "grad_norm": 0.7678034571145345,
      "learning_rate": 9.931709905802252e-05,
      "loss": 0.6886,
      "step": 8605
    },
    {
      "epoch": 0.5522771007055804,
      "grad_norm": 1.4578465770643851,
      "learning_rate": 9.92051502755839e-05,
      "loss": 0.7689,
      "step": 8610
    },
    {
      "epoch": 0.5525978191148172,
      "grad_norm": 0.7434972557340698,
      "learning_rate": 9.909320248934747e-05,
      "loss": 0.6374,
      "step": 8615
    },
    {
      "epoch": 0.5529185375240538,
      "grad_norm": 0.8031136082718469,
      "learning_rate": 9.898125583961977e-05,
      "loss": 0.7055,
      "step": 8620
    },
    {
      "epoch": 0.5532392559332906,
      "grad_norm": 1.000878821455057,
      "learning_rate": 9.886931046670598e-05,
      "loss": 0.6157,
      "step": 8625
    },
    {
      "epoch": 0.5535599743425272,
      "grad_norm": 0.6524291495733984,
      "learning_rate": 9.875736651090956e-05,
      "loss": 0.561,
      "step": 8630
    },
    {
      "epoch": 0.553880692751764,
      "grad_norm": 1.3537142167105929,
      "learning_rate": 9.864542411253229e-05,
      "loss": 0.6718,
      "step": 8635
    },
    {
      "epoch": 0.5542014111610006,
      "grad_norm": 1.2775573591627376,
      "learning_rate": 9.853348341187398e-05,
      "loss": 0.6645,
      "step": 8640
    },
    {
      "epoch": 0.5545221295702373,
      "grad_norm": 0.982975595575632,
      "learning_rate": 9.842154454923236e-05,
      "loss": 0.5919,
      "step": 8645
    },
    {
      "epoch": 0.554842847979474,
      "grad_norm": 0.960094691754927,
      "learning_rate": 9.830960766490274e-05,
      "loss": 0.8113,
      "step": 8650
    },
    {
      "epoch": 0.5551635663887107,
      "grad_norm": 0.7965375300164668,
      "learning_rate": 9.819767289917802e-05,
      "loss": 0.5782,
      "step": 8655
    },
    {
      "epoch": 0.5554842847979474,
      "grad_norm": 1.1381902966011452,
      "learning_rate": 9.808574039234843e-05,
      "loss": 0.6242,
      "step": 8660
    },
    {
      "epoch": 0.5558050032071841,
      "grad_norm": 0.8670424286605721,
      "learning_rate": 9.79738102847014e-05,
      "loss": 0.7355,
      "step": 8665
    },
    {
      "epoch": 0.5561257216164208,
      "grad_norm": 0.8366621626207873,
      "learning_rate": 9.786188271652133e-05,
      "loss": 0.5744,
      "step": 8670
    },
    {
      "epoch": 0.5564464400256575,
      "grad_norm": 0.8273685386138488,
      "learning_rate": 9.774995782808943e-05,
      "loss": 0.6414,
      "step": 8675
    },
    {
      "epoch": 0.5567671584348942,
      "grad_norm": 0.9522831235441542,
      "learning_rate": 9.763803575968357e-05,
      "loss": 0.7632,
      "step": 8680
    },
    {
      "epoch": 0.5570878768441309,
      "grad_norm": 0.75372169303836,
      "learning_rate": 9.752611665157807e-05,
      "loss": 0.6433,
      "step": 8685
    },
    {
      "epoch": 0.5574085952533675,
      "grad_norm": 1.2109886710417286,
      "learning_rate": 9.741420064404353e-05,
      "loss": 0.63,
      "step": 8690
    },
    {
      "epoch": 0.5577293136626043,
      "grad_norm": 0.5400874445069787,
      "learning_rate": 9.730228787734669e-05,
      "loss": 0.6789,
      "step": 8695
    },
    {
      "epoch": 0.5580500320718409,
      "grad_norm": 0.7989657543785353,
      "learning_rate": 9.719037849175023e-05,
      "loss": 0.7407,
      "step": 8700
    },
    {
      "epoch": 0.5583707504810776,
      "grad_norm": 0.7239899818926174,
      "learning_rate": 9.707847262751257e-05,
      "loss": 0.6029,
      "step": 8705
    },
    {
      "epoch": 0.5586914688903143,
      "grad_norm": 1.1080694844841645,
      "learning_rate": 9.696657042488774e-05,
      "loss": 0.6841,
      "step": 8710
    },
    {
      "epoch": 0.559012187299551,
      "grad_norm": 0.8668620206006121,
      "learning_rate": 9.685467202412514e-05,
      "loss": 0.8091,
      "step": 8715
    },
    {
      "epoch": 0.5593329057087877,
      "grad_norm": 0.8263012333520392,
      "learning_rate": 9.674277756546941e-05,
      "loss": 0.5612,
      "step": 8720
    },
    {
      "epoch": 0.5596536241180243,
      "grad_norm": 1.2272663628925047,
      "learning_rate": 9.663088718916031e-05,
      "loss": 0.6214,
      "step": 8725
    },
    {
      "epoch": 0.5599743425272611,
      "grad_norm": 0.9766333412497376,
      "learning_rate": 9.651900103543244e-05,
      "loss": 0.7342,
      "step": 8730
    },
    {
      "epoch": 0.5602950609364977,
      "grad_norm": 0.830624516454487,
      "learning_rate": 9.640711924451514e-05,
      "loss": 0.6718,
      "step": 8735
    },
    {
      "epoch": 0.5606157793457345,
      "grad_norm": 0.4675831817637492,
      "learning_rate": 9.629524195663219e-05,
      "loss": 0.6039,
      "step": 8740
    },
    {
      "epoch": 0.5609364977549711,
      "grad_norm": 0.6634840466913374,
      "learning_rate": 9.618336931200182e-05,
      "loss": 0.5964,
      "step": 8745
    },
    {
      "epoch": 0.5612572161642079,
      "grad_norm": 0.9976406641974719,
      "learning_rate": 9.607150145083642e-05,
      "loss": 0.7166,
      "step": 8750
    },
    {
      "epoch": 0.5615779345734445,
      "grad_norm": 0.9545013096296738,
      "learning_rate": 9.595963851334237e-05,
      "loss": 0.689,
      "step": 8755
    },
    {
      "epoch": 0.5618986529826812,
      "grad_norm": 0.9634333696652287,
      "learning_rate": 9.58477806397199e-05,
      "loss": 0.8048,
      "step": 8760
    },
    {
      "epoch": 0.5622193713919179,
      "grad_norm": 0.8057551483876174,
      "learning_rate": 9.573592797016285e-05,
      "loss": 0.672,
      "step": 8765
    },
    {
      "epoch": 0.5625400898011546,
      "grad_norm": 1.0000169919459303,
      "learning_rate": 9.562408064485858e-05,
      "loss": 0.656,
      "step": 8770
    },
    {
      "epoch": 0.5628608082103913,
      "grad_norm": 1.0059598561012926,
      "learning_rate": 9.551223880398778e-05,
      "loss": 0.6689,
      "step": 8775
    },
    {
      "epoch": 0.563181526619628,
      "grad_norm": 0.7089352756337184,
      "learning_rate": 9.540040258772413e-05,
      "loss": 0.6104,
      "step": 8780
    },
    {
      "epoch": 0.5635022450288647,
      "grad_norm": 0.9673260454868421,
      "learning_rate": 9.528857213623441e-05,
      "loss": 0.625,
      "step": 8785
    },
    {
      "epoch": 0.5638229634381013,
      "grad_norm": 0.8425769011906392,
      "learning_rate": 9.517674758967812e-05,
      "loss": 0.6385,
      "step": 8790
    },
    {
      "epoch": 0.564143681847338,
      "grad_norm": 0.8483079594314462,
      "learning_rate": 9.506492908820737e-05,
      "loss": 0.7091,
      "step": 8795
    },
    {
      "epoch": 0.5644644002565747,
      "grad_norm": 1.1949041204777606,
      "learning_rate": 9.495311677196663e-05,
      "loss": 0.5583,
      "step": 8800
    },
    {
      "epoch": 0.5647851186658114,
      "grad_norm": 1.1203988658358368,
      "learning_rate": 9.484131078109272e-05,
      "loss": 0.6491,
      "step": 8805
    },
    {
      "epoch": 0.5651058370750481,
      "grad_norm": 0.7171168814679133,
      "learning_rate": 9.472951125571447e-05,
      "loss": 0.5704,
      "step": 8810
    },
    {
      "epoch": 0.5654265554842848,
      "grad_norm": 0.43705154049643696,
      "learning_rate": 9.461771833595263e-05,
      "loss": 0.6235,
      "step": 8815
    },
    {
      "epoch": 0.5657472738935215,
      "grad_norm": 0.5972509611997564,
      "learning_rate": 9.450593216191962e-05,
      "loss": 0.6011,
      "step": 8820
    },
    {
      "epoch": 0.5660679923027582,
      "grad_norm": 0.6585353171844711,
      "learning_rate": 9.439415287371949e-05,
      "loss": 0.6338,
      "step": 8825
    },
    {
      "epoch": 0.5663887107119948,
      "grad_norm": 1.182861072860639,
      "learning_rate": 9.42823806114476e-05,
      "loss": 0.6286,
      "step": 8830
    },
    {
      "epoch": 0.5667094291212316,
      "grad_norm": 0.774985192783614,
      "learning_rate": 9.417061551519051e-05,
      "loss": 0.6362,
      "step": 8835
    },
    {
      "epoch": 0.5670301475304682,
      "grad_norm": 1.6279736397998856,
      "learning_rate": 9.405885772502582e-05,
      "loss": 0.5434,
      "step": 8840
    },
    {
      "epoch": 0.567350865939705,
      "grad_norm": 0.8603999240784707,
      "learning_rate": 9.394710738102198e-05,
      "loss": 0.7135,
      "step": 8845
    },
    {
      "epoch": 0.5676715843489416,
      "grad_norm": 0.8326631481896093,
      "learning_rate": 9.383536462323807e-05,
      "loss": 0.6316,
      "step": 8850
    },
    {
      "epoch": 0.5679923027581784,
      "grad_norm": 1.1396992210320314,
      "learning_rate": 9.372362959172364e-05,
      "loss": 0.6325,
      "step": 8855
    },
    {
      "epoch": 0.568313021167415,
      "grad_norm": 0.6117345152175109,
      "learning_rate": 9.361190242651864e-05,
      "loss": 0.6159,
      "step": 8860
    },
    {
      "epoch": 0.5686337395766518,
      "grad_norm": 0.9306563316596532,
      "learning_rate": 9.350018326765311e-05,
      "loss": 0.6533,
      "step": 8865
    },
    {
      "epoch": 0.5689544579858884,
      "grad_norm": 0.8930767778362739,
      "learning_rate": 9.338847225514708e-05,
      "loss": 0.6675,
      "step": 8870
    },
    {
      "epoch": 0.569275176395125,
      "grad_norm": 0.4141144493955828,
      "learning_rate": 9.327676952901034e-05,
      "loss": 0.5957,
      "step": 8875
    },
    {
      "epoch": 0.5695958948043618,
      "grad_norm": 0.8888417335481001,
      "learning_rate": 9.31650752292423e-05,
      "loss": 0.5665,
      "step": 8880
    },
    {
      "epoch": 0.5699166132135984,
      "grad_norm": 0.7603252238964692,
      "learning_rate": 9.305338949583183e-05,
      "loss": 0.6428,
      "step": 8885
    },
    {
      "epoch": 0.5702373316228352,
      "grad_norm": 1.271342150118716,
      "learning_rate": 9.294171246875705e-05,
      "loss": 0.7219,
      "step": 8890
    },
    {
      "epoch": 0.5705580500320718,
      "grad_norm": 0.9447555346689784,
      "learning_rate": 9.283004428798519e-05,
      "loss": 0.6965,
      "step": 8895
    },
    {
      "epoch": 0.5708787684413086,
      "grad_norm": 0.8678646764049435,
      "learning_rate": 9.271838509347233e-05,
      "loss": 0.7673,
      "step": 8900
    },
    {
      "epoch": 0.5711994868505452,
      "grad_norm": 0.7416908587434721,
      "learning_rate": 9.260673502516333e-05,
      "loss": 0.6081,
      "step": 8905
    },
    {
      "epoch": 0.5715202052597819,
      "grad_norm": 0.939422337464896,
      "learning_rate": 9.24950942229917e-05,
      "loss": 0.6721,
      "step": 8910
    },
    {
      "epoch": 0.5718409236690186,
      "grad_norm": 0.8506289909429936,
      "learning_rate": 9.238346282687912e-05,
      "loss": 0.7379,
      "step": 8915
    },
    {
      "epoch": 0.5721616420782553,
      "grad_norm": 1.3927657753594376,
      "learning_rate": 9.227184097673566e-05,
      "loss": 0.7231,
      "step": 8920
    },
    {
      "epoch": 0.572482360487492,
      "grad_norm": 0.6002814159409026,
      "learning_rate": 9.21602288124594e-05,
      "loss": 0.8172,
      "step": 8925
    },
    {
      "epoch": 0.5728030788967287,
      "grad_norm": 0.7935777728563393,
      "learning_rate": 9.204862647393625e-05,
      "loss": 0.8086,
      "step": 8930
    },
    {
      "epoch": 0.5731237973059654,
      "grad_norm": 1.0397353291637284,
      "learning_rate": 9.193703410103978e-05,
      "loss": 0.6631,
      "step": 8935
    },
    {
      "epoch": 0.573444515715202,
      "grad_norm": 0.8367031156015087,
      "learning_rate": 9.182545183363112e-05,
      "loss": 0.5788,
      "step": 8940
    },
    {
      "epoch": 0.5737652341244387,
      "grad_norm": 1.2325263908639137,
      "learning_rate": 9.17138798115587e-05,
      "loss": 0.7789,
      "step": 8945
    },
    {
      "epoch": 0.5740859525336754,
      "grad_norm": 0.9464147249819552,
      "learning_rate": 9.160231817465815e-05,
      "loss": 0.5279,
      "step": 8950
    },
    {
      "epoch": 0.5744066709429121,
      "grad_norm": 0.8158486660018726,
      "learning_rate": 9.149076706275207e-05,
      "loss": 0.7098,
      "step": 8955
    },
    {
      "epoch": 0.5747273893521488,
      "grad_norm": 0.7825563949372556,
      "learning_rate": 9.137922661564981e-05,
      "loss": 0.6993,
      "step": 8960
    },
    {
      "epoch": 0.5750481077613855,
      "grad_norm": 0.9955286924734048,
      "learning_rate": 9.126769697314741e-05,
      "loss": 0.6668,
      "step": 8965
    },
    {
      "epoch": 0.5753688261706222,
      "grad_norm": 0.987888018064567,
      "learning_rate": 9.11561782750274e-05,
      "loss": 0.7683,
      "step": 8970
    },
    {
      "epoch": 0.5756895445798589,
      "grad_norm": 0.9029264976754006,
      "learning_rate": 9.104467066105855e-05,
      "loss": 0.5976,
      "step": 8975
    },
    {
      "epoch": 0.5760102629890955,
      "grad_norm": 1.2083151109064707,
      "learning_rate": 9.093317427099567e-05,
      "loss": 0.7444,
      "step": 8980
    },
    {
      "epoch": 0.5763309813983323,
      "grad_norm": 0.627708721729255,
      "learning_rate": 9.082168924457963e-05,
      "loss": 0.5052,
      "step": 8985
    },
    {
      "epoch": 0.5766516998075689,
      "grad_norm": 0.818341174384118,
      "learning_rate": 9.071021572153699e-05,
      "loss": 0.6956,
      "step": 8990
    },
    {
      "epoch": 0.5769724182168057,
      "grad_norm": 0.7174427987431503,
      "learning_rate": 9.05987538415799e-05,
      "loss": 0.6537,
      "step": 8995
    },
    {
      "epoch": 0.5772931366260423,
      "grad_norm": 1.0123101523225277,
      "learning_rate": 9.048730374440593e-05,
      "loss": 0.6298,
      "step": 9000
    },
    {
      "epoch": 0.5776138550352791,
      "grad_norm": 1.4927380842347644,
      "learning_rate": 9.037586556969785e-05,
      "loss": 0.7866,
      "step": 9005
    },
    {
      "epoch": 0.5779345734445157,
      "grad_norm": 1.1107550009988214,
      "learning_rate": 9.026443945712355e-05,
      "loss": 0.5272,
      "step": 9010
    },
    {
      "epoch": 0.5782552918537524,
      "grad_norm": 1.042711051305287,
      "learning_rate": 9.015302554633572e-05,
      "loss": 0.6862,
      "step": 9015
    },
    {
      "epoch": 0.5785760102629891,
      "grad_norm": 1.097565575641477,
      "learning_rate": 9.004162397697183e-05,
      "loss": 0.6653,
      "step": 9020
    },
    {
      "epoch": 0.5788967286722257,
      "grad_norm": 0.7962187563904711,
      "learning_rate": 8.993023488865384e-05,
      "loss": 0.7807,
      "step": 9025
    },
    {
      "epoch": 0.5792174470814625,
      "grad_norm": 0.8018799159927662,
      "learning_rate": 8.981885842098807e-05,
      "loss": 0.6755,
      "step": 9030
    },
    {
      "epoch": 0.5795381654906991,
      "grad_norm": 1.0103385936451423,
      "learning_rate": 8.970749471356508e-05,
      "loss": 0.7498,
      "step": 9035
    },
    {
      "epoch": 0.5798588838999359,
      "grad_norm": 0.8540199269462798,
      "learning_rate": 8.959614390595933e-05,
      "loss": 0.7041,
      "step": 9040
    },
    {
      "epoch": 0.5801796023091725,
      "grad_norm": 1.1040345444470279,
      "learning_rate": 8.948480613772923e-05,
      "loss": 0.5949,
      "step": 9045
    },
    {
      "epoch": 0.5805003207184093,
      "grad_norm": 1.0463417093934197,
      "learning_rate": 8.93734815484167e-05,
      "loss": 0.6716,
      "step": 9050
    },
    {
      "epoch": 0.5808210391276459,
      "grad_norm": 0.9338670777982941,
      "learning_rate": 8.92621702775473e-05,
      "loss": 0.652,
      "step": 9055
    },
    {
      "epoch": 0.5811417575368826,
      "grad_norm": 0.8605449857576016,
      "learning_rate": 8.915087246462981e-05,
      "loss": 0.6335,
      "step": 9060
    },
    {
      "epoch": 0.5814624759461193,
      "grad_norm": 0.9482034036580209,
      "learning_rate": 8.903958824915616e-05,
      "loss": 0.7407,
      "step": 9065
    },
    {
      "epoch": 0.581783194355356,
      "grad_norm": 0.9120660938985135,
      "learning_rate": 8.892831777060128e-05,
      "loss": 0.714,
      "step": 9070
    },
    {
      "epoch": 0.5821039127645927,
      "grad_norm": 0.7546853050581628,
      "learning_rate": 8.881706116842277e-05,
      "loss": 0.6643,
      "step": 9075
    },
    {
      "epoch": 0.5824246311738294,
      "grad_norm": 0.7217266514190624,
      "learning_rate": 8.870581858206097e-05,
      "loss": 0.6232,
      "step": 9080
    },
    {
      "epoch": 0.5827453495830661,
      "grad_norm": 0.8122719551725256,
      "learning_rate": 8.859459015093856e-05,
      "loss": 0.753,
      "step": 9085
    },
    {
      "epoch": 0.5830660679923028,
      "grad_norm": 0.6978194557670415,
      "learning_rate": 8.848337601446056e-05,
      "loss": 0.592,
      "step": 9090
    },
    {
      "epoch": 0.5833867864015394,
      "grad_norm": 0.7490982355447477,
      "learning_rate": 8.8372176312014e-05,
      "loss": 0.6739,
      "step": 9095
    },
    {
      "epoch": 0.5837075048107762,
      "grad_norm": 1.074058776492988,
      "learning_rate": 8.826099118296781e-05,
      "loss": 0.6831,
      "step": 9100
    },
    {
      "epoch": 0.5840282232200128,
      "grad_norm": 0.7986527171477741,
      "learning_rate": 8.814982076667274e-05,
      "loss": 0.6572,
      "step": 9105
    },
    {
      "epoch": 0.5843489416292496,
      "grad_norm": 0.9594556597631692,
      "learning_rate": 8.803866520246111e-05,
      "loss": 0.6968,
      "step": 9110
    },
    {
      "epoch": 0.5846696600384862,
      "grad_norm": 0.8185832555992929,
      "learning_rate": 8.792752462964643e-05,
      "loss": 0.6396,
      "step": 9115
    },
    {
      "epoch": 0.584990378447723,
      "grad_norm": 0.830230327348044,
      "learning_rate": 8.781639918752364e-05,
      "loss": 0.6288,
      "step": 9120
    },
    {
      "epoch": 0.5853110968569596,
      "grad_norm": 1.260466190111766,
      "learning_rate": 8.770528901536866e-05,
      "loss": 0.6248,
      "step": 9125
    },
    {
      "epoch": 0.5856318152661962,
      "grad_norm": 0.7805742440541377,
      "learning_rate": 8.75941942524382e-05,
      "loss": 0.726,
      "step": 9130
    },
    {
      "epoch": 0.585952533675433,
      "grad_norm": 1.0612454515173708,
      "learning_rate": 8.748311503796971e-05,
      "loss": 0.6807,
      "step": 9135
    },
    {
      "epoch": 0.5862732520846696,
      "grad_norm": 0.8808610696974422,
      "learning_rate": 8.737205151118115e-05,
      "loss": 0.7349,
      "step": 9140
    },
    {
      "epoch": 0.5865939704939064,
      "grad_norm": 0.8397400084374878,
      "learning_rate": 8.726100381127084e-05,
      "loss": 0.677,
      "step": 9145
    },
    {
      "epoch": 0.586914688903143,
      "grad_norm": 1.3081126728734789,
      "learning_rate": 8.714997207741725e-05,
      "loss": 0.7485,
      "step": 9150
    },
    {
      "epoch": 0.5872354073123798,
      "grad_norm": 0.23647447615753048,
      "learning_rate": 8.703895644877877e-05,
      "loss": 0.5389,
      "step": 9155
    },
    {
      "epoch": 0.5875561257216164,
      "grad_norm": 1.0035423360368345,
      "learning_rate": 8.692795706449371e-05,
      "loss": 0.6547,
      "step": 9160
    },
    {
      "epoch": 0.5878768441308531,
      "grad_norm": 0.7176089252240778,
      "learning_rate": 8.681697406367997e-05,
      "loss": 0.6607,
      "step": 9165
    },
    {
      "epoch": 0.5881975625400898,
      "grad_norm": 0.8342266954014463,
      "learning_rate": 8.670600758543492e-05,
      "loss": 0.6957,
      "step": 9170
    },
    {
      "epoch": 0.5885182809493265,
      "grad_norm": 0.9577059909314858,
      "learning_rate": 8.659505776883523e-05,
      "loss": 0.7079,
      "step": 9175
    },
    {
      "epoch": 0.5888389993585632,
      "grad_norm": 0.5591665135253571,
      "learning_rate": 8.648412475293667e-05,
      "loss": 0.4696,
      "step": 9180
    },
    {
      "epoch": 0.5891597177677999,
      "grad_norm": 0.6612061534246185,
      "learning_rate": 8.637320867677395e-05,
      "loss": 0.8161,
      "step": 9185
    },
    {
      "epoch": 0.5894804361770366,
      "grad_norm": 0.7364614135023326,
      "learning_rate": 8.626230967936056e-05,
      "loss": 0.584,
      "step": 9190
    },
    {
      "epoch": 0.5898011545862732,
      "grad_norm": 1.1805347583614008,
      "learning_rate": 8.615142789968862e-05,
      "loss": 0.6749,
      "step": 9195
    },
    {
      "epoch": 0.5901218729955099,
      "grad_norm": 0.8670374427365669,
      "learning_rate": 8.604056347672862e-05,
      "loss": 0.6273,
      "step": 9200
    },
    {
      "epoch": 0.5904425914047466,
      "grad_norm": 0.9304848686764007,
      "learning_rate": 8.592971654942934e-05,
      "loss": 0.7438,
      "step": 9205
    },
    {
      "epoch": 0.5907633098139833,
      "grad_norm": 0.9747134027393929,
      "learning_rate": 8.581888725671756e-05,
      "loss": 0.6131,
      "step": 9210
    },
    {
      "epoch": 0.59108402822322,
      "grad_norm": 1.0129060114876993,
      "learning_rate": 8.570807573749803e-05,
      "loss": 0.7444,
      "step": 9215
    },
    {
      "epoch": 0.5914047466324567,
      "grad_norm": 0.860206331729887,
      "learning_rate": 8.559728213065322e-05,
      "loss": 0.71,
      "step": 9220
    },
    {
      "epoch": 0.5917254650416934,
      "grad_norm": 0.9817359438145173,
      "learning_rate": 8.548650657504312e-05,
      "loss": 0.6491,
      "step": 9225
    },
    {
      "epoch": 0.5920461834509301,
      "grad_norm": 0.7544658228792815,
      "learning_rate": 8.537574920950509e-05,
      "loss": 0.6348,
      "step": 9230
    },
    {
      "epoch": 0.5923669018601668,
      "grad_norm": 0.7630242666798073,
      "learning_rate": 8.526501017285371e-05,
      "loss": 0.6261,
      "step": 9235
    },
    {
      "epoch": 0.5926876202694035,
      "grad_norm": 0.9267179536684838,
      "learning_rate": 8.515428960388064e-05,
      "loss": 0.8258,
      "step": 9240
    },
    {
      "epoch": 0.5930083386786401,
      "grad_norm": 0.6784696630153367,
      "learning_rate": 8.504358764135423e-05,
      "loss": 0.707,
      "step": 9245
    },
    {
      "epoch": 0.5933290570878769,
      "grad_norm": 0.6689426887073786,
      "learning_rate": 8.49329044240197e-05,
      "loss": 0.751,
      "step": 9250
    },
    {
      "epoch": 0.5936497754971135,
      "grad_norm": 1.0074921827758931,
      "learning_rate": 8.482224009059867e-05,
      "loss": 0.7213,
      "step": 9255
    },
    {
      "epoch": 0.5939704939063503,
      "grad_norm": 0.6037825152713899,
      "learning_rate": 8.471159477978915e-05,
      "loss": 0.621,
      "step": 9260
    },
    {
      "epoch": 0.5942912123155869,
      "grad_norm": 0.6325399857778463,
      "learning_rate": 8.460096863026523e-05,
      "loss": 0.6925,
      "step": 9265
    },
    {
      "epoch": 0.5946119307248237,
      "grad_norm": 0.9785164961672185,
      "learning_rate": 8.449036178067706e-05,
      "loss": 0.7721,
      "step": 9270
    },
    {
      "epoch": 0.5949326491340603,
      "grad_norm": 0.8071126693831758,
      "learning_rate": 8.437977436965057e-05,
      "loss": 0.5628,
      "step": 9275
    },
    {
      "epoch": 0.5952533675432969,
      "grad_norm": 1.093008483996882,
      "learning_rate": 8.426920653578731e-05,
      "loss": 0.5135,
      "step": 9280
    },
    {
      "epoch": 0.5955740859525337,
      "grad_norm": 0.7334552943764545,
      "learning_rate": 8.415865841766437e-05,
      "loss": 0.6418,
      "step": 9285
    },
    {
      "epoch": 0.5958948043617703,
      "grad_norm": 0.9720157753455849,
      "learning_rate": 8.404813015383402e-05,
      "loss": 0.6855,
      "step": 9290
    },
    {
      "epoch": 0.5962155227710071,
      "grad_norm": 0.7988660585883463,
      "learning_rate": 8.39376218828237e-05,
      "loss": 0.5753,
      "step": 9295
    },
    {
      "epoch": 0.5965362411802437,
      "grad_norm": 1.1413457984041735,
      "learning_rate": 8.382713374313582e-05,
      "loss": 0.6003,
      "step": 9300
    },
    {
      "epoch": 0.5968569595894805,
      "grad_norm": 1.1011093623211472,
      "learning_rate": 8.371666587324753e-05,
      "loss": 0.7294,
      "step": 9305
    },
    {
      "epoch": 0.5971776779987171,
      "grad_norm": 0.9285733358885891,
      "learning_rate": 8.360621841161059e-05,
      "loss": 0.5484,
      "step": 9310
    },
    {
      "epoch": 0.5974983964079538,
      "grad_norm": 0.6748939404643401,
      "learning_rate": 8.349579149665111e-05,
      "loss": 0.6096,
      "step": 9315
    },
    {
      "epoch": 0.5978191148171905,
      "grad_norm": 0.9020042133223751,
      "learning_rate": 8.338538526676955e-05,
      "loss": 0.6025,
      "step": 9320
    },
    {
      "epoch": 0.5981398332264272,
      "grad_norm": 0.9270397135681554,
      "learning_rate": 8.32749998603404e-05,
      "loss": 0.7169,
      "step": 9325
    },
    {
      "epoch": 0.5984605516356639,
      "grad_norm": 0.9890377973574781,
      "learning_rate": 8.316463541571202e-05,
      "loss": 0.6308,
      "step": 9330
    },
    {
      "epoch": 0.5987812700449006,
      "grad_norm": 0.9865556224427305,
      "learning_rate": 8.305429207120657e-05,
      "loss": 0.6582,
      "step": 9335
    },
    {
      "epoch": 0.5991019884541373,
      "grad_norm": 0.7178728991086797,
      "learning_rate": 8.294396996511973e-05,
      "loss": 0.6433,
      "step": 9340
    },
    {
      "epoch": 0.599422706863374,
      "grad_norm": 0.9285152964545721,
      "learning_rate": 8.283366923572054e-05,
      "loss": 0.548,
      "step": 9345
    },
    {
      "epoch": 0.5997434252726106,
      "grad_norm": 1.0943546547273215,
      "learning_rate": 8.272339002125126e-05,
      "loss": 0.5401,
      "step": 9350
    },
    {
      "epoch": 0.6000641436818474,
      "grad_norm": 1.0722476752693422,
      "learning_rate": 8.261313245992719e-05,
      "loss": 0.7496,
      "step": 9355
    },
    {
      "epoch": 0.600384862091084,
      "grad_norm": 0.7239338874930329,
      "learning_rate": 8.250289668993651e-05,
      "loss": 0.6294,
      "step": 9360
    },
    {
      "epoch": 0.6007055805003207,
      "grad_norm": 0.8162856731878313,
      "learning_rate": 8.239268284944008e-05,
      "loss": 0.784,
      "step": 9365
    },
    {
      "epoch": 0.6010262989095574,
      "grad_norm": 0.8529031580797097,
      "learning_rate": 8.228249107657125e-05,
      "loss": 0.7338,
      "step": 9370
    },
    {
      "epoch": 0.6013470173187941,
      "grad_norm": 0.914197482847494,
      "learning_rate": 8.217232150943575e-05,
      "loss": 0.6738,
      "step": 9375
    },
    {
      "epoch": 0.6016677357280308,
      "grad_norm": 0.561817894827455,
      "learning_rate": 8.20621742861114e-05,
      "loss": 0.4924,
      "step": 9380
    },
    {
      "epoch": 0.6019884541372674,
      "grad_norm": 0.8679917658001024,
      "learning_rate": 8.19520495446481e-05,
      "loss": 0.8074,
      "step": 9385
    },
    {
      "epoch": 0.6023091725465042,
      "grad_norm": 1.0120069230072926,
      "learning_rate": 8.184194742306756e-05,
      "loss": 0.7112,
      "step": 9390
    },
    {
      "epoch": 0.6026298909557408,
      "grad_norm": 0.7356825859409829,
      "learning_rate": 8.173186805936313e-05,
      "loss": 0.6514,
      "step": 9395
    },
    {
      "epoch": 0.6029506093649776,
      "grad_norm": 0.7794340302339006,
      "learning_rate": 8.162181159149964e-05,
      "loss": 0.7748,
      "step": 9400
    },
    {
      "epoch": 0.6032713277742142,
      "grad_norm": 0.9190740265202144,
      "learning_rate": 8.151177815741318e-05,
      "loss": 0.6399,
      "step": 9405
    },
    {
      "epoch": 0.603592046183451,
      "grad_norm": 1.1526131658530894,
      "learning_rate": 8.140176789501102e-05,
      "loss": 0.7519,
      "step": 9410
    },
    {
      "epoch": 0.6039127645926876,
      "grad_norm": 0.8970675006265497,
      "learning_rate": 8.129178094217141e-05,
      "loss": 0.7025,
      "step": 9415
    },
    {
      "epoch": 0.6042334830019244,
      "grad_norm": 1.16563982635486,
      "learning_rate": 8.118181743674334e-05,
      "loss": 0.6515,
      "step": 9420
    },
    {
      "epoch": 0.604554201411161,
      "grad_norm": 1.009328430894082,
      "learning_rate": 8.107187751654642e-05,
      "loss": 0.8061,
      "step": 9425
    },
    {
      "epoch": 0.6048749198203976,
      "grad_norm": 0.6431656020123224,
      "learning_rate": 8.096196131937068e-05,
      "loss": 0.7703,
      "step": 9430
    },
    {
      "epoch": 0.6051956382296344,
      "grad_norm": 0.8022392814347792,
      "learning_rate": 8.085206898297648e-05,
      "loss": 0.4945,
      "step": 9435
    },
    {
      "epoch": 0.605516356638871,
      "grad_norm": 0.8590402951031166,
      "learning_rate": 8.074220064509428e-05,
      "loss": 0.577,
      "step": 9440
    },
    {
      "epoch": 0.6058370750481078,
      "grad_norm": 0.6529036302559359,
      "learning_rate": 8.06323564434243e-05,
      "loss": 0.6972,
      "step": 9445
    },
    {
      "epoch": 0.6061577934573444,
      "grad_norm": 0.9053770255851836,
      "learning_rate": 8.052253651563671e-05,
      "loss": 0.6241,
      "step": 9450
    },
    {
      "epoch": 0.6064785118665812,
      "grad_norm": 0.6968143227671041,
      "learning_rate": 8.04127409993712e-05,
      "loss": 0.7196,
      "step": 9455
    },
    {
      "epoch": 0.6067992302758178,
      "grad_norm": 0.7907742358273027,
      "learning_rate": 8.030297003223676e-05,
      "loss": 0.6535,
      "step": 9460
    },
    {
      "epoch": 0.6071199486850545,
      "grad_norm": 0.9043816519851674,
      "learning_rate": 8.019322375181175e-05,
      "loss": 0.7183,
      "step": 9465
    },
    {
      "epoch": 0.6074406670942912,
      "grad_norm": 0.8583282541776323,
      "learning_rate": 8.008350229564351e-05,
      "loss": 0.7373,
      "step": 9470
    },
    {
      "epoch": 0.6077613855035279,
      "grad_norm": 1.1639398571753123,
      "learning_rate": 7.997380580124832e-05,
      "loss": 0.6619,
      "step": 9475
    },
    {
      "epoch": 0.6080821039127646,
      "grad_norm": 0.7363838290393571,
      "learning_rate": 7.986413440611115e-05,
      "loss": 0.5238,
      "step": 9480
    },
    {
      "epoch": 0.6084028223220013,
      "grad_norm": 0.7361031316329811,
      "learning_rate": 7.975448824768546e-05,
      "loss": 0.7093,
      "step": 9485
    },
    {
      "epoch": 0.608723540731238,
      "grad_norm": 0.8655976177215603,
      "learning_rate": 7.964486746339315e-05,
      "loss": 0.6699,
      "step": 9490
    },
    {
      "epoch": 0.6090442591404747,
      "grad_norm": 0.7757949116609816,
      "learning_rate": 7.95352721906243e-05,
      "loss": 0.6457,
      "step": 9495
    },
    {
      "epoch": 0.6093649775497113,
      "grad_norm": 1.0532442121286478,
      "learning_rate": 7.942570256673704e-05,
      "loss": 0.8266,
      "step": 9500
    },
    {
      "epoch": 0.6096856959589481,
      "grad_norm": 0.8097807634079536,
      "learning_rate": 7.931615872905727e-05,
      "loss": 0.6542,
      "step": 9505
    },
    {
      "epoch": 0.6100064143681847,
      "grad_norm": 1.170352424739306,
      "learning_rate": 7.92066408148787e-05,
      "loss": 0.6511,
      "step": 9510
    },
    {
      "epoch": 0.6103271327774215,
      "grad_norm": 0.6465117473629731,
      "learning_rate": 7.909714896146239e-05,
      "loss": 0.6102,
      "step": 9515
    },
    {
      "epoch": 0.6106478511866581,
      "grad_norm": 0.9562444288916828,
      "learning_rate": 7.898768330603687e-05,
      "loss": 0.7281,
      "step": 9520
    },
    {
      "epoch": 0.6109685695958949,
      "grad_norm": 0.48629635257867143,
      "learning_rate": 7.887824398579778e-05,
      "loss": 0.5576,
      "step": 9525
    },
    {
      "epoch": 0.6112892880051315,
      "grad_norm": 0.6187174821618042,
      "learning_rate": 7.876883113790777e-05,
      "loss": 0.4536,
      "step": 9530
    },
    {
      "epoch": 0.6116100064143681,
      "grad_norm": 0.8491363897597337,
      "learning_rate": 7.865944489949632e-05,
      "loss": 0.5082,
      "step": 9535
    },
    {
      "epoch": 0.6119307248236049,
      "grad_norm": 0.9489825766872471,
      "learning_rate": 7.855008540765954e-05,
      "loss": 0.8288,
      "step": 9540
    },
    {
      "epoch": 0.6122514432328415,
      "grad_norm": 0.8247180962617905,
      "learning_rate": 7.844075279945998e-05,
      "loss": 0.7947,
      "step": 9545
    },
    {
      "epoch": 0.6125721616420783,
      "grad_norm": 0.8487499152582451,
      "learning_rate": 7.833144721192658e-05,
      "loss": 0.4836,
      "step": 9550
    },
    {
      "epoch": 0.6128928800513149,
      "grad_norm": 1.4749421151082263,
      "learning_rate": 7.822216878205437e-05,
      "loss": 0.6604,
      "step": 9555
    },
    {
      "epoch": 0.6132135984605517,
      "grad_norm": 0.6439839118081867,
      "learning_rate": 7.811291764680436e-05,
      "loss": 0.5311,
      "step": 9560
    },
    {
      "epoch": 0.6135343168697883,
      "grad_norm": 0.6948565188236483,
      "learning_rate": 7.800369394310329e-05,
      "loss": 0.7818,
      "step": 9565
    },
    {
      "epoch": 0.613855035279025,
      "grad_norm": 0.5432098551962209,
      "learning_rate": 7.789449780784361e-05,
      "loss": 0.4817,
      "step": 9570
    },
    {
      "epoch": 0.6141757536882617,
      "grad_norm": 0.8116998264643036,
      "learning_rate": 7.778532937788319e-05,
      "loss": 0.6809,
      "step": 9575
    },
    {
      "epoch": 0.6144964720974984,
      "grad_norm": 0.927156766210116,
      "learning_rate": 7.767618879004509e-05,
      "loss": 0.6117,
      "step": 9580
    },
    {
      "epoch": 0.6148171905067351,
      "grad_norm": 0.5580255415813408,
      "learning_rate": 7.756707618111758e-05,
      "loss": 0.5121,
      "step": 9585
    },
    {
      "epoch": 0.6151379089159718,
      "grad_norm": 0.7697324881673694,
      "learning_rate": 7.745799168785387e-05,
      "loss": 0.7019,
      "step": 9590
    },
    {
      "epoch": 0.6154586273252085,
      "grad_norm": 1.2533080746391783,
      "learning_rate": 7.734893544697182e-05,
      "loss": 0.6921,
      "step": 9595
    },
    {
      "epoch": 0.6157793457344451,
      "grad_norm": 0.8591968885866408,
      "learning_rate": 7.723990759515399e-05,
      "loss": 0.6234,
      "step": 9600
    },
    {
      "epoch": 0.6161000641436819,
      "grad_norm": 0.8144982447654572,
      "learning_rate": 7.713090826904732e-05,
      "loss": 0.6175,
      "step": 9605
    },
    {
      "epoch": 0.6164207825529185,
      "grad_norm": 0.7852604055969639,
      "learning_rate": 7.702193760526301e-05,
      "loss": 0.538,
      "step": 9610
    },
    {
      "epoch": 0.6167415009621552,
      "grad_norm": 0.82507022800839,
      "learning_rate": 7.691299574037633e-05,
      "loss": 0.5858,
      "step": 9615
    },
    {
      "epoch": 0.6170622193713919,
      "grad_norm": 0.8977703001606776,
      "learning_rate": 7.68040828109264e-05,
      "loss": 0.6686,
      "step": 9620
    },
    {
      "epoch": 0.6173829377806286,
      "grad_norm": 0.7575641120784353,
      "learning_rate": 7.669519895341618e-05,
      "loss": 0.6733,
      "step": 9625
    },
    {
      "epoch": 0.6177036561898653,
      "grad_norm": 0.7782783108716851,
      "learning_rate": 7.658634430431211e-05,
      "loss": 0.6113,
      "step": 9630
    },
    {
      "epoch": 0.618024374599102,
      "grad_norm": 0.8737688527317737,
      "learning_rate": 7.647751900004408e-05,
      "loss": 0.7703,
      "step": 9635
    },
    {
      "epoch": 0.6183450930083387,
      "grad_norm": 0.7163537021531532,
      "learning_rate": 7.63687231770052e-05,
      "loss": 0.6687,
      "step": 9640
    },
    {
      "epoch": 0.6186658114175754,
      "grad_norm": 0.7383194119362961,
      "learning_rate": 7.625995697155153e-05,
      "loss": 0.7192,
      "step": 9645
    },
    {
      "epoch": 0.618986529826812,
      "grad_norm": 0.7818780084969111,
      "learning_rate": 7.615122052000212e-05,
      "loss": 0.4781,
      "step": 9650
    },
    {
      "epoch": 0.6193072482360488,
      "grad_norm": 0.9549919791876611,
      "learning_rate": 7.604251395863868e-05,
      "loss": 0.5972,
      "step": 9655
    },
    {
      "epoch": 0.6196279666452854,
      "grad_norm": 0.9266947067171263,
      "learning_rate": 7.593383742370547e-05,
      "loss": 0.7661,
      "step": 9660
    },
    {
      "epoch": 0.6199486850545222,
      "grad_norm": 0.7815262374564014,
      "learning_rate": 7.582519105140915e-05,
      "loss": 0.844,
      "step": 9665
    },
    {
      "epoch": 0.6202694034637588,
      "grad_norm": 0.9851958882202488,
      "learning_rate": 7.571657497791855e-05,
      "loss": 0.6573,
      "step": 9670
    },
    {
      "epoch": 0.6205901218729956,
      "grad_norm": 0.863915136317819,
      "learning_rate": 7.560798933936446e-05,
      "loss": 0.6965,
      "step": 9675
    },
    {
      "epoch": 0.6209108402822322,
      "grad_norm": 0.8169772635721835,
      "learning_rate": 7.549943427183963e-05,
      "loss": 0.6739,
      "step": 9680
    },
    {
      "epoch": 0.6212315586914688,
      "grad_norm": 0.9621597430987586,
      "learning_rate": 7.539090991139843e-05,
      "loss": 0.7107,
      "step": 9685
    },
    {
      "epoch": 0.6215522771007056,
      "grad_norm": 1.1682951488621962,
      "learning_rate": 7.52824163940568e-05,
      "loss": 0.7016,
      "step": 9690
    },
    {
      "epoch": 0.6218729955099422,
      "grad_norm": 0.5988705115634277,
      "learning_rate": 7.517395385579198e-05,
      "loss": 0.5883,
      "step": 9695
    },
    {
      "epoch": 0.622193713919179,
      "grad_norm": 0.6405875029114282,
      "learning_rate": 7.506552243254235e-05,
      "loss": 0.5632,
      "step": 9700
    },
    {
      "epoch": 0.6225144323284156,
      "grad_norm": 0.9039124102611747,
      "learning_rate": 7.49571222602074e-05,
      "loss": 0.5569,
      "step": 9705
    },
    {
      "epoch": 0.6228351507376524,
      "grad_norm": 1.1918655890149419,
      "learning_rate": 7.484875347464731e-05,
      "loss": 0.755,
      "step": 9710
    },
    {
      "epoch": 0.623155869146889,
      "grad_norm": 2.014073968409583,
      "learning_rate": 7.474041621168304e-05,
      "loss": 0.6472,
      "step": 9715
    },
    {
      "epoch": 0.6234765875561257,
      "grad_norm": 0.8921505648356219,
      "learning_rate": 7.4632110607096e-05,
      "loss": 0.8289,
      "step": 9720
    },
    {
      "epoch": 0.6237973059653624,
      "grad_norm": 1.1073242240733232,
      "learning_rate": 7.452383679662794e-05,
      "loss": 0.6634,
      "step": 9725
    },
    {
      "epoch": 0.6241180243745991,
      "grad_norm": 1.1492204881968546,
      "learning_rate": 7.441559491598072e-05,
      "loss": 0.6672,
      "step": 9730
    },
    {
      "epoch": 0.6244387427838358,
      "grad_norm": 1.2072073594662214,
      "learning_rate": 7.43073851008162e-05,
      "loss": 0.6821,
      "step": 9735
    },
    {
      "epoch": 0.6247594611930725,
      "grad_norm": 0.7796944953436583,
      "learning_rate": 7.41992074867561e-05,
      "loss": 0.5997,
      "step": 9740
    },
    {
      "epoch": 0.6250801796023092,
      "grad_norm": 0.8744950902348806,
      "learning_rate": 7.40910622093817e-05,
      "loss": 0.8027,
      "step": 9745
    },
    {
      "epoch": 0.6254008980115459,
      "grad_norm": 0.5663128313006088,
      "learning_rate": 7.398294940423382e-05,
      "loss": 0.6558,
      "step": 9750
    },
    {
      "epoch": 0.6257216164207825,
      "grad_norm": 1.03786462429062,
      "learning_rate": 7.387486920681251e-05,
      "loss": 0.7204,
      "step": 9755
    },
    {
      "epoch": 0.6260423348300193,
      "grad_norm": 1.0086514423501614,
      "learning_rate": 7.376682175257703e-05,
      "loss": 0.5726,
      "step": 9760
    },
    {
      "epoch": 0.6263630532392559,
      "grad_norm": 0.7340138238860899,
      "learning_rate": 7.365880717694558e-05,
      "loss": 0.6003,
      "step": 9765
    },
    {
      "epoch": 0.6266837716484926,
      "grad_norm": 1.0154279037896083,
      "learning_rate": 7.355082561529511e-05,
      "loss": 0.6518,
      "step": 9770
    },
    {
      "epoch": 0.6270044900577293,
      "grad_norm": 1.1008265637631556,
      "learning_rate": 7.344287720296128e-05,
      "loss": 0.6493,
      "step": 9775
    },
    {
      "epoch": 0.627325208466966,
      "grad_norm": 0.8136002565232989,
      "learning_rate": 7.333496207523805e-05,
      "loss": 0.7117,
      "step": 9780
    },
    {
      "epoch": 0.6276459268762027,
      "grad_norm": 0.5762089560179455,
      "learning_rate": 7.322708036737784e-05,
      "loss": 0.4664,
      "step": 9785
    },
    {
      "epoch": 0.6279666452854393,
      "grad_norm": 0.8389502685505456,
      "learning_rate": 7.311923221459108e-05,
      "loss": 0.6836,
      "step": 9790
    },
    {
      "epoch": 0.6282873636946761,
      "grad_norm": 0.7980523725918469,
      "learning_rate": 7.301141775204614e-05,
      "loss": 0.6824,
      "step": 9795
    },
    {
      "epoch": 0.6286080821039127,
      "grad_norm": 1.1727596107618312,
      "learning_rate": 7.290363711486923e-05,
      "loss": 0.6435,
      "step": 9800
    },
    {
      "epoch": 0.6289288005131495,
      "grad_norm": 0.4755883693546517,
      "learning_rate": 7.279589043814413e-05,
      "loss": 0.7567,
      "step": 9805
    },
    {
      "epoch": 0.6292495189223861,
      "grad_norm": 0.59249663501007,
      "learning_rate": 7.268817785691204e-05,
      "loss": 0.6907,
      "step": 9810
    },
    {
      "epoch": 0.6295702373316229,
      "grad_norm": 0.848542013217018,
      "learning_rate": 7.258049950617146e-05,
      "loss": 0.6471,
      "step": 9815
    },
    {
      "epoch": 0.6298909557408595,
      "grad_norm": 1.047981392744028,
      "learning_rate": 7.247285552087797e-05,
      "loss": 0.5712,
      "step": 9820
    },
    {
      "epoch": 0.6302116741500963,
      "grad_norm": 0.8916612499406957,
      "learning_rate": 7.236524603594406e-05,
      "loss": 0.6496,
      "step": 9825
    },
    {
      "epoch": 0.6305323925593329,
      "grad_norm": 0.810154490032121,
      "learning_rate": 7.225767118623906e-05,
      "loss": 0.5871,
      "step": 9830
    },
    {
      "epoch": 0.6308531109685696,
      "grad_norm": 0.8722001341085496,
      "learning_rate": 7.215013110658875e-05,
      "loss": 0.643,
      "step": 9835
    },
    {
      "epoch": 0.6311738293778063,
      "grad_norm": 0.6036268039451337,
      "learning_rate": 7.204262593177551e-05,
      "loss": 0.6787,
      "step": 9840
    },
    {
      "epoch": 0.631494547787043,
      "grad_norm": 1.1616717351436967,
      "learning_rate": 7.193515579653777e-05,
      "loss": 0.5542,
      "step": 9845
    },
    {
      "epoch": 0.6318152661962797,
      "grad_norm": 0.8131100593226482,
      "learning_rate": 7.182772083557022e-05,
      "loss": 0.7859,
      "step": 9850
    },
    {
      "epoch": 0.6321359846055163,
      "grad_norm": 0.876808117538372,
      "learning_rate": 7.172032118352338e-05,
      "loss": 0.6484,
      "step": 9855
    },
    {
      "epoch": 0.6324567030147531,
      "grad_norm": 0.8713054808471165,
      "learning_rate": 7.161295697500353e-05,
      "loss": 0.6265,
      "step": 9860
    },
    {
      "epoch": 0.6327774214239897,
      "grad_norm": 1.023366348564304,
      "learning_rate": 7.150562834457257e-05,
      "loss": 0.5939,
      "step": 9865
    },
    {
      "epoch": 0.6330981398332264,
      "grad_norm": 0.7588376669281691,
      "learning_rate": 7.13983354267477e-05,
      "loss": 0.7873,
      "step": 9870
    },
    {
      "epoch": 0.6334188582424631,
      "grad_norm": 1.028561424510279,
      "learning_rate": 7.129107835600149e-05,
      "loss": 0.6212,
      "step": 9875
    },
    {
      "epoch": 0.6337395766516998,
      "grad_norm": 0.5002948721851668,
      "learning_rate": 7.118385726676148e-05,
      "loss": 0.6269,
      "step": 9880
    },
    {
      "epoch": 0.6340602950609365,
      "grad_norm": 0.6840341058593294,
      "learning_rate": 7.10766722934102e-05,
      "loss": 0.6232,
      "step": 9885
    },
    {
      "epoch": 0.6343810134701732,
      "grad_norm": 1.1628940715108431,
      "learning_rate": 7.096952357028486e-05,
      "loss": 0.7978,
      "step": 9890
    },
    {
      "epoch": 0.6347017318794099,
      "grad_norm": 0.8853939814346806,
      "learning_rate": 7.086241123167722e-05,
      "loss": 0.6057,
      "step": 9895
    },
    {
      "epoch": 0.6350224502886466,
      "grad_norm": 0.7451557600335174,
      "learning_rate": 7.07553354118335e-05,
      "loss": 0.7038,
      "step": 9900
    },
    {
      "epoch": 0.6353431686978832,
      "grad_norm": 1.40409713973294,
      "learning_rate": 7.064829624495415e-05,
      "loss": 0.6721,
      "step": 9905
    },
    {
      "epoch": 0.63566388710712,
      "grad_norm": 0.8791535681920543,
      "learning_rate": 7.054129386519356e-05,
      "loss": 0.7629,
      "step": 9910
    },
    {
      "epoch": 0.6359846055163566,
      "grad_norm": 0.6562938490531729,
      "learning_rate": 7.043432840666015e-05,
      "loss": 0.6885,
      "step": 9915
    },
    {
      "epoch": 0.6363053239255934,
      "grad_norm": 0.8475306109482822,
      "learning_rate": 7.032740000341604e-05,
      "loss": 0.6528,
      "step": 9920
    },
    {
      "epoch": 0.63662604233483,
      "grad_norm": 1.0340930274606936,
      "learning_rate": 7.022050878947683e-05,
      "loss": 0.5579,
      "step": 9925
    },
    {
      "epoch": 0.6369467607440668,
      "grad_norm": 0.892410748846026,
      "learning_rate": 7.011365489881164e-05,
      "loss": 0.622,
      "step": 9930
    },
    {
      "epoch": 0.6372674791533034,
      "grad_norm": 1.026899828920046,
      "learning_rate": 7.000683846534268e-05,
      "loss": 0.7173,
      "step": 9935
    },
    {
      "epoch": 0.63758819756254,
      "grad_norm": 0.7906424850106287,
      "learning_rate": 6.99000596229453e-05,
      "loss": 0.6518,
      "step": 9940
    },
    {
      "epoch": 0.6379089159717768,
      "grad_norm": 0.885516437560555,
      "learning_rate": 6.979331850544772e-05,
      "loss": 0.7629,
      "step": 9945
    },
    {
      "epoch": 0.6382296343810134,
      "grad_norm": 1.2585108576804727,
      "learning_rate": 6.968661524663085e-05,
      "loss": 0.5346,
      "step": 9950
    },
    {
      "epoch": 0.6385503527902502,
      "grad_norm": 0.6378216033005294,
      "learning_rate": 6.957994998022817e-05,
      "loss": 0.5599,
      "step": 9955
    },
    {
      "epoch": 0.6388710711994868,
      "grad_norm": 1.0857649237283717,
      "learning_rate": 6.947332283992553e-05,
      "loss": 0.5546,
      "step": 9960
    },
    {
      "epoch": 0.6391917896087236,
      "grad_norm": 0.7485103608812504,
      "learning_rate": 6.936673395936103e-05,
      "loss": 0.7607,
      "step": 9965
    },
    {
      "epoch": 0.6395125080179602,
      "grad_norm": 0.6831137045570516,
      "learning_rate": 6.926018347212482e-05,
      "loss": 0.7246,
      "step": 9970
    },
    {
      "epoch": 0.6398332264271969,
      "grad_norm": 0.8371300993555119,
      "learning_rate": 6.915367151175887e-05,
      "loss": 0.7647,
      "step": 9975
    },
    {
      "epoch": 0.6401539448364336,
      "grad_norm": 0.6790794293309601,
      "learning_rate": 6.904719821175691e-05,
      "loss": 0.709,
      "step": 9980
    },
    {
      "epoch": 0.6404746632456703,
      "grad_norm": 1.2809292980337206,
      "learning_rate": 6.894076370556419e-05,
      "loss": 0.7072,
      "step": 9985
    },
    {
      "epoch": 0.640795381654907,
      "grad_norm": 0.6309070049475263,
      "learning_rate": 6.883436812657736e-05,
      "loss": 0.7517,
      "step": 9990
    },
    {
      "epoch": 0.6411161000641437,
      "grad_norm": 0.7057857328226916,
      "learning_rate": 6.872801160814429e-05,
      "loss": 0.5892,
      "step": 9995
    },
    {
      "epoch": 0.6414368184733804,
      "grad_norm": 0.6684609047663461,
      "learning_rate": 6.862169428356391e-05,
      "loss": 0.7041,
      "step": 10000
    },
    {
      "epoch": 0.641757536882617,
      "grad_norm": 0.9825781560923286,
      "learning_rate": 6.851541628608593e-05,
      "loss": 0.5732,
      "step": 10005
    },
    {
      "epoch": 0.6420782552918538,
      "grad_norm": 0.6656401212815036,
      "learning_rate": 6.840917774891089e-05,
      "loss": 0.6996,
      "step": 10010
    },
    {
      "epoch": 0.6423989737010904,
      "grad_norm": 1.0284673996842317,
      "learning_rate": 6.830297880518982e-05,
      "loss": 0.6385,
      "step": 10015
    },
    {
      "epoch": 0.6427196921103271,
      "grad_norm": 1.3813453443085013,
      "learning_rate": 6.819681958802411e-05,
      "loss": 0.8024,
      "step": 10020
    },
    {
      "epoch": 0.6430404105195638,
      "grad_norm": 1.0439998261378045,
      "learning_rate": 6.809070023046542e-05,
      "loss": 0.7246,
      "step": 10025
    },
    {
      "epoch": 0.6433611289288005,
      "grad_norm": 1.3726132291968678,
      "learning_rate": 6.798462086551536e-05,
      "loss": 0.7607,
      "step": 10030
    },
    {
      "epoch": 0.6436818473380372,
      "grad_norm": 0.696112632783953,
      "learning_rate": 6.78785816261255e-05,
      "loss": 0.6657,
      "step": 10035
    },
    {
      "epoch": 0.6440025657472739,
      "grad_norm": 0.9271308758677715,
      "learning_rate": 6.777258264519712e-05,
      "loss": 0.7089,
      "step": 10040
    },
    {
      "epoch": 0.6443232841565106,
      "grad_norm": 0.971107223858267,
      "learning_rate": 6.766662405558095e-05,
      "loss": 0.7127,
      "step": 10045
    },
    {
      "epoch": 0.6446440025657473,
      "grad_norm": 1.1077553805147324,
      "learning_rate": 6.756070599007717e-05,
      "loss": 0.6674,
      "step": 10050
    },
    {
      "epoch": 0.6449647209749839,
      "grad_norm": 1.1241145720577337,
      "learning_rate": 6.745482858143519e-05,
      "loss": 0.6908,
      "step": 10055
    },
    {
      "epoch": 0.6452854393842207,
      "grad_norm": 1.0311402231942566,
      "learning_rate": 6.734899196235342e-05,
      "loss": 0.5903,
      "step": 10060
    },
    {
      "epoch": 0.6456061577934573,
      "grad_norm": 1.1164020984789884,
      "learning_rate": 6.724319626547916e-05,
      "loss": 0.7299,
      "step": 10065
    },
    {
      "epoch": 0.6459268762026941,
      "grad_norm": 0.862577581408513,
      "learning_rate": 6.71374416234084e-05,
      "loss": 0.6447,
      "step": 10070
    },
    {
      "epoch": 0.6462475946119307,
      "grad_norm": 0.6813994701366789,
      "learning_rate": 6.703172816868575e-05,
      "loss": 0.6327,
      "step": 10075
    },
    {
      "epoch": 0.6465683130211675,
      "grad_norm": 0.8916563918460675,
      "learning_rate": 6.69260560338041e-05,
      "loss": 0.5921,
      "step": 10080
    },
    {
      "epoch": 0.6468890314304041,
      "grad_norm": 0.9332137514439207,
      "learning_rate": 6.682042535120463e-05,
      "loss": 0.6558,
      "step": 10085
    },
    {
      "epoch": 0.6472097498396407,
      "grad_norm": 0.83477107809383,
      "learning_rate": 6.67148362532765e-05,
      "loss": 0.6404,
      "step": 10090
    },
    {
      "epoch": 0.6475304682488775,
      "grad_norm": 1.2218962185380584,
      "learning_rate": 6.66092888723568e-05,
      "loss": 0.6856,
      "step": 10095
    },
    {
      "epoch": 0.6478511866581141,
      "grad_norm": 0.5613953193652488,
      "learning_rate": 6.650378334073036e-05,
      "loss": 0.5747,
      "step": 10100
    },
    {
      "epoch": 0.6481719050673509,
      "grad_norm": 1.161315529719475,
      "learning_rate": 6.639831979062952e-05,
      "loss": 0.7714,
      "step": 10105
    },
    {
      "epoch": 0.6484926234765875,
      "grad_norm": 1.2013466455307917,
      "learning_rate": 6.629289835423393e-05,
      "loss": 0.7067,
      "step": 10110
    },
    {
      "epoch": 0.6488133418858243,
      "grad_norm": 0.8985970817080027,
      "learning_rate": 6.618751916367061e-05,
      "loss": 0.8022,
      "step": 10115
    },
    {
      "epoch": 0.6491340602950609,
      "grad_norm": 1.2136972519623022,
      "learning_rate": 6.608218235101352e-05,
      "loss": 0.6141,
      "step": 10120
    },
    {
      "epoch": 0.6494547787042976,
      "grad_norm": 0.9718583450791072,
      "learning_rate": 6.597688804828353e-05,
      "loss": 0.5938,
      "step": 10125
    },
    {
      "epoch": 0.6497754971135343,
      "grad_norm": 0.9547734637829278,
      "learning_rate": 6.587163638744827e-05,
      "loss": 0.6992,
      "step": 10130
    },
    {
      "epoch": 0.650096215522771,
      "grad_norm": 0.9151909021410464,
      "learning_rate": 6.57664275004219e-05,
      "loss": 0.7343,
      "step": 10135
    },
    {
      "epoch": 0.6504169339320077,
      "grad_norm": 1.5971760196514397,
      "learning_rate": 6.566126151906498e-05,
      "loss": 0.7017,
      "step": 10140
    },
    {
      "epoch": 0.6507376523412444,
      "grad_norm": 0.8126791037548418,
      "learning_rate": 6.555613857518425e-05,
      "loss": 0.6567,
      "step": 10145
    },
    {
      "epoch": 0.6510583707504811,
      "grad_norm": 0.7571219128173635,
      "learning_rate": 6.545105880053258e-05,
      "loss": 0.6871,
      "step": 10150
    },
    {
      "epoch": 0.6513790891597178,
      "grad_norm": 0.688497347517119,
      "learning_rate": 6.534602232680869e-05,
      "loss": 0.7347,
      "step": 10155
    },
    {
      "epoch": 0.6516998075689544,
      "grad_norm": 0.8955793200079804,
      "learning_rate": 6.524102928565706e-05,
      "loss": 0.5972,
      "step": 10160
    },
    {
      "epoch": 0.6520205259781912,
      "grad_norm": 0.9443767111598063,
      "learning_rate": 6.513607980866768e-05,
      "loss": 0.723,
      "step": 10165
    },
    {
      "epoch": 0.6523412443874278,
      "grad_norm": 0.8214020012837946,
      "learning_rate": 6.5031174027376e-05,
      "loss": 0.7531,
      "step": 10170
    },
    {
      "epoch": 0.6526619627966646,
      "grad_norm": 0.9405554364877039,
      "learning_rate": 6.492631207326271e-05,
      "loss": 0.6579,
      "step": 10175
    },
    {
      "epoch": 0.6529826812059012,
      "grad_norm": 0.8528480386187783,
      "learning_rate": 6.482149407775348e-05,
      "loss": 0.6639,
      "step": 10180
    },
    {
      "epoch": 0.653303399615138,
      "grad_norm": 1.0215536554217552,
      "learning_rate": 6.471672017221897e-05,
      "loss": 0.6788,
      "step": 10185
    },
    {
      "epoch": 0.6536241180243746,
      "grad_norm": 1.0458906526223661,
      "learning_rate": 6.461199048797457e-05,
      "loss": 0.7466,
      "step": 10190
    },
    {
      "epoch": 0.6539448364336113,
      "grad_norm": 0.7250104664732925,
      "learning_rate": 6.450730515628025e-05,
      "loss": 0.4862,
      "step": 10195
    },
    {
      "epoch": 0.654265554842848,
      "grad_norm": 1.1562228223771571,
      "learning_rate": 6.440266430834035e-05,
      "loss": 0.7554,
      "step": 10200
    },
    {
      "epoch": 0.6545862732520846,
      "grad_norm": 0.7656674676905709,
      "learning_rate": 6.429806807530348e-05,
      "loss": 0.6668,
      "step": 10205
    },
    {
      "epoch": 0.6549069916613214,
      "grad_norm": 1.1136322722942007,
      "learning_rate": 6.419351658826236e-05,
      "loss": 0.7241,
      "step": 10210
    },
    {
      "epoch": 0.655227710070558,
      "grad_norm": 1.0761146316049985,
      "learning_rate": 6.40890099782536e-05,
      "loss": 0.6501,
      "step": 10215
    },
    {
      "epoch": 0.6555484284797948,
      "grad_norm": 0.9079430022905365,
      "learning_rate": 6.398454837625761e-05,
      "loss": 0.8384,
      "step": 10220
    },
    {
      "epoch": 0.6558691468890314,
      "grad_norm": 0.8488475441393789,
      "learning_rate": 6.388013191319829e-05,
      "loss": 0.697,
      "step": 10225
    },
    {
      "epoch": 0.6561898652982682,
      "grad_norm": 1.8731573144161795,
      "learning_rate": 6.377576071994306e-05,
      "loss": 0.5274,
      "step": 10230
    },
    {
      "epoch": 0.6565105837075048,
      "grad_norm": 0.9597668865369915,
      "learning_rate": 6.367143492730257e-05,
      "loss": 0.5793,
      "step": 10235
    },
    {
      "epoch": 0.6568313021167415,
      "grad_norm": 0.9184805187055093,
      "learning_rate": 6.356715466603058e-05,
      "loss": 0.7204,
      "step": 10240
    },
    {
      "epoch": 0.6571520205259782,
      "grad_norm": 1.010481078501907,
      "learning_rate": 6.346292006682375e-05,
      "loss": 0.6568,
      "step": 10245
    },
    {
      "epoch": 0.6574727389352149,
      "grad_norm": 1.2893595780329616,
      "learning_rate": 6.335873126032155e-05,
      "loss": 0.7476,
      "step": 10250
    },
    {
      "epoch": 0.6577934573444516,
      "grad_norm": 0.7919851978335327,
      "learning_rate": 6.325458837710603e-05,
      "loss": 0.6681,
      "step": 10255
    },
    {
      "epoch": 0.6581141757536882,
      "grad_norm": 0.7133876917502856,
      "learning_rate": 6.31504915477017e-05,
      "loss": 0.7879,
      "step": 10260
    },
    {
      "epoch": 0.658434894162925,
      "grad_norm": 0.8067826322951818,
      "learning_rate": 6.304644090257536e-05,
      "loss": 0.64,
      "step": 10265
    },
    {
      "epoch": 0.6587556125721616,
      "grad_norm": 0.7174409241967863,
      "learning_rate": 6.294243657213587e-05,
      "loss": 0.5671,
      "step": 10270
    },
    {
      "epoch": 0.6590763309813983,
      "grad_norm": 0.7812465401233117,
      "learning_rate": 6.283847868673417e-05,
      "loss": 0.628,
      "step": 10275
    },
    {
      "epoch": 0.659397049390635,
      "grad_norm": 0.565828308616574,
      "learning_rate": 6.273456737666281e-05,
      "loss": 0.621,
      "step": 10280
    },
    {
      "epoch": 0.6597177677998717,
      "grad_norm": 1.0913219783317336,
      "learning_rate": 6.26307027721561e-05,
      "loss": 0.6341,
      "step": 10285
    },
    {
      "epoch": 0.6600384862091084,
      "grad_norm": 0.812647700581263,
      "learning_rate": 6.252688500338979e-05,
      "loss": 0.6266,
      "step": 10290
    },
    {
      "epoch": 0.6603592046183451,
      "grad_norm": 1.3344320513324446,
      "learning_rate": 6.242311420048087e-05,
      "loss": 0.697,
      "step": 10295
    },
    {
      "epoch": 0.6606799230275818,
      "grad_norm": 0.8037339071262586,
      "learning_rate": 6.231939049348756e-05,
      "loss": 0.662,
      "step": 10300
    },
    {
      "epoch": 0.6610006414368185,
      "grad_norm": 0.8348124914063436,
      "learning_rate": 6.221571401240898e-05,
      "loss": 0.5953,
      "step": 10305
    },
    {
      "epoch": 0.6613213598460551,
      "grad_norm": 0.8007698372402566,
      "learning_rate": 6.211208488718508e-05,
      "loss": 0.7067,
      "step": 10310
    },
    {
      "epoch": 0.6616420782552919,
      "grad_norm": 1.0240691382811138,
      "learning_rate": 6.200850324769645e-05,
      "loss": 0.6563,
      "step": 10315
    },
    {
      "epoch": 0.6619627966645285,
      "grad_norm": 0.6245391951301155,
      "learning_rate": 6.190496922376419e-05,
      "loss": 0.566,
      "step": 10320
    },
    {
      "epoch": 0.6622835150737653,
      "grad_norm": 0.9667633410108524,
      "learning_rate": 6.180148294514969e-05,
      "loss": 0.6114,
      "step": 10325
    },
    {
      "epoch": 0.6626042334830019,
      "grad_norm": 0.7507271356005688,
      "learning_rate": 6.169804454155457e-05,
      "loss": 0.5604,
      "step": 10330
    },
    {
      "epoch": 0.6629249518922387,
      "grad_norm": 1.3185339543060972,
      "learning_rate": 6.159465414262034e-05,
      "loss": 0.6832,
      "step": 10335
    },
    {
      "epoch": 0.6632456703014753,
      "grad_norm": 1.1847306027291458,
      "learning_rate": 6.14913118779284e-05,
      "loss": 0.8276,
      "step": 10340
    },
    {
      "epoch": 0.6635663887107119,
      "grad_norm": 0.645482702109424,
      "learning_rate": 6.138801787699988e-05,
      "loss": 0.7251,
      "step": 10345
    },
    {
      "epoch": 0.6638871071199487,
      "grad_norm": 0.9170687001642995,
      "learning_rate": 6.128477226929532e-05,
      "loss": 0.5489,
      "step": 10350
    },
    {
      "epoch": 0.6642078255291853,
      "grad_norm": 1.000806725934412,
      "learning_rate": 6.118157518421468e-05,
      "loss": 0.7246,
      "step": 10355
    },
    {
      "epoch": 0.6645285439384221,
      "grad_norm": 0.8379511672470946,
      "learning_rate": 6.107842675109703e-05,
      "loss": 0.7874,
      "step": 10360
    },
    {
      "epoch": 0.6648492623476587,
      "grad_norm": 0.7371509556636497,
      "learning_rate": 6.097532709922054e-05,
      "loss": 0.6244,
      "step": 10365
    },
    {
      "epoch": 0.6651699807568955,
      "grad_norm": 0.9539665664045133,
      "learning_rate": 6.087227635780225e-05,
      "loss": 0.6107,
      "step": 10370
    },
    {
      "epoch": 0.6654906991661321,
      "grad_norm": 0.7979555148132079,
      "learning_rate": 6.0769274655997775e-05,
      "loss": 0.5344,
      "step": 10375
    },
    {
      "epoch": 0.6658114175753689,
      "grad_norm": 0.909657054573839,
      "learning_rate": 6.0666322122901396e-05,
      "loss": 0.6275,
      "step": 10380
    },
    {
      "epoch": 0.6661321359846055,
      "grad_norm": 1.0313940290067696,
      "learning_rate": 6.056341888754573e-05,
      "loss": 0.6082,
      "step": 10385
    },
    {
      "epoch": 0.6664528543938422,
      "grad_norm": 0.7489838245596225,
      "learning_rate": 6.0460565078901633e-05,
      "loss": 0.5819,
      "step": 10390
    },
    {
      "epoch": 0.6667735728030789,
      "grad_norm": 1.1118413959198947,
      "learning_rate": 6.035776082587794e-05,
      "loss": 0.5196,
      "step": 10395
    },
    {
      "epoch": 0.6670942912123156,
      "grad_norm": 0.8125706280287548,
      "learning_rate": 6.025500625732142e-05,
      "loss": 0.5352,
      "step": 10400
    },
    {
      "epoch": 0.6674150096215523,
      "grad_norm": 0.9492211031254315,
      "learning_rate": 6.015230150201661e-05,
      "loss": 0.5139,
      "step": 10405
    },
    {
      "epoch": 0.667735728030789,
      "grad_norm": 0.7268694268672965,
      "learning_rate": 6.0049646688685567e-05,
      "loss": 0.6442,
      "step": 10410
    },
    {
      "epoch": 0.6680564464400257,
      "grad_norm": 0.7538411268384596,
      "learning_rate": 5.994704194598775e-05,
      "loss": 0.7771,
      "step": 10415
    },
    {
      "epoch": 0.6683771648492624,
      "grad_norm": 0.732055273874663,
      "learning_rate": 5.9844487402519886e-05,
      "loss": 0.4246,
      "step": 10420
    },
    {
      "epoch": 0.668697883258499,
      "grad_norm": 0.9282996799361855,
      "learning_rate": 5.97419831868158e-05,
      "loss": 0.6212,
      "step": 10425
    },
    {
      "epoch": 0.6690186016677357,
      "grad_norm": 0.8160584484135337,
      "learning_rate": 5.96395294273462e-05,
      "loss": 0.5947,
      "step": 10430
    },
    {
      "epoch": 0.6693393200769724,
      "grad_norm": 0.563899508227464,
      "learning_rate": 5.9537126252518595e-05,
      "loss": 0.6085,
      "step": 10435
    },
    {
      "epoch": 0.6696600384862091,
      "grad_norm": 0.7096696600311123,
      "learning_rate": 5.9434773790677076e-05,
      "loss": 0.6623,
      "step": 10440
    },
    {
      "epoch": 0.6699807568954458,
      "grad_norm": 1.0083725702632502,
      "learning_rate": 5.933247217010216e-05,
      "loss": 0.7533,
      "step": 10445
    },
    {
      "epoch": 0.6703014753046825,
      "grad_norm": 0.8583730314996155,
      "learning_rate": 5.9230221519010634e-05,
      "loss": 0.6899,
      "step": 10450
    },
    {
      "epoch": 0.6706221937139192,
      "grad_norm": 0.9948242533172998,
      "learning_rate": 5.912802196555547e-05,
      "loss": 0.6441,
      "step": 10455
    },
    {
      "epoch": 0.6709429121231558,
      "grad_norm": 0.8416659287585814,
      "learning_rate": 5.902587363782553e-05,
      "loss": 0.52,
      "step": 10460
    },
    {
      "epoch": 0.6712636305323926,
      "grad_norm": 0.7875617753719326,
      "learning_rate": 5.892377666384552e-05,
      "loss": 0.8289,
      "step": 10465
    },
    {
      "epoch": 0.6715843489416292,
      "grad_norm": 1.3665322708300398,
      "learning_rate": 5.882173117157579e-05,
      "loss": 0.6931,
      "step": 10470
    },
    {
      "epoch": 0.671905067350866,
      "grad_norm": 1.484703583509698,
      "learning_rate": 5.871973728891207e-05,
      "loss": 0.6282,
      "step": 10475
    },
    {
      "epoch": 0.6722257857601026,
      "grad_norm": 0.6277171001704246,
      "learning_rate": 5.861779514368552e-05,
      "loss": 0.5476,
      "step": 10480
    },
    {
      "epoch": 0.6725465041693394,
      "grad_norm": 0.893359208561377,
      "learning_rate": 5.851590486366241e-05,
      "loss": 0.5851,
      "step": 10485
    },
    {
      "epoch": 0.672867222578576,
      "grad_norm": 0.7320275300041723,
      "learning_rate": 5.841406657654402e-05,
      "loss": 0.7706,
      "step": 10490
    },
    {
      "epoch": 0.6731879409878126,
      "grad_norm": 0.8287094016340315,
      "learning_rate": 5.831228040996643e-05,
      "loss": 0.6782,
      "step": 10495
    },
    {
      "epoch": 0.6735086593970494,
      "grad_norm": 0.668748966976369,
      "learning_rate": 5.8210546491500416e-05,
      "loss": 0.4843,
      "step": 10500
    },
    {
      "epoch": 0.673829377806286,
      "grad_norm": 0.7774193196749479,
      "learning_rate": 5.8108864948651385e-05,
      "loss": 0.6915,
      "step": 10505
    },
    {
      "epoch": 0.6741500962155228,
      "grad_norm": 0.7361276836480435,
      "learning_rate": 5.8007235908858815e-05,
      "loss": 0.6037,
      "step": 10510
    },
    {
      "epoch": 0.6744708146247594,
      "grad_norm": 0.9273797610571103,
      "learning_rate": 5.790565949949669e-05,
      "loss": 0.6447,
      "step": 10515
    },
    {
      "epoch": 0.6747915330339962,
      "grad_norm": 0.7357377379625472,
      "learning_rate": 5.780413584787285e-05,
      "loss": 0.6123,
      "step": 10520
    },
    {
      "epoch": 0.6751122514432328,
      "grad_norm": 0.7349196129011529,
      "learning_rate": 5.770266508122903e-05,
      "loss": 0.6148,
      "step": 10525
    },
    {
      "epoch": 0.6754329698524695,
      "grad_norm": 0.7228184809432814,
      "learning_rate": 5.760124732674079e-05,
      "loss": 0.7375,
      "step": 10530
    },
    {
      "epoch": 0.6757536882617062,
      "grad_norm": 0.7245846277368149,
      "learning_rate": 5.749988271151714e-05,
      "loss": 0.8622,
      "step": 10535
    },
    {
      "epoch": 0.6760744066709429,
      "grad_norm": 0.7864676224072312,
      "learning_rate": 5.739857136260046e-05,
      "loss": 0.712,
      "step": 10540
    },
    {
      "epoch": 0.6763951250801796,
      "grad_norm": 1.645141716455399,
      "learning_rate": 5.7297313406966534e-05,
      "loss": 0.6939,
      "step": 10545
    },
    {
      "epoch": 0.6767158434894163,
      "grad_norm": 0.5062488079743617,
      "learning_rate": 5.719610897152405e-05,
      "loss": 0.5611,
      "step": 10550
    },
    {
      "epoch": 0.677036561898653,
      "grad_norm": 0.7048718325836721,
      "learning_rate": 5.709495818311477e-05,
      "loss": 0.7464,
      "step": 10555
    },
    {
      "epoch": 0.6773572803078897,
      "grad_norm": 1.1659307946452016,
      "learning_rate": 5.699386116851309e-05,
      "loss": 0.7177,
      "step": 10560
    },
    {
      "epoch": 0.6776779987171264,
      "grad_norm": 0.9170897775066968,
      "learning_rate": 5.6892818054426035e-05,
      "loss": 0.669,
      "step": 10565
    },
    {
      "epoch": 0.6779987171263631,
      "grad_norm": 1.0508889718757837,
      "learning_rate": 5.679182896749322e-05,
      "loss": 0.6744,
      "step": 10570
    },
    {
      "epoch": 0.6783194355355997,
      "grad_norm": 0.8259858656059345,
      "learning_rate": 5.669089403428627e-05,
      "loss": 0.6801,
      "step": 10575
    },
    {
      "epoch": 0.6786401539448365,
      "grad_norm": 0.6629893516596802,
      "learning_rate": 5.659001338130923e-05,
      "loss": 0.6013,
      "step": 10580
    },
    {
      "epoch": 0.6789608723540731,
      "grad_norm": 0.968488221191984,
      "learning_rate": 5.648918713499787e-05,
      "loss": 0.7905,
      "step": 10585
    },
    {
      "epoch": 0.6792815907633099,
      "grad_norm": 0.7585559410962367,
      "learning_rate": 5.6388415421719996e-05,
      "loss": 0.5525,
      "step": 10590
    },
    {
      "epoch": 0.6796023091725465,
      "grad_norm": 1.2745141606185377,
      "learning_rate": 5.6287698367774897e-05,
      "loss": 0.7167,
      "step": 10595
    },
    {
      "epoch": 0.6799230275817832,
      "grad_norm": 0.6728914302123802,
      "learning_rate": 5.6187036099393375e-05,
      "loss": 0.6937,
      "step": 10600
    },
    {
      "epoch": 0.6802437459910199,
      "grad_norm": 0.600819149081247,
      "learning_rate": 5.608642874273771e-05,
      "loss": 0.6316,
      "step": 10605
    },
    {
      "epoch": 0.6805644644002565,
      "grad_norm": 0.6959088365991615,
      "learning_rate": 5.598587642390114e-05,
      "loss": 0.7457,
      "step": 10610
    },
    {
      "epoch": 0.6808851828094933,
      "grad_norm": 0.7266824723699652,
      "learning_rate": 5.5885379268908134e-05,
      "loss": 0.6045,
      "step": 10615
    },
    {
      "epoch": 0.6812059012187299,
      "grad_norm": 0.6681555688621381,
      "learning_rate": 5.578493740371389e-05,
      "loss": 0.6286,
      "step": 10620
    },
    {
      "epoch": 0.6815266196279667,
      "grad_norm": 0.7610528413953269,
      "learning_rate": 5.568455095420431e-05,
      "loss": 0.5733,
      "step": 10625
    },
    {
      "epoch": 0.6818473380372033,
      "grad_norm": 1.3214312132482846,
      "learning_rate": 5.558422004619597e-05,
      "loss": 0.6319,
      "step": 10630
    },
    {
      "epoch": 0.6821680564464401,
      "grad_norm": 0.6966982078568826,
      "learning_rate": 5.548394480543564e-05,
      "loss": 0.4698,
      "step": 10635
    },
    {
      "epoch": 0.6824887748556767,
      "grad_norm": 0.6367878363111128,
      "learning_rate": 5.538372535760057e-05,
      "loss": 0.662,
      "step": 10640
    },
    {
      "epoch": 0.6828094932649134,
      "grad_norm": 0.5466987109462808,
      "learning_rate": 5.528356182829777e-05,
      "loss": 0.5193,
      "step": 10645
    },
    {
      "epoch": 0.6831302116741501,
      "grad_norm": 0.8091665259225381,
      "learning_rate": 5.518345434306444e-05,
      "loss": 0.5853,
      "step": 10650
    },
    {
      "epoch": 0.6834509300833868,
      "grad_norm": 0.5989345577351957,
      "learning_rate": 5.508340302736743e-05,
      "loss": 0.5997,
      "step": 10655
    },
    {
      "epoch": 0.6837716484926235,
      "grad_norm": 0.8246700551716405,
      "learning_rate": 5.498340800660313e-05,
      "loss": 0.715,
      "step": 10660
    },
    {
      "epoch": 0.6840923669018601,
      "grad_norm": 0.7999016646795889,
      "learning_rate": 5.488346940609753e-05,
      "loss": 0.7212,
      "step": 10665
    },
    {
      "epoch": 0.6844130853110969,
      "grad_norm": 0.5763703153217136,
      "learning_rate": 5.4783587351105734e-05,
      "loss": 0.6361,
      "step": 10670
    },
    {
      "epoch": 0.6847338037203335,
      "grad_norm": 1.3911645606934129,
      "learning_rate": 5.4683761966812154e-05,
      "loss": 0.7494,
      "step": 10675
    },
    {
      "epoch": 0.6850545221295702,
      "grad_norm": 1.1526450545139104,
      "learning_rate": 5.458399337833002e-05,
      "loss": 0.5274,
      "step": 10680
    },
    {
      "epoch": 0.6853752405388069,
      "grad_norm": 1.0168267129176949,
      "learning_rate": 5.448428171070141e-05,
      "loss": 0.8071,
      "step": 10685
    },
    {
      "epoch": 0.6856959589480436,
      "grad_norm": 0.7598086971815275,
      "learning_rate": 5.438462708889718e-05,
      "loss": 0.676,
      "step": 10690
    },
    {
      "epoch": 0.6860166773572803,
      "grad_norm": 1.056491176869749,
      "learning_rate": 5.428502963781654e-05,
      "loss": 0.591,
      "step": 10695
    },
    {
      "epoch": 0.686337395766517,
      "grad_norm": 0.8433612740283131,
      "learning_rate": 5.418548948228709e-05,
      "loss": 0.6323,
      "step": 10700
    },
    {
      "epoch": 0.6866581141757537,
      "grad_norm": 1.1399615640431888,
      "learning_rate": 5.408600674706474e-05,
      "loss": 0.6943,
      "step": 10705
    },
    {
      "epoch": 0.6869788325849904,
      "grad_norm": 1.1427576567421822,
      "learning_rate": 5.39865815568332e-05,
      "loss": 0.6542,
      "step": 10710
    },
    {
      "epoch": 0.687299550994227,
      "grad_norm": 0.8398449025370285,
      "learning_rate": 5.3887214036204295e-05,
      "loss": 0.6775,
      "step": 10715
    },
    {
      "epoch": 0.6876202694034638,
      "grad_norm": 0.6183753226440165,
      "learning_rate": 5.3787904309717365e-05,
      "loss": 0.5856,
      "step": 10720
    },
    {
      "epoch": 0.6879409878127004,
      "grad_norm": 0.7303097761926962,
      "learning_rate": 5.368865250183952e-05,
      "loss": 0.5393,
      "step": 10725
    },
    {
      "epoch": 0.6882617062219372,
      "grad_norm": 1.042159531292707,
      "learning_rate": 5.358945873696514e-05,
      "loss": 0.598,
      "step": 10730
    },
    {
      "epoch": 0.6885824246311738,
      "grad_norm": 0.8726534481321939,
      "learning_rate": 5.3490323139415844e-05,
      "loss": 0.6874,
      "step": 10735
    },
    {
      "epoch": 0.6889031430404106,
      "grad_norm": 0.8279765934645724,
      "learning_rate": 5.339124583344046e-05,
      "loss": 0.7282,
      "step": 10740
    },
    {
      "epoch": 0.6892238614496472,
      "grad_norm": 1.1033370234326692,
      "learning_rate": 5.3292226943214666e-05,
      "loss": 0.6647,
      "step": 10745
    },
    {
      "epoch": 0.689544579858884,
      "grad_norm": 0.6731635406372563,
      "learning_rate": 5.3193266592840994e-05,
      "loss": 0.642,
      "step": 10750
    },
    {
      "epoch": 0.6898652982681206,
      "grad_norm": 0.682406135632238,
      "learning_rate": 5.309436490634855e-05,
      "loss": 0.6876,
      "step": 10755
    },
    {
      "epoch": 0.6901860166773572,
      "grad_norm": 0.6884304464201593,
      "learning_rate": 5.299552200769289e-05,
      "loss": 0.6405,
      "step": 10760
    },
    {
      "epoch": 0.690506735086594,
      "grad_norm": 0.9303606786373573,
      "learning_rate": 5.289673802075601e-05,
      "loss": 0.5867,
      "step": 10765
    },
    {
      "epoch": 0.6908274534958306,
      "grad_norm": 0.8966481917540933,
      "learning_rate": 5.279801306934598e-05,
      "loss": 0.7328,
      "step": 10770
    },
    {
      "epoch": 0.6911481719050674,
      "grad_norm": 0.8301326693368314,
      "learning_rate": 5.269934727719685e-05,
      "loss": 0.673,
      "step": 10775
    },
    {
      "epoch": 0.691468890314304,
      "grad_norm": 0.9231136482226949,
      "learning_rate": 5.260074076796859e-05,
      "loss": 0.8013,
      "step": 10780
    },
    {
      "epoch": 0.6917896087235408,
      "grad_norm": 0.6344332487623263,
      "learning_rate": 5.250219366524687e-05,
      "loss": 0.6477,
      "step": 10785
    },
    {
      "epoch": 0.6921103271327774,
      "grad_norm": 0.6184925377516596,
      "learning_rate": 5.240370609254288e-05,
      "loss": 0.5484,
      "step": 10790
    },
    {
      "epoch": 0.6924310455420141,
      "grad_norm": 0.7946249563385892,
      "learning_rate": 5.230527817329316e-05,
      "loss": 0.7455,
      "step": 10795
    },
    {
      "epoch": 0.6927517639512508,
      "grad_norm": 0.5532448902772473,
      "learning_rate": 5.22069100308596e-05,
      "loss": 0.5486,
      "step": 10800
    },
    {
      "epoch": 0.6930724823604875,
      "grad_norm": 0.6171304782365078,
      "learning_rate": 5.210860178852903e-05,
      "loss": 0.681,
      "step": 10805
    },
    {
      "epoch": 0.6933932007697242,
      "grad_norm": 1.2635876971136728,
      "learning_rate": 5.201035356951334e-05,
      "loss": 0.6736,
      "step": 10810
    },
    {
      "epoch": 0.6937139191789609,
      "grad_norm": 0.5205480150437042,
      "learning_rate": 5.191216549694909e-05,
      "loss": 0.5153,
      "step": 10815
    },
    {
      "epoch": 0.6940346375881976,
      "grad_norm": 0.9442523324184217,
      "learning_rate": 5.1814037693897464e-05,
      "loss": 0.6185,
      "step": 10820
    },
    {
      "epoch": 0.6943553559974343,
      "grad_norm": 1.1934267268940544,
      "learning_rate": 5.1715970283344205e-05,
      "loss": 0.6677,
      "step": 10825
    },
    {
      "epoch": 0.6946760744066709,
      "grad_norm": 0.7652562771619698,
      "learning_rate": 5.161796338819924e-05,
      "loss": 0.7638,
      "step": 10830
    },
    {
      "epoch": 0.6949967928159076,
      "grad_norm": 0.8994137424891815,
      "learning_rate": 5.152001713129677e-05,
      "loss": 0.5898,
      "step": 10835
    },
    {
      "epoch": 0.6953175112251443,
      "grad_norm": 1.1569578317709166,
      "learning_rate": 5.142213163539491e-05,
      "loss": 0.5728,
      "step": 10840
    },
    {
      "epoch": 0.695638229634381,
      "grad_norm": 0.9567492023568471,
      "learning_rate": 5.132430702317562e-05,
      "loss": 0.6646,
      "step": 10845
    },
    {
      "epoch": 0.6959589480436177,
      "grad_norm": 0.9942541719053858,
      "learning_rate": 5.122654341724462e-05,
      "loss": 0.7398,
      "step": 10850
    },
    {
      "epoch": 0.6962796664528544,
      "grad_norm": 0.69345380130255,
      "learning_rate": 5.1128840940131064e-05,
      "loss": 0.5888,
      "step": 10855
    },
    {
      "epoch": 0.6966003848620911,
      "grad_norm": 0.8276215026435204,
      "learning_rate": 5.103119971428765e-05,
      "loss": 0.6781,
      "step": 10860
    },
    {
      "epoch": 0.6969211032713277,
      "grad_norm": 0.7245991079345528,
      "learning_rate": 5.093361986209015e-05,
      "loss": 0.7442,
      "step": 10865
    },
    {
      "epoch": 0.6972418216805645,
      "grad_norm": 0.7885551527874833,
      "learning_rate": 5.0836101505837494e-05,
      "loss": 0.6788,
      "step": 10870
    },
    {
      "epoch": 0.6975625400898011,
      "grad_norm": 0.857297702149309,
      "learning_rate": 5.073864476775157e-05,
      "loss": 0.6013,
      "step": 10875
    },
    {
      "epoch": 0.6978832584990379,
      "grad_norm": 0.6348649341355659,
      "learning_rate": 5.064124976997693e-05,
      "loss": 0.6045,
      "step": 10880
    },
    {
      "epoch": 0.6982039769082745,
      "grad_norm": 0.6585605551969316,
      "learning_rate": 5.054391663458087e-05,
      "loss": 0.6171,
      "step": 10885
    },
    {
      "epoch": 0.6985246953175113,
      "grad_norm": 0.986468962885202,
      "learning_rate": 5.044664548355307e-05,
      "loss": 0.7186,
      "step": 10890
    },
    {
      "epoch": 0.6988454137267479,
      "grad_norm": 0.9785918246000489,
      "learning_rate": 5.0349436438805494e-05,
      "loss": 0.7877,
      "step": 10895
    },
    {
      "epoch": 0.6991661321359846,
      "grad_norm": 1.5065392603292607,
      "learning_rate": 5.025228962217241e-05,
      "loss": 0.6156,
      "step": 10900
    },
    {
      "epoch": 0.6994868505452213,
      "grad_norm": 0.9224408618353005,
      "learning_rate": 5.015520515540996e-05,
      "loss": 0.5855,
      "step": 10905
    },
    {
      "epoch": 0.699807568954458,
      "grad_norm": 0.8828715863784493,
      "learning_rate": 5.005818316019618e-05,
      "loss": 0.6038,
      "step": 10910
    },
    {
      "epoch": 0.7001282873636947,
      "grad_norm": 0.9568291721616811,
      "learning_rate": 4.996122375813079e-05,
      "loss": 0.6317,
      "step": 10915
    },
    {
      "epoch": 0.7004490057729313,
      "grad_norm": 1.4247569725340374,
      "learning_rate": 4.986432707073515e-05,
      "loss": 0.7097,
      "step": 10920
    },
    {
      "epoch": 0.7007697241821681,
      "grad_norm": 0.5257863778727976,
      "learning_rate": 4.976749321945191e-05,
      "loss": 0.5316,
      "step": 10925
    },
    {
      "epoch": 0.7010904425914047,
      "grad_norm": 0.7116948483921095,
      "learning_rate": 4.9670722325644993e-05,
      "loss": 0.6438,
      "step": 10930
    },
    {
      "epoch": 0.7014111610006415,
      "grad_norm": 0.8934801180351521,
      "learning_rate": 4.957401451059948e-05,
      "loss": 0.6628,
      "step": 10935
    },
    {
      "epoch": 0.7017318794098781,
      "grad_norm": 0.5554525116078812,
      "learning_rate": 4.9477369895521284e-05,
      "loss": 0.6803,
      "step": 10940
    },
    {
      "epoch": 0.7020525978191148,
      "grad_norm": 1.115600134036066,
      "learning_rate": 4.938078860153725e-05,
      "loss": 0.582,
      "step": 10945
    },
    {
      "epoch": 0.7023733162283515,
      "grad_norm": 1.04204980372642,
      "learning_rate": 4.928427074969475e-05,
      "loss": 0.6396,
      "step": 10950
    },
    {
      "epoch": 0.7026940346375882,
      "grad_norm": 0.6952203258967746,
      "learning_rate": 4.918781646096161e-05,
      "loss": 0.609,
      "step": 10955
    },
    {
      "epoch": 0.7030147530468249,
      "grad_norm": 0.8455941974814938,
      "learning_rate": 4.909142585622616e-05,
      "loss": 0.7442,
      "step": 10960
    },
    {
      "epoch": 0.7033354714560616,
      "grad_norm": 0.9358056805840572,
      "learning_rate": 4.899509905629671e-05,
      "loss": 0.6163,
      "step": 10965
    },
    {
      "epoch": 0.7036561898652983,
      "grad_norm": 0.8368567909279319,
      "learning_rate": 4.889883618190184e-05,
      "loss": 0.6729,
      "step": 10970
    },
    {
      "epoch": 0.703976908274535,
      "grad_norm": 0.9626200217934863,
      "learning_rate": 4.8802637353689694e-05,
      "loss": 0.6208,
      "step": 10975
    },
    {
      "epoch": 0.7042976266837716,
      "grad_norm": 1.423525816978348,
      "learning_rate": 4.870650269222845e-05,
      "loss": 0.6301,
      "step": 10980
    },
    {
      "epoch": 0.7046183450930084,
      "grad_norm": 0.8943539539791406,
      "learning_rate": 4.8610432318005705e-05,
      "loss": 0.8259,
      "step": 10985
    },
    {
      "epoch": 0.704939063502245,
      "grad_norm": 1.0047328070171035,
      "learning_rate": 4.851442635142846e-05,
      "loss": 0.6759,
      "step": 10990
    },
    {
      "epoch": 0.7052597819114818,
      "grad_norm": 0.864965532206175,
      "learning_rate": 4.841848491282315e-05,
      "loss": 0.6722,
      "step": 10995
    },
    {
      "epoch": 0.7055805003207184,
      "grad_norm": 0.7890255740216144,
      "learning_rate": 4.832260812243513e-05,
      "loss": 0.6922,
      "step": 11000
    },
    {
      "epoch": 0.7059012187299551,
      "grad_norm": 1.2389180866062235,
      "learning_rate": 4.822679610042894e-05,
      "loss": 0.6051,
      "step": 11005
    },
    {
      "epoch": 0.7062219371391918,
      "grad_norm": 0.6998283128694094,
      "learning_rate": 4.813104896688777e-05,
      "loss": 0.6615,
      "step": 11010
    },
    {
      "epoch": 0.7065426555484284,
      "grad_norm": 0.8090143409111475,
      "learning_rate": 4.803536684181354e-05,
      "loss": 0.7387,
      "step": 11015
    },
    {
      "epoch": 0.7068633739576652,
      "grad_norm": 1.0370968663682347,
      "learning_rate": 4.793974984512677e-05,
      "loss": 0.7072,
      "step": 11020
    },
    {
      "epoch": 0.7071840923669018,
      "grad_norm": 0.7853945975713512,
      "learning_rate": 4.7844198096666246e-05,
      "loss": 0.686,
      "step": 11025
    },
    {
      "epoch": 0.7075048107761386,
      "grad_norm": 0.702386626377002,
      "learning_rate": 4.774871171618901e-05,
      "loss": 0.7127,
      "step": 11030
    },
    {
      "epoch": 0.7078255291853752,
      "grad_norm": 1.0108215460660506,
      "learning_rate": 4.765329082337027e-05,
      "loss": 0.6434,
      "step": 11035
    },
    {
      "epoch": 0.708146247594612,
      "grad_norm": 0.9899048924342988,
      "learning_rate": 4.755793553780292e-05,
      "loss": 0.7323,
      "step": 11040
    },
    {
      "epoch": 0.7084669660038486,
      "grad_norm": 0.9147032893585562,
      "learning_rate": 4.746264597899792e-05,
      "loss": 0.6739,
      "step": 11045
    },
    {
      "epoch": 0.7087876844130853,
      "grad_norm": 1.0330004401132,
      "learning_rate": 4.736742226638363e-05,
      "loss": 0.8609,
      "step": 11050
    },
    {
      "epoch": 0.709108402822322,
      "grad_norm": 0.6548738796277453,
      "learning_rate": 4.727226451930604e-05,
      "loss": 0.6734,
      "step": 11055
    },
    {
      "epoch": 0.7094291212315587,
      "grad_norm": 0.81714120996019,
      "learning_rate": 4.717717285702835e-05,
      "loss": 0.7523,
      "step": 11060
    },
    {
      "epoch": 0.7097498396407954,
      "grad_norm": 0.885017113426685,
      "learning_rate": 4.708214739873096e-05,
      "loss": 0.5943,
      "step": 11065
    },
    {
      "epoch": 0.710070558050032,
      "grad_norm": 0.8620179894720568,
      "learning_rate": 4.698718826351135e-05,
      "loss": 0.593,
      "step": 11070
    },
    {
      "epoch": 0.7103912764592688,
      "grad_norm": 0.7663377237340008,
      "learning_rate": 4.689229557038379e-05,
      "loss": 0.7649,
      "step": 11075
    },
    {
      "epoch": 0.7107119948685054,
      "grad_norm": 0.779291905786263,
      "learning_rate": 4.679746943827939e-05,
      "loss": 0.6231,
      "step": 11080
    },
    {
      "epoch": 0.7110327132777421,
      "grad_norm": 0.8488045821194506,
      "learning_rate": 4.6702709986045745e-05,
      "loss": 0.5658,
      "step": 11085
    },
    {
      "epoch": 0.7113534316869788,
      "grad_norm": 0.7591544492497508,
      "learning_rate": 4.660801733244685e-05,
      "loss": 0.5434,
      "step": 11090
    },
    {
      "epoch": 0.7116741500962155,
      "grad_norm": 0.9324567178402989,
      "learning_rate": 4.651339159616312e-05,
      "loss": 0.7694,
      "step": 11095
    },
    {
      "epoch": 0.7119948685054522,
      "grad_norm": 0.614241285241644,
      "learning_rate": 4.641883289579095e-05,
      "loss": 0.573,
      "step": 11100
    },
    {
      "epoch": 0.7123155869146889,
      "grad_norm": 0.7297521213628075,
      "learning_rate": 4.632434134984288e-05,
      "loss": 0.7862,
      "step": 11105
    },
    {
      "epoch": 0.7126363053239256,
      "grad_norm": 0.8547500506968054,
      "learning_rate": 4.6229917076747056e-05,
      "loss": 0.6224,
      "step": 11110
    },
    {
      "epoch": 0.7129570237331623,
      "grad_norm": 1.1207952262364815,
      "learning_rate": 4.613556019484754e-05,
      "loss": 0.7452,
      "step": 11115
    },
    {
      "epoch": 0.7132777421423989,
      "grad_norm": 0.5122245150734959,
      "learning_rate": 4.604127082240379e-05,
      "loss": 0.6216,
      "step": 11120
    },
    {
      "epoch": 0.7135984605516357,
      "grad_norm": 0.6841888313664231,
      "learning_rate": 4.5947049077590664e-05,
      "loss": 0.6031,
      "step": 11125
    },
    {
      "epoch": 0.7139191789608723,
      "grad_norm": 0.8085851937507493,
      "learning_rate": 4.585289507849838e-05,
      "loss": 0.5983,
      "step": 11130
    },
    {
      "epoch": 0.7142398973701091,
      "grad_norm": 0.8748340585570812,
      "learning_rate": 4.575880894313207e-05,
      "loss": 0.6462,
      "step": 11135
    },
    {
      "epoch": 0.7145606157793457,
      "grad_norm": 0.5741182108460992,
      "learning_rate": 4.566479078941198e-05,
      "loss": 0.6313,
      "step": 11140
    },
    {
      "epoch": 0.7148813341885825,
      "grad_norm": 1.3368271859986067,
      "learning_rate": 4.557084073517305e-05,
      "loss": 0.5434,
      "step": 11145
    },
    {
      "epoch": 0.7152020525978191,
      "grad_norm": 0.7497857375686727,
      "learning_rate": 4.547695889816485e-05,
      "loss": 0.557,
      "step": 11150
    },
    {
      "epoch": 0.7155227710070559,
      "grad_norm": 0.8178864612038674,
      "learning_rate": 4.538314539605155e-05,
      "loss": 0.6979,
      "step": 11155
    },
    {
      "epoch": 0.7158434894162925,
      "grad_norm": 0.8969560105198988,
      "learning_rate": 4.528940034641158e-05,
      "loss": 0.765,
      "step": 11160
    },
    {
      "epoch": 0.7161642078255291,
      "grad_norm": 1.2265503200288288,
      "learning_rate": 4.519572386673768e-05,
      "loss": 0.5296,
      "step": 11165
    },
    {
      "epoch": 0.7164849262347659,
      "grad_norm": 0.611571817659739,
      "learning_rate": 4.510211607443654e-05,
      "loss": 0.6223,
      "step": 11170
    },
    {
      "epoch": 0.7168056446440025,
      "grad_norm": 0.8641143822600184,
      "learning_rate": 4.500857708682883e-05,
      "loss": 0.7204,
      "step": 11175
    },
    {
      "epoch": 0.7171263630532393,
      "grad_norm": 0.9563759174291445,
      "learning_rate": 4.491510702114894e-05,
      "loss": 0.6728,
      "step": 11180
    },
    {
      "epoch": 0.7174470814624759,
      "grad_norm": 0.5814502110654781,
      "learning_rate": 4.482170599454489e-05,
      "loss": 0.6652,
      "step": 11185
    },
    {
      "epoch": 0.7177677998717127,
      "grad_norm": 1.0858563785495055,
      "learning_rate": 4.472837412407825e-05,
      "loss": 0.5543,
      "step": 11190
    },
    {
      "epoch": 0.7180885182809493,
      "grad_norm": 0.6644009179012256,
      "learning_rate": 4.4635111526723826e-05,
      "loss": 0.8072,
      "step": 11195
    },
    {
      "epoch": 0.718409236690186,
      "grad_norm": 0.9031430293191645,
      "learning_rate": 4.454191831936958e-05,
      "loss": 0.7006,
      "step": 11200
    },
    {
      "epoch": 0.7187299550994227,
      "grad_norm": 0.6707442290616978,
      "learning_rate": 4.4448794618816634e-05,
      "loss": 0.6081,
      "step": 11205
    },
    {
      "epoch": 0.7190506735086594,
      "grad_norm": 0.4567339031728235,
      "learning_rate": 4.4355740541778837e-05,
      "loss": 0.5996,
      "step": 11210
    },
    {
      "epoch": 0.7193713919178961,
      "grad_norm": 0.8456434286308311,
      "learning_rate": 4.426275620488293e-05,
      "loss": 0.5902,
      "step": 11215
    },
    {
      "epoch": 0.7196921103271328,
      "grad_norm": 0.7375984313670896,
      "learning_rate": 4.416984172466814e-05,
      "loss": 0.5592,
      "step": 11220
    },
    {
      "epoch": 0.7200128287363695,
      "grad_norm": 1.001285278455043,
      "learning_rate": 4.407699721758614e-05,
      "loss": 0.4883,
      "step": 11225
    },
    {
      "epoch": 0.7203335471456062,
      "grad_norm": 1.2917508534051378,
      "learning_rate": 4.398422280000101e-05,
      "loss": 0.6768,
      "step": 11230
    },
    {
      "epoch": 0.7206542655548428,
      "grad_norm": 0.9685204099266428,
      "learning_rate": 4.3891518588188875e-05,
      "loss": 0.5883,
      "step": 11235
    },
    {
      "epoch": 0.7209749839640796,
      "grad_norm": 0.5295383592814902,
      "learning_rate": 4.379888469833791e-05,
      "loss": 0.6229,
      "step": 11240
    },
    {
      "epoch": 0.7212957023733162,
      "grad_norm": 0.9573436890552846,
      "learning_rate": 4.370632124654811e-05,
      "loss": 0.7156,
      "step": 11245
    },
    {
      "epoch": 0.721616420782553,
      "grad_norm": 0.741578858748363,
      "learning_rate": 4.361382834883131e-05,
      "loss": 0.6556,
      "step": 11250
    },
    {
      "epoch": 0.7219371391917896,
      "grad_norm": 0.916633580201409,
      "learning_rate": 4.3521406121110807e-05,
      "loss": 0.676,
      "step": 11255
    },
    {
      "epoch": 0.7222578576010263,
      "grad_norm": 0.3992983111166088,
      "learning_rate": 4.342905467922133e-05,
      "loss": 0.4788,
      "step": 11260
    },
    {
      "epoch": 0.722578576010263,
      "grad_norm": 1.4519640203571154,
      "learning_rate": 4.333677413890896e-05,
      "loss": 0.7693,
      "step": 11265
    },
    {
      "epoch": 0.7228992944194996,
      "grad_norm": 1.014341854127021,
      "learning_rate": 4.324456461583084e-05,
      "loss": 0.7161,
      "step": 11270
    },
    {
      "epoch": 0.7232200128287364,
      "grad_norm": 0.5798440252008737,
      "learning_rate": 4.315242622555518e-05,
      "loss": 0.5319,
      "step": 11275
    },
    {
      "epoch": 0.723540731237973,
      "grad_norm": 1.3961411697107977,
      "learning_rate": 4.306035908356097e-05,
      "loss": 0.7755,
      "step": 11280
    },
    {
      "epoch": 0.7238614496472098,
      "grad_norm": 0.7989332199967835,
      "learning_rate": 4.296836330523791e-05,
      "loss": 0.6761,
      "step": 11285
    },
    {
      "epoch": 0.7241821680564464,
      "grad_norm": 0.5432452037456782,
      "learning_rate": 4.287643900588634e-05,
      "loss": 0.5398,
      "step": 11290
    },
    {
      "epoch": 0.7245028864656832,
      "grad_norm": 1.1422963762576541,
      "learning_rate": 4.278458630071687e-05,
      "loss": 0.5321,
      "step": 11295
    },
    {
      "epoch": 0.7248236048749198,
      "grad_norm": 0.6668170639427147,
      "learning_rate": 4.2692805304850545e-05,
      "loss": 0.5796,
      "step": 11300
    },
    {
      "epoch": 0.7251443232841565,
      "grad_norm": 0.8515640505208902,
      "learning_rate": 4.260109613331842e-05,
      "loss": 0.6569,
      "step": 11305
    },
    {
      "epoch": 0.7254650416933932,
      "grad_norm": 0.7014693919060985,
      "learning_rate": 4.250945890106156e-05,
      "loss": 0.6856,
      "step": 11310
    },
    {
      "epoch": 0.7257857601026299,
      "grad_norm": 1.067030988068662,
      "learning_rate": 4.241789372293087e-05,
      "loss": 0.7749,
      "step": 11315
    },
    {
      "epoch": 0.7261064785118666,
      "grad_norm": 0.7479024679363765,
      "learning_rate": 4.232640071368691e-05,
      "loss": 0.5478,
      "step": 11320
    },
    {
      "epoch": 0.7264271969211032,
      "grad_norm": 1.0084686752935972,
      "learning_rate": 4.22349799879999e-05,
      "loss": 0.7788,
      "step": 11325
    },
    {
      "epoch": 0.72674791533034,
      "grad_norm": 0.6585878195188157,
      "learning_rate": 4.214363166044932e-05,
      "loss": 0.6133,
      "step": 11330
    },
    {
      "epoch": 0.7270686337395766,
      "grad_norm": 0.6784141958893567,
      "learning_rate": 4.205235584552407e-05,
      "loss": 0.6019,
      "step": 11335
    },
    {
      "epoch": 0.7273893521488134,
      "grad_norm": 0.993300088957976,
      "learning_rate": 4.1961152657622024e-05,
      "loss": 0.7166,
      "step": 11340
    },
    {
      "epoch": 0.72771007055805,
      "grad_norm": 0.8874942343310022,
      "learning_rate": 4.1870022211050074e-05,
      "loss": 0.6981,
      "step": 11345
    },
    {
      "epoch": 0.7280307889672867,
      "grad_norm": 1.4921657931640064,
      "learning_rate": 4.177896462002402e-05,
      "loss": 0.5832,
      "step": 11350
    },
    {
      "epoch": 0.7283515073765234,
      "grad_norm": 0.7853192040977804,
      "learning_rate": 4.168797999866827e-05,
      "loss": 0.7185,
      "step": 11355
    },
    {
      "epoch": 0.7286722257857601,
      "grad_norm": 0.7775032508697538,
      "learning_rate": 4.159706846101574e-05,
      "loss": 0.5868,
      "step": 11360
    },
    {
      "epoch": 0.7289929441949968,
      "grad_norm": 0.8328166231193795,
      "learning_rate": 4.1506230121007894e-05,
      "loss": 0.6707,
      "step": 11365
    },
    {
      "epoch": 0.7293136626042335,
      "grad_norm": 1.1556231103657886,
      "learning_rate": 4.141546509249433e-05,
      "loss": 0.602,
      "step": 11370
    },
    {
      "epoch": 0.7296343810134702,
      "grad_norm": 0.6535692635433068,
      "learning_rate": 4.1324773489232794e-05,
      "loss": 0.7015,
      "step": 11375
    },
    {
      "epoch": 0.7299550994227069,
      "grad_norm": 1.0308989718059964,
      "learning_rate": 4.1234155424889e-05,
      "loss": 0.6524,
      "step": 11380
    },
    {
      "epoch": 0.7302758178319435,
      "grad_norm": 0.9042723107486375,
      "learning_rate": 4.1143611013036556e-05,
      "loss": 0.6932,
      "step": 11385
    },
    {
      "epoch": 0.7305965362411803,
      "grad_norm": 1.045581159518661,
      "learning_rate": 4.105314036715668e-05,
      "loss": 0.598,
      "step": 11390
    },
    {
      "epoch": 0.7309172546504169,
      "grad_norm": 0.720438985489428,
      "learning_rate": 4.096274360063814e-05,
      "loss": 0.6927,
      "step": 11395
    },
    {
      "epoch": 0.7312379730596537,
      "grad_norm": 0.7837057060205996,
      "learning_rate": 4.087242082677721e-05,
      "loss": 0.6271,
      "step": 11400
    },
    {
      "epoch": 0.7315586914688903,
      "grad_norm": 0.9277273501073059,
      "learning_rate": 4.0782172158777296e-05,
      "loss": 0.7232,
      "step": 11405
    },
    {
      "epoch": 0.731879409878127,
      "grad_norm": 0.7663141809384151,
      "learning_rate": 4.069199770974904e-05,
      "loss": 0.5593,
      "step": 11410
    },
    {
      "epoch": 0.7322001282873637,
      "grad_norm": 0.7732548069785231,
      "learning_rate": 4.0601897592709984e-05,
      "loss": 0.6973,
      "step": 11415
    },
    {
      "epoch": 0.7325208466966003,
      "grad_norm": 1.0148083244026747,
      "learning_rate": 4.0511871920584486e-05,
      "loss": 0.8616,
      "step": 11420
    },
    {
      "epoch": 0.7328415651058371,
      "grad_norm": 0.7789337008538708,
      "learning_rate": 4.042192080620374e-05,
      "loss": 0.7399,
      "step": 11425
    },
    {
      "epoch": 0.7331622835150737,
      "grad_norm": 0.7411707815027391,
      "learning_rate": 4.033204436230532e-05,
      "loss": 0.7219,
      "step": 11430
    },
    {
      "epoch": 0.7334830019243105,
      "grad_norm": 0.9973447184162525,
      "learning_rate": 4.0242242701533396e-05,
      "loss": 0.6579,
      "step": 11435
    },
    {
      "epoch": 0.7338037203335471,
      "grad_norm": 0.5830094144343125,
      "learning_rate": 4.015251593643818e-05,
      "loss": 0.7666,
      "step": 11440
    },
    {
      "epoch": 0.7341244387427839,
      "grad_norm": 0.9049494653802453,
      "learning_rate": 4.006286417947627e-05,
      "loss": 0.7362,
      "step": 11445
    },
    {
      "epoch": 0.7344451571520205,
      "grad_norm": 1.1555455068409544,
      "learning_rate": 3.9973287543010064e-05,
      "loss": 0.7706,
      "step": 11450
    },
    {
      "epoch": 0.7347658755612572,
      "grad_norm": 0.8236939327253207,
      "learning_rate": 3.9883786139307864e-05,
      "loss": 0.4883,
      "step": 11455
    },
    {
      "epoch": 0.7350865939704939,
      "grad_norm": 0.7242616375495603,
      "learning_rate": 3.979436008054377e-05,
      "loss": 0.6765,
      "step": 11460
    },
    {
      "epoch": 0.7354073123797306,
      "grad_norm": 0.8282782204794581,
      "learning_rate": 3.97050094787973e-05,
      "loss": 0.6393,
      "step": 11465
    },
    {
      "epoch": 0.7357280307889673,
      "grad_norm": 0.5484580528486228,
      "learning_rate": 3.9615734446053534e-05,
      "loss": 0.6273,
      "step": 11470
    },
    {
      "epoch": 0.736048749198204,
      "grad_norm": 0.8342001080027434,
      "learning_rate": 3.952653509420277e-05,
      "loss": 0.6517,
      "step": 11475
    },
    {
      "epoch": 0.7363694676074407,
      "grad_norm": 0.8544406097793438,
      "learning_rate": 3.9437411535040416e-05,
      "loss": 0.5679,
      "step": 11480
    },
    {
      "epoch": 0.7366901860166774,
      "grad_norm": 0.8001118287868482,
      "learning_rate": 3.9348363880267006e-05,
      "loss": 0.7448,
      "step": 11485
    },
    {
      "epoch": 0.737010904425914,
      "grad_norm": 1.0049068620138881,
      "learning_rate": 3.92593922414878e-05,
      "loss": 0.5381,
      "step": 11490
    },
    {
      "epoch": 0.7373316228351507,
      "grad_norm": 1.0836198813580136,
      "learning_rate": 3.9170496730212944e-05,
      "loss": 0.6346,
      "step": 11495
    },
    {
      "epoch": 0.7376523412443874,
      "grad_norm": 0.4690219622238173,
      "learning_rate": 3.9081677457857045e-05,
      "loss": 0.5469,
      "step": 11500
    },
    {
      "epoch": 0.7379730596536241,
      "grad_norm": 0.7653256546259366,
      "learning_rate": 3.899293453573919e-05,
      "loss": 0.6005,
      "step": 11505
    },
    {
      "epoch": 0.7382937780628608,
      "grad_norm": 0.8939110106983141,
      "learning_rate": 3.890426807508278e-05,
      "loss": 0.6783,
      "step": 11510
    },
    {
      "epoch": 0.7386144964720975,
      "grad_norm": 0.775603525768831,
      "learning_rate": 3.881567818701538e-05,
      "loss": 0.6916,
      "step": 11515
    },
    {
      "epoch": 0.7389352148813342,
      "grad_norm": 1.3430493149234304,
      "learning_rate": 3.872716498256863e-05,
      "loss": 0.5578,
      "step": 11520
    },
    {
      "epoch": 0.7392559332905709,
      "grad_norm": 0.715829315420304,
      "learning_rate": 3.863872857267802e-05,
      "loss": 0.7686,
      "step": 11525
    },
    {
      "epoch": 0.7395766516998076,
      "grad_norm": 0.6732314863048653,
      "learning_rate": 3.8550369068182735e-05,
      "loss": 0.4974,
      "step": 11530
    },
    {
      "epoch": 0.7398973701090442,
      "grad_norm": 0.5624440967305854,
      "learning_rate": 3.846208657982572e-05,
      "loss": 0.5765,
      "step": 11535
    },
    {
      "epoch": 0.740218088518281,
      "grad_norm": 0.9351668361698933,
      "learning_rate": 3.837388121825323e-05,
      "loss": 0.6699,
      "step": 11540
    },
    {
      "epoch": 0.7405388069275176,
      "grad_norm": 1.0442410475484458,
      "learning_rate": 3.828575309401501e-05,
      "loss": 0.5723,
      "step": 11545
    },
    {
      "epoch": 0.7408595253367544,
      "grad_norm": 0.897573742077218,
      "learning_rate": 3.819770231756389e-05,
      "loss": 0.7723,
      "step": 11550
    },
    {
      "epoch": 0.741180243745991,
      "grad_norm": 0.6333361868228848,
      "learning_rate": 3.810972899925575e-05,
      "loss": 0.5929,
      "step": 11555
    },
    {
      "epoch": 0.7415009621552278,
      "grad_norm": 1.2414234428777005,
      "learning_rate": 3.802183324934952e-05,
      "loss": 0.6754,
      "step": 11560
    },
    {
      "epoch": 0.7418216805644644,
      "grad_norm": 0.8678280206604037,
      "learning_rate": 3.793401517800672e-05,
      "loss": 0.434,
      "step": 11565
    },
    {
      "epoch": 0.742142398973701,
      "grad_norm": 0.8589814705072975,
      "learning_rate": 3.784627489529177e-05,
      "loss": 0.7005,
      "step": 11570
    },
    {
      "epoch": 0.7424631173829378,
      "grad_norm": 1.096069158153898,
      "learning_rate": 3.775861251117128e-05,
      "loss": 0.6066,
      "step": 11575
    },
    {
      "epoch": 0.7427838357921744,
      "grad_norm": 0.8956575121848285,
      "learning_rate": 3.76710281355145e-05,
      "loss": 0.5453,
      "step": 11580
    },
    {
      "epoch": 0.7431045542014112,
      "grad_norm": 0.9901238623869012,
      "learning_rate": 3.7583521878092766e-05,
      "loss": 0.6829,
      "step": 11585
    },
    {
      "epoch": 0.7434252726106478,
      "grad_norm": 1.1556330315855146,
      "learning_rate": 3.749609384857952e-05,
      "loss": 0.6617,
      "step": 11590
    },
    {
      "epoch": 0.7437459910198846,
      "grad_norm": 0.8946200380979793,
      "learning_rate": 3.7408744156550235e-05,
      "loss": 0.6454,
      "step": 11595
    },
    {
      "epoch": 0.7440667094291212,
      "grad_norm": 0.6811470722359575,
      "learning_rate": 3.73214729114821e-05,
      "loss": 0.558,
      "step": 11600
    },
    {
      "epoch": 0.7443874278383579,
      "grad_norm": 1.2129672803037883,
      "learning_rate": 3.72342802227541e-05,
      "loss": 0.6829,
      "step": 11605
    },
    {
      "epoch": 0.7447081462475946,
      "grad_norm": 0.7287815359687029,
      "learning_rate": 3.7147166199646665e-05,
      "loss": 0.7291,
      "step": 11610
    },
    {
      "epoch": 0.7450288646568313,
      "grad_norm": 0.7381906467511818,
      "learning_rate": 3.706013095134162e-05,
      "loss": 0.673,
      "step": 11615
    },
    {
      "epoch": 0.745349583066068,
      "grad_norm": 1.2592430310132843,
      "learning_rate": 3.697317458692219e-05,
      "loss": 0.6236,
      "step": 11620
    },
    {
      "epoch": 0.7456703014753047,
      "grad_norm": 0.6359130442368803,
      "learning_rate": 3.688629721537256e-05,
      "loss": 0.6774,
      "step": 11625
    },
    {
      "epoch": 0.7459910198845414,
      "grad_norm": 0.9163313019367859,
      "learning_rate": 3.679949894557808e-05,
      "loss": 0.6353,
      "step": 11630
    },
    {
      "epoch": 0.7463117382937781,
      "grad_norm": 0.66124758919148,
      "learning_rate": 3.671277988632484e-05,
      "loss": 0.6667,
      "step": 11635
    },
    {
      "epoch": 0.7466324567030147,
      "grad_norm": 1.093053112833277,
      "learning_rate": 3.6626140146299715e-05,
      "loss": 0.6706,
      "step": 11640
    },
    {
      "epoch": 0.7469531751122515,
      "grad_norm": 0.585918591610346,
      "learning_rate": 3.653957983409012e-05,
      "loss": 0.596,
      "step": 11645
    },
    {
      "epoch": 0.7472738935214881,
      "grad_norm": 0.8785492282676739,
      "learning_rate": 3.6453099058183936e-05,
      "loss": 0.8345,
      "step": 11650
    },
    {
      "epoch": 0.7475946119307249,
      "grad_norm": 1.0886821917358311,
      "learning_rate": 3.6366697926969415e-05,
      "loss": 0.7223,
      "step": 11655
    },
    {
      "epoch": 0.7479153303399615,
      "grad_norm": 0.8352362172770396,
      "learning_rate": 3.628037654873489e-05,
      "loss": 0.7974,
      "step": 11660
    },
    {
      "epoch": 0.7482360487491982,
      "grad_norm": 0.6846055972157917,
      "learning_rate": 3.619413503166888e-05,
      "loss": 0.7061,
      "step": 11665
    },
    {
      "epoch": 0.7485567671584349,
      "grad_norm": 1.1651393765637517,
      "learning_rate": 3.610797348385965e-05,
      "loss": 0.6326,
      "step": 11670
    },
    {
      "epoch": 0.7488774855676715,
      "grad_norm": 0.8887525600265255,
      "learning_rate": 3.60218920132953e-05,
      "loss": 0.6543,
      "step": 11675
    },
    {
      "epoch": 0.7491982039769083,
      "grad_norm": 0.47701205334570973,
      "learning_rate": 3.5935890727863653e-05,
      "loss": 0.5758,
      "step": 11680
    },
    {
      "epoch": 0.7495189223861449,
      "grad_norm": 1.0003500503360518,
      "learning_rate": 3.5849969735351917e-05,
      "loss": 0.7507,
      "step": 11685
    },
    {
      "epoch": 0.7498396407953817,
      "grad_norm": 0.9203454434610632,
      "learning_rate": 3.57641291434467e-05,
      "loss": 0.7704,
      "step": 11690
    },
    {
      "epoch": 0.7501603592046183,
      "grad_norm": 1.035485843783069,
      "learning_rate": 3.5678369059733884e-05,
      "loss": 0.7227,
      "step": 11695
    },
    {
      "epoch": 0.7504810776138551,
      "grad_norm": 0.8574293258900955,
      "learning_rate": 3.559268959169842e-05,
      "loss": 0.5932,
      "step": 11700
    },
    {
      "epoch": 0.7508017960230917,
      "grad_norm": 1.0713424994868566,
      "learning_rate": 3.55070908467242e-05,
      "loss": 0.7351,
      "step": 11705
    },
    {
      "epoch": 0.7511225144323285,
      "grad_norm": 0.7637351663255856,
      "learning_rate": 3.542157293209394e-05,
      "loss": 0.5982,
      "step": 11710
    },
    {
      "epoch": 0.7514432328415651,
      "grad_norm": 0.7283758639132564,
      "learning_rate": 3.533613595498914e-05,
      "loss": 0.6919,
      "step": 11715
    },
    {
      "epoch": 0.7517639512508018,
      "grad_norm": 0.9199615101682994,
      "learning_rate": 3.525078002248974e-05,
      "loss": 0.834,
      "step": 11720
    },
    {
      "epoch": 0.7520846696600385,
      "grad_norm": 0.685052311744196,
      "learning_rate": 3.516550524157415e-05,
      "loss": 0.7766,
      "step": 11725
    },
    {
      "epoch": 0.7524053880692751,
      "grad_norm": 0.9557933778705214,
      "learning_rate": 3.508031171911913e-05,
      "loss": 0.7334,
      "step": 11730
    },
    {
      "epoch": 0.7527261064785119,
      "grad_norm": 0.8217799938196116,
      "learning_rate": 3.4995199561899496e-05,
      "loss": 0.6719,
      "step": 11735
    },
    {
      "epoch": 0.7530468248877485,
      "grad_norm": 0.8490165290571312,
      "learning_rate": 3.491016887658819e-05,
      "loss": 0.6352,
      "step": 11740
    },
    {
      "epoch": 0.7533675432969853,
      "grad_norm": 1.0096737759482532,
      "learning_rate": 3.4825219769755955e-05,
      "loss": 0.6278,
      "step": 11745
    },
    {
      "epoch": 0.7536882617062219,
      "grad_norm": 0.8116824311381272,
      "learning_rate": 3.4740352347871294e-05,
      "loss": 0.5794,
      "step": 11750
    },
    {
      "epoch": 0.7540089801154586,
      "grad_norm": 1.0567664205528664,
      "learning_rate": 3.4655566717300433e-05,
      "loss": 0.5817,
      "step": 11755
    },
    {
      "epoch": 0.7543296985246953,
      "grad_norm": 0.8458879335378663,
      "learning_rate": 3.457086298430696e-05,
      "loss": 0.5779,
      "step": 11760
    },
    {
      "epoch": 0.754650416933932,
      "grad_norm": 0.8982863213171639,
      "learning_rate": 3.448624125505194e-05,
      "loss": 0.6697,
      "step": 11765
    },
    {
      "epoch": 0.7549711353431687,
      "grad_norm": 0.8975989314029491,
      "learning_rate": 3.440170163559355e-05,
      "loss": 0.7032,
      "step": 11770
    },
    {
      "epoch": 0.7552918537524054,
      "grad_norm": 0.8729443546989577,
      "learning_rate": 3.4317244231887125e-05,
      "loss": 0.8033,
      "step": 11775
    },
    {
      "epoch": 0.7556125721616421,
      "grad_norm": 1.0239920545191055,
      "learning_rate": 3.423286914978493e-05,
      "loss": 0.672,
      "step": 11780
    },
    {
      "epoch": 0.7559332905708788,
      "grad_norm": 0.7010189828092076,
      "learning_rate": 3.414857649503602e-05,
      "loss": 0.6409,
      "step": 11785
    },
    {
      "epoch": 0.7562540089801154,
      "grad_norm": 0.8719062018189001,
      "learning_rate": 3.4064366373286274e-05,
      "loss": 0.7164,
      "step": 11790
    },
    {
      "epoch": 0.7565747273893522,
      "grad_norm": 0.7198915627914316,
      "learning_rate": 3.398023889007794e-05,
      "loss": 0.6249,
      "step": 11795
    },
    {
      "epoch": 0.7568954457985888,
      "grad_norm": 0.8718719431875859,
      "learning_rate": 3.389619415084989e-05,
      "loss": 0.6064,
      "step": 11800
    },
    {
      "epoch": 0.7572161642078256,
      "grad_norm": 0.8120042747717762,
      "learning_rate": 3.381223226093715e-05,
      "loss": 0.5433,
      "step": 11805
    },
    {
      "epoch": 0.7575368826170622,
      "grad_norm": 0.9647874073108456,
      "learning_rate": 3.3728353325570915e-05,
      "loss": 0.7064,
      "step": 11810
    },
    {
      "epoch": 0.757857601026299,
      "grad_norm": 1.2538875949194586,
      "learning_rate": 3.364455744987853e-05,
      "loss": 0.5527,
      "step": 11815
    },
    {
      "epoch": 0.7581783194355356,
      "grad_norm": 1.178257170426357,
      "learning_rate": 3.35608447388831e-05,
      "loss": 0.6565,
      "step": 11820
    },
    {
      "epoch": 0.7584990378447722,
      "grad_norm": 0.8864713208910722,
      "learning_rate": 3.3477215297503605e-05,
      "loss": 0.5459,
      "step": 11825
    },
    {
      "epoch": 0.758819756254009,
      "grad_norm": 0.81482691903865,
      "learning_rate": 3.339366923055458e-05,
      "loss": 0.6798,
      "step": 11830
    },
    {
      "epoch": 0.7591404746632456,
      "grad_norm": 0.7808704507490104,
      "learning_rate": 3.3310206642746125e-05,
      "loss": 0.6767,
      "step": 11835
    },
    {
      "epoch": 0.7594611930724824,
      "grad_norm": 0.6063874143510388,
      "learning_rate": 3.3226827638683665e-05,
      "loss": 0.7335,
      "step": 11840
    },
    {
      "epoch": 0.759781911481719,
      "grad_norm": 0.9081154038511268,
      "learning_rate": 3.3143532322867865e-05,
      "loss": 0.7284,
      "step": 11845
    },
    {
      "epoch": 0.7601026298909558,
      "grad_norm": 0.845045773951182,
      "learning_rate": 3.306032079969459e-05,
      "loss": 0.7782,
      "step": 11850
    },
    {
      "epoch": 0.7604233483001924,
      "grad_norm": 0.8991436429034236,
      "learning_rate": 3.29771931734546e-05,
      "loss": 0.7148,
      "step": 11855
    },
    {
      "epoch": 0.7607440667094291,
      "grad_norm": 0.9742693305593477,
      "learning_rate": 3.2894149548333495e-05,
      "loss": 0.6244,
      "step": 11860
    },
    {
      "epoch": 0.7610647851186658,
      "grad_norm": 0.6773700996601912,
      "learning_rate": 3.281119002841169e-05,
      "loss": 0.5872,
      "step": 11865
    },
    {
      "epoch": 0.7613855035279025,
      "grad_norm": 0.8384804126775537,
      "learning_rate": 3.2728314717664055e-05,
      "loss": 0.7845,
      "step": 11870
    },
    {
      "epoch": 0.7617062219371392,
      "grad_norm": 1.1357544575552236,
      "learning_rate": 3.264552371996008e-05,
      "loss": 0.6953,
      "step": 11875
    },
    {
      "epoch": 0.7620269403463759,
      "grad_norm": 0.8516566580601438,
      "learning_rate": 3.256281713906343e-05,
      "loss": 0.7256,
      "step": 11880
    },
    {
      "epoch": 0.7623476587556126,
      "grad_norm": 1.2370541167396898,
      "learning_rate": 3.248019507863203e-05,
      "loss": 0.7604,
      "step": 11885
    },
    {
      "epoch": 0.7626683771648493,
      "grad_norm": 0.9542563866917992,
      "learning_rate": 3.2397657642217926e-05,
      "loss": 0.5988,
      "step": 11890
    },
    {
      "epoch": 0.762989095574086,
      "grad_norm": 1.0432964488893417,
      "learning_rate": 3.2315204933266996e-05,
      "loss": 0.6991,
      "step": 11895
    },
    {
      "epoch": 0.7633098139833226,
      "grad_norm": 1.0011228778914865,
      "learning_rate": 3.223283705511908e-05,
      "loss": 0.7298,
      "step": 11900
    },
    {
      "epoch": 0.7636305323925593,
      "grad_norm": 1.5274397488438434,
      "learning_rate": 3.215055411100748e-05,
      "loss": 0.6428,
      "step": 11905
    },
    {
      "epoch": 0.763951250801796,
      "grad_norm": 0.876587920734237,
      "learning_rate": 3.2068356204059255e-05,
      "loss": 0.7244,
      "step": 11910
    },
    {
      "epoch": 0.7642719692110327,
      "grad_norm": 0.6121339451327354,
      "learning_rate": 3.198624343729479e-05,
      "loss": 0.7324,
      "step": 11915
    },
    {
      "epoch": 0.7645926876202694,
      "grad_norm": 0.8464048080490233,
      "learning_rate": 3.190421591362772e-05,
      "loss": 0.7464,
      "step": 11920
    },
    {
      "epoch": 0.7649134060295061,
      "grad_norm": 0.9880557475834854,
      "learning_rate": 3.1822273735864984e-05,
      "loss": 0.71,
      "step": 11925
    },
    {
      "epoch": 0.7652341244387428,
      "grad_norm": 1.0295342644337049,
      "learning_rate": 3.174041700670638e-05,
      "loss": 0.4895,
      "step": 11930
    },
    {
      "epoch": 0.7655548428479795,
      "grad_norm": 0.7076312841936536,
      "learning_rate": 3.165864582874477e-05,
      "loss": 0.691,
      "step": 11935
    },
    {
      "epoch": 0.7658755612572161,
      "grad_norm": 1.0135591193887252,
      "learning_rate": 3.1576960304465705e-05,
      "loss": 0.6266,
      "step": 11940
    },
    {
      "epoch": 0.7661962796664529,
      "grad_norm": 1.0323761526191306,
      "learning_rate": 3.149536053624735e-05,
      "loss": 0.7654,
      "step": 11945
    },
    {
      "epoch": 0.7665169980756895,
      "grad_norm": 1.55635605359068,
      "learning_rate": 3.1413846626360536e-05,
      "loss": 0.7714,
      "step": 11950
    },
    {
      "epoch": 0.7668377164849263,
      "grad_norm": 0.9497662276751877,
      "learning_rate": 3.133241867696829e-05,
      "loss": 0.6683,
      "step": 11955
    },
    {
      "epoch": 0.7671584348941629,
      "grad_norm": 0.8979757336357795,
      "learning_rate": 3.1251076790126086e-05,
      "loss": 0.7516,
      "step": 11960
    },
    {
      "epoch": 0.7674791533033997,
      "grad_norm": 0.764820887022675,
      "learning_rate": 3.1169821067781425e-05,
      "loss": 0.5679,
      "step": 11965
    },
    {
      "epoch": 0.7677998717126363,
      "grad_norm": 0.5942733392588654,
      "learning_rate": 3.1088651611773834e-05,
      "loss": 0.5194,
      "step": 11970
    },
    {
      "epoch": 0.768120590121873,
      "grad_norm": 0.9490603016131256,
      "learning_rate": 3.100756852383473e-05,
      "loss": 0.5963,
      "step": 11975
    },
    {
      "epoch": 0.7684413085311097,
      "grad_norm": 0.7616783689998372,
      "learning_rate": 3.092657190558727e-05,
      "loss": 0.6785,
      "step": 11980
    },
    {
      "epoch": 0.7687620269403463,
      "grad_norm": 0.830417639785896,
      "learning_rate": 3.084566185854628e-05,
      "loss": 0.5892,
      "step": 11985
    },
    {
      "epoch": 0.7690827453495831,
      "grad_norm": 1.0515557973724121,
      "learning_rate": 3.076483848411803e-05,
      "loss": 0.6846,
      "step": 11990
    },
    {
      "epoch": 0.7694034637588197,
      "grad_norm": 0.9480637021643955,
      "learning_rate": 3.068410188360022e-05,
      "loss": 0.741,
      "step": 11995
    },
    {
      "epoch": 0.7697241821680565,
      "grad_norm": 0.9435811108298884,
      "learning_rate": 3.0603452158181744e-05,
      "loss": 0.7019,
      "step": 12000
    },
    {
      "epoch": 0.7700449005772931,
      "grad_norm": 0.7019989507064325,
      "learning_rate": 3.052288940894259e-05,
      "loss": 0.5835,
      "step": 12005
    },
    {
      "epoch": 0.7703656189865298,
      "grad_norm": 0.6770008543875123,
      "learning_rate": 3.0442413736853846e-05,
      "loss": 0.6826,
      "step": 12010
    },
    {
      "epoch": 0.7706863373957665,
      "grad_norm": 0.7178710129095005,
      "learning_rate": 3.036202524277735e-05,
      "loss": 0.7033,
      "step": 12015
    },
    {
      "epoch": 0.7710070558050032,
      "grad_norm": 0.7298827842977621,
      "learning_rate": 3.0281724027465708e-05,
      "loss": 0.6847,
      "step": 12020
    },
    {
      "epoch": 0.7713277742142399,
      "grad_norm": 1.2518124809303286,
      "learning_rate": 3.020151019156221e-05,
      "loss": 0.5659,
      "step": 12025
    },
    {
      "epoch": 0.7716484926234766,
      "grad_norm": 0.7542697248961158,
      "learning_rate": 3.0121383835600513e-05,
      "loss": 0.7575,
      "step": 12030
    },
    {
      "epoch": 0.7719692110327133,
      "grad_norm": 0.779461786694263,
      "learning_rate": 3.0041345060004776e-05,
      "loss": 0.7238,
      "step": 12035
    },
    {
      "epoch": 0.77228992944195,
      "grad_norm": 1.0655675292269764,
      "learning_rate": 2.9961393965089203e-05,
      "loss": 0.7475,
      "step": 12040
    },
    {
      "epoch": 0.7726106478511866,
      "grad_norm": 1.1044101389504177,
      "learning_rate": 2.98815306510583e-05,
      "loss": 0.6353,
      "step": 12045
    },
    {
      "epoch": 0.7729313662604234,
      "grad_norm": 0.8533414942650657,
      "learning_rate": 2.9801755218006433e-05,
      "loss": 0.5867,
      "step": 12050
    },
    {
      "epoch": 0.77325208466966,
      "grad_norm": 1.0958682723686255,
      "learning_rate": 2.9722067765917838e-05,
      "loss": 0.5739,
      "step": 12055
    },
    {
      "epoch": 0.7735728030788968,
      "grad_norm": 0.7152332630816656,
      "learning_rate": 2.9642468394666557e-05,
      "loss": 0.6729,
      "step": 12060
    },
    {
      "epoch": 0.7738935214881334,
      "grad_norm": 0.9986989562442445,
      "learning_rate": 2.956295720401612e-05,
      "loss": 0.6726,
      "step": 12065
    },
    {
      "epoch": 0.7742142398973701,
      "grad_norm": 0.9811723796412208,
      "learning_rate": 2.9483534293619685e-05,
      "loss": 0.5619,
      "step": 12070
    },
    {
      "epoch": 0.7745349583066068,
      "grad_norm": 0.9118000616924434,
      "learning_rate": 2.9404199763019645e-05,
      "loss": 0.6516,
      "step": 12075
    },
    {
      "epoch": 0.7748556767158435,
      "grad_norm": 0.8942392291019036,
      "learning_rate": 2.932495371164764e-05,
      "loss": 0.7949,
      "step": 12080
    },
    {
      "epoch": 0.7751763951250802,
      "grad_norm": 0.9745393445698103,
      "learning_rate": 2.9245796238824496e-05,
      "loss": 0.6836,
      "step": 12085
    },
    {
      "epoch": 0.7754971135343168,
      "grad_norm": 0.624918898789372,
      "learning_rate": 2.916672744375991e-05,
      "loss": 0.5384,
      "step": 12090
    },
    {
      "epoch": 0.7758178319435536,
      "grad_norm": 0.7577038101937041,
      "learning_rate": 2.908774742555257e-05,
      "loss": 0.7673,
      "step": 12095
    },
    {
      "epoch": 0.7761385503527902,
      "grad_norm": 1.0261935822819983,
      "learning_rate": 2.9008856283189778e-05,
      "loss": 0.5503,
      "step": 12100
    },
    {
      "epoch": 0.776459268762027,
      "grad_norm": 0.8962534874969645,
      "learning_rate": 2.8930054115547488e-05,
      "loss": 0.6463,
      "step": 12105
    },
    {
      "epoch": 0.7767799871712636,
      "grad_norm": 0.70250181904508,
      "learning_rate": 2.8851341021390155e-05,
      "loss": 0.5889,
      "step": 12110
    },
    {
      "epoch": 0.7771007055805004,
      "grad_norm": 0.6163717028953168,
      "learning_rate": 2.877271709937056e-05,
      "loss": 0.6057,
      "step": 12115
    },
    {
      "epoch": 0.777421423989737,
      "grad_norm": 1.139236879333557,
      "learning_rate": 2.8694182448029795e-05,
      "loss": 0.6143,
      "step": 12120
    },
    {
      "epoch": 0.7777421423989737,
      "grad_norm": 0.8597109154676085,
      "learning_rate": 2.8615737165796974e-05,
      "loss": 0.6156,
      "step": 12125
    },
    {
      "epoch": 0.7780628608082104,
      "grad_norm": 1.0377068227971646,
      "learning_rate": 2.8537381350989288e-05,
      "loss": 0.7131,
      "step": 12130
    },
    {
      "epoch": 0.778383579217447,
      "grad_norm": 0.9278713523838525,
      "learning_rate": 2.8459115101811752e-05,
      "loss": 0.5643,
      "step": 12135
    },
    {
      "epoch": 0.7787042976266838,
      "grad_norm": 0.9111079193714665,
      "learning_rate": 2.838093851635708e-05,
      "loss": 0.7114,
      "step": 12140
    },
    {
      "epoch": 0.7790250160359204,
      "grad_norm": 0.636013231630343,
      "learning_rate": 2.8302851692605748e-05,
      "loss": 0.5425,
      "step": 12145
    },
    {
      "epoch": 0.7793457344451572,
      "grad_norm": 0.9437606048473691,
      "learning_rate": 2.8224854728425555e-05,
      "loss": 0.7358,
      "step": 12150
    },
    {
      "epoch": 0.7796664528543938,
      "grad_norm": 0.9877250051200861,
      "learning_rate": 2.814694772157184e-05,
      "loss": 0.7881,
      "step": 12155
    },
    {
      "epoch": 0.7799871712636305,
      "grad_norm": 0.6355892070558739,
      "learning_rate": 2.806913076968709e-05,
      "loss": 0.5765,
      "step": 12160
    },
    {
      "epoch": 0.7803078896728672,
      "grad_norm": 0.8553618089212107,
      "learning_rate": 2.7991403970300923e-05,
      "loss": 0.6339,
      "step": 12165
    },
    {
      "epoch": 0.7806286080821039,
      "grad_norm": 0.7956244875523378,
      "learning_rate": 2.7913767420830105e-05,
      "loss": 0.6316,
      "step": 12170
    },
    {
      "epoch": 0.7809493264913406,
      "grad_norm": 0.74745099568378,
      "learning_rate": 2.7836221218578052e-05,
      "loss": 0.5178,
      "step": 12175
    },
    {
      "epoch": 0.7812700449005773,
      "grad_norm": 2.797197105902477,
      "learning_rate": 2.775876546073518e-05,
      "loss": 0.7453,
      "step": 12180
    },
    {
      "epoch": 0.781590763309814,
      "grad_norm": 0.8203117179056878,
      "learning_rate": 2.768140024437842e-05,
      "loss": 0.7123,
      "step": 12185
    },
    {
      "epoch": 0.7819114817190507,
      "grad_norm": 0.8491800107534502,
      "learning_rate": 2.7604125666471202e-05,
      "loss": 0.6031,
      "step": 12190
    },
    {
      "epoch": 0.7822322001282873,
      "grad_norm": 0.7920825834762689,
      "learning_rate": 2.7526941823863494e-05,
      "loss": 0.6918,
      "step": 12195
    },
    {
      "epoch": 0.7825529185375241,
      "grad_norm": 0.8070095630772426,
      "learning_rate": 2.744984881329139e-05,
      "loss": 0.5921,
      "step": 12200
    },
    {
      "epoch": 0.7828736369467607,
      "grad_norm": 0.6455255637368961,
      "learning_rate": 2.7372846731377265e-05,
      "loss": 0.6382,
      "step": 12205
    },
    {
      "epoch": 0.7831943553559975,
      "grad_norm": 0.92556283214074,
      "learning_rate": 2.7295935674629457e-05,
      "loss": 0.5116,
      "step": 12210
    },
    {
      "epoch": 0.7835150737652341,
      "grad_norm": 1.1170799846804207,
      "learning_rate": 2.7219115739442215e-05,
      "loss": 0.6566,
      "step": 12215
    },
    {
      "epoch": 0.7838357921744709,
      "grad_norm": 0.5890009042735036,
      "learning_rate": 2.7142387022095638e-05,
      "loss": 0.6128,
      "step": 12220
    },
    {
      "epoch": 0.7841565105837075,
      "grad_norm": 0.6327668177080631,
      "learning_rate": 2.7065749618755455e-05,
      "loss": 0.6366,
      "step": 12225
    },
    {
      "epoch": 0.7844772289929441,
      "grad_norm": 0.8664538277798131,
      "learning_rate": 2.698920362547299e-05,
      "loss": 0.6013,
      "step": 12230
    },
    {
      "epoch": 0.7847979474021809,
      "grad_norm": 0.7003044665428215,
      "learning_rate": 2.6912749138184956e-05,
      "loss": 0.7929,
      "step": 12235
    },
    {
      "epoch": 0.7851186658114175,
      "grad_norm": 0.7853265661064053,
      "learning_rate": 2.6836386252713396e-05,
      "loss": 0.7137,
      "step": 12240
    },
    {
      "epoch": 0.7854393842206543,
      "grad_norm": 0.909806347924112,
      "learning_rate": 2.6760115064765568e-05,
      "loss": 0.6994,
      "step": 12245
    },
    {
      "epoch": 0.7857601026298909,
      "grad_norm": 0.8351806612159146,
      "learning_rate": 2.6683935669933736e-05,
      "loss": 0.6935,
      "step": 12250
    },
    {
      "epoch": 0.7860808210391277,
      "grad_norm": 0.7611491943408887,
      "learning_rate": 2.6607848163695227e-05,
      "loss": 0.7319,
      "step": 12255
    },
    {
      "epoch": 0.7864015394483643,
      "grad_norm": 1.122080599336026,
      "learning_rate": 2.6531852641412082e-05,
      "loss": 0.6022,
      "step": 12260
    },
    {
      "epoch": 0.7867222578576011,
      "grad_norm": 1.1817121943287525,
      "learning_rate": 2.645594919833119e-05,
      "loss": 0.7494,
      "step": 12265
    },
    {
      "epoch": 0.7870429762668377,
      "grad_norm": 0.7929071478719117,
      "learning_rate": 2.6380137929583914e-05,
      "loss": 0.7783,
      "step": 12270
    },
    {
      "epoch": 0.7873636946760744,
      "grad_norm": 0.820309764452619,
      "learning_rate": 2.6304418930186115e-05,
      "loss": 0.6332,
      "step": 12275
    },
    {
      "epoch": 0.7876844130853111,
      "grad_norm": 0.707291602928582,
      "learning_rate": 2.6228792295038106e-05,
      "loss": 0.537,
      "step": 12280
    },
    {
      "epoch": 0.7880051314945478,
      "grad_norm": 0.8141400312776754,
      "learning_rate": 2.6153258118924308e-05,
      "loss": 0.6322,
      "step": 12285
    },
    {
      "epoch": 0.7883258499037845,
      "grad_norm": 0.7187432563518902,
      "learning_rate": 2.6077816496513363e-05,
      "loss": 0.5032,
      "step": 12290
    },
    {
      "epoch": 0.7886465683130212,
      "grad_norm": 0.921998673200194,
      "learning_rate": 2.6002467522357867e-05,
      "loss": 0.6134,
      "step": 12295
    },
    {
      "epoch": 0.7889672867222579,
      "grad_norm": 1.4739251939697386,
      "learning_rate": 2.592721129089427e-05,
      "loss": 0.6579,
      "step": 12300
    },
    {
      "epoch": 0.7892880051314946,
      "grad_norm": 0.7698494785751436,
      "learning_rate": 2.5852047896442853e-05,
      "loss": 0.6832,
      "step": 12305
    },
    {
      "epoch": 0.7896087235407312,
      "grad_norm": 0.9676144058038108,
      "learning_rate": 2.577697743320746e-05,
      "loss": 0.6789,
      "step": 12310
    },
    {
      "epoch": 0.789929441949968,
      "grad_norm": 0.7989952533967423,
      "learning_rate": 2.570199999527557e-05,
      "loss": 0.683,
      "step": 12315
    },
    {
      "epoch": 0.7902501603592046,
      "grad_norm": 0.7540668642091226,
      "learning_rate": 2.5627115676617953e-05,
      "loss": 0.6137,
      "step": 12320
    },
    {
      "epoch": 0.7905708787684413,
      "grad_norm": 1.2363573852579546,
      "learning_rate": 2.555232457108879e-05,
      "loss": 0.6497,
      "step": 12325
    },
    {
      "epoch": 0.790891597177678,
      "grad_norm": 0.5683854501183521,
      "learning_rate": 2.5477626772425356e-05,
      "loss": 0.6996,
      "step": 12330
    },
    {
      "epoch": 0.7912123155869147,
      "grad_norm": 0.5533412352742278,
      "learning_rate": 2.5403022374247953e-05,
      "loss": 0.7001,
      "step": 12335
    },
    {
      "epoch": 0.7915330339961514,
      "grad_norm": 0.675236986686075,
      "learning_rate": 2.5328511470059935e-05,
      "loss": 0.5805,
      "step": 12340
    },
    {
      "epoch": 0.791853752405388,
      "grad_norm": 0.7285390988297157,
      "learning_rate": 2.5254094153247355e-05,
      "loss": 0.6149,
      "step": 12345
    },
    {
      "epoch": 0.7921744708146248,
      "grad_norm": 0.80400571870766,
      "learning_rate": 2.5179770517079093e-05,
      "loss": 0.6948,
      "step": 12350
    },
    {
      "epoch": 0.7924951892238614,
      "grad_norm": 0.9377676574780994,
      "learning_rate": 2.510554065470653e-05,
      "loss": 0.7308,
      "step": 12355
    },
    {
      "epoch": 0.7928159076330982,
      "grad_norm": 0.6446906934234106,
      "learning_rate": 2.5031404659163492e-05,
      "loss": 0.7255,
      "step": 12360
    },
    {
      "epoch": 0.7931366260423348,
      "grad_norm": 0.8158537224973699,
      "learning_rate": 2.495736262336632e-05,
      "loss": 0.7016,
      "step": 12365
    },
    {
      "epoch": 0.7934573444515716,
      "grad_norm": 0.9172314841106095,
      "learning_rate": 2.4883414640113357e-05,
      "loss": 0.6117,
      "step": 12370
    },
    {
      "epoch": 0.7937780628608082,
      "grad_norm": 0.7437504326268314,
      "learning_rate": 2.4809560802085274e-05,
      "loss": 0.6409,
      "step": 12375
    },
    {
      "epoch": 0.7940987812700449,
      "grad_norm": 0.6879611505056618,
      "learning_rate": 2.4735801201844645e-05,
      "loss": 0.6397,
      "step": 12380
    },
    {
      "epoch": 0.7944194996792816,
      "grad_norm": 0.9926575009144855,
      "learning_rate": 2.466213593183593e-05,
      "loss": 0.6966,
      "step": 12385
    },
    {
      "epoch": 0.7947402180885182,
      "grad_norm": 0.8127945292903275,
      "learning_rate": 2.458856508438544e-05,
      "loss": 0.7704,
      "step": 12390
    },
    {
      "epoch": 0.795060936497755,
      "grad_norm": 0.8871371492144181,
      "learning_rate": 2.451508875170104e-05,
      "loss": 0.5606,
      "step": 12395
    },
    {
      "epoch": 0.7953816549069916,
      "grad_norm": 0.8206919204372869,
      "learning_rate": 2.444170702587226e-05,
      "loss": 0.6932,
      "step": 12400
    },
    {
      "epoch": 0.7957023733162284,
      "grad_norm": 0.6603633676196071,
      "learning_rate": 2.436841999886994e-05,
      "loss": 0.6109,
      "step": 12405
    },
    {
      "epoch": 0.796023091725465,
      "grad_norm": 0.9151323413512733,
      "learning_rate": 2.4295227762546267e-05,
      "loss": 0.6631,
      "step": 12410
    },
    {
      "epoch": 0.7963438101347017,
      "grad_norm": 0.9827343805814039,
      "learning_rate": 2.422213040863468e-05,
      "loss": 0.6563,
      "step": 12415
    },
    {
      "epoch": 0.7966645285439384,
      "grad_norm": 0.9469619065977057,
      "learning_rate": 2.414912802874961e-05,
      "loss": 0.7412,
      "step": 12420
    },
    {
      "epoch": 0.7969852469531751,
      "grad_norm": 1.3131843532103706,
      "learning_rate": 2.4076220714386568e-05,
      "loss": 0.6886,
      "step": 12425
    },
    {
      "epoch": 0.7973059653624118,
      "grad_norm": 1.2148517258592102,
      "learning_rate": 2.40034085569218e-05,
      "loss": 0.6898,
      "step": 12430
    },
    {
      "epoch": 0.7976266837716485,
      "grad_norm": 0.8095565024509138,
      "learning_rate": 2.393069164761237e-05,
      "loss": 0.6122,
      "step": 12435
    },
    {
      "epoch": 0.7979474021808852,
      "grad_norm": 0.9467420200870824,
      "learning_rate": 2.3858070077595908e-05,
      "loss": 0.7174,
      "step": 12440
    },
    {
      "epoch": 0.7982681205901219,
      "grad_norm": 0.6202794025655268,
      "learning_rate": 2.3785543937890586e-05,
      "loss": 0.66,
      "step": 12445
    },
    {
      "epoch": 0.7985888389993585,
      "grad_norm": 1.0791006971385633,
      "learning_rate": 2.3713113319394997e-05,
      "loss": 0.5363,
      "step": 12450
    },
    {
      "epoch": 0.7989095574085953,
      "grad_norm": 1.026500892588481,
      "learning_rate": 2.3640778312887945e-05,
      "loss": 0.7948,
      "step": 12455
    },
    {
      "epoch": 0.7992302758178319,
      "grad_norm": 0.7967893717258743,
      "learning_rate": 2.35685390090285e-05,
      "loss": 0.6343,
      "step": 12460
    },
    {
      "epoch": 0.7995509942270687,
      "grad_norm": 1.1948126480397625,
      "learning_rate": 2.3496395498355694e-05,
      "loss": 0.7174,
      "step": 12465
    },
    {
      "epoch": 0.7998717126363053,
      "grad_norm": 0.8650772892603197,
      "learning_rate": 2.34243478712885e-05,
      "loss": 0.7018,
      "step": 12470
    },
    {
      "epoch": 0.800192431045542,
      "grad_norm": 0.49196395624702055,
      "learning_rate": 2.3352396218125827e-05,
      "loss": 0.5881,
      "step": 12475
    },
    {
      "epoch": 0.8005131494547787,
      "grad_norm": 0.7575733059076403,
      "learning_rate": 2.3280540629046143e-05,
      "loss": 0.7292,
      "step": 12480
    },
    {
      "epoch": 0.8008338678640154,
      "grad_norm": 0.8513796572354395,
      "learning_rate": 2.3208781194107664e-05,
      "loss": 0.6286,
      "step": 12485
    },
    {
      "epoch": 0.8011545862732521,
      "grad_norm": 0.734121779464679,
      "learning_rate": 2.3137118003248004e-05,
      "loss": 0.6818,
      "step": 12490
    },
    {
      "epoch": 0.8014753046824887,
      "grad_norm": 0.5881243074608535,
      "learning_rate": 2.306555114628415e-05,
      "loss": 0.6553,
      "step": 12495
    },
    {
      "epoch": 0.8017960230917255,
      "grad_norm": 0.6452008879569514,
      "learning_rate": 2.2994080712912435e-05,
      "loss": 0.705,
      "step": 12500
    },
    {
      "epoch": 0.8021167415009621,
      "grad_norm": 1.409626103322556,
      "learning_rate": 2.2922706792708194e-05,
      "loss": 0.5859,
      "step": 12505
    },
    {
      "epoch": 0.8024374599101989,
      "grad_norm": 0.7556485492806266,
      "learning_rate": 2.2851429475125963e-05,
      "loss": 0.6137,
      "step": 12510
    },
    {
      "epoch": 0.8027581783194355,
      "grad_norm": 0.9809427245901448,
      "learning_rate": 2.2780248849499088e-05,
      "loss": 0.7344,
      "step": 12515
    },
    {
      "epoch": 0.8030788967286723,
      "grad_norm": 0.38473648876347516,
      "learning_rate": 2.2709165005039802e-05,
      "loss": 0.4635,
      "step": 12520
    },
    {
      "epoch": 0.8033996151379089,
      "grad_norm": 0.7409973296233345,
      "learning_rate": 2.263817803083901e-05,
      "loss": 0.6076,
      "step": 12525
    },
    {
      "epoch": 0.8037203335471456,
      "grad_norm": 0.7165871670251992,
      "learning_rate": 2.256728801586616e-05,
      "loss": 0.6541,
      "step": 12530
    },
    {
      "epoch": 0.8040410519563823,
      "grad_norm": 0.8518968659931285,
      "learning_rate": 2.249649504896929e-05,
      "loss": 0.7555,
      "step": 12535
    },
    {
      "epoch": 0.804361770365619,
      "grad_norm": 0.9159683373230153,
      "learning_rate": 2.242579921887471e-05,
      "loss": 0.6843,
      "step": 12540
    },
    {
      "epoch": 0.8046824887748557,
      "grad_norm": 0.6228826380501181,
      "learning_rate": 2.2355200614186987e-05,
      "loss": 0.5394,
      "step": 12545
    },
    {
      "epoch": 0.8050032071840924,
      "grad_norm": 0.8002539057082869,
      "learning_rate": 2.2284699323388923e-05,
      "loss": 0.7345,
      "step": 12550
    },
    {
      "epoch": 0.8053239255933291,
      "grad_norm": 0.9766455426961175,
      "learning_rate": 2.2214295434841248e-05,
      "loss": 0.7367,
      "step": 12555
    },
    {
      "epoch": 0.8056446440025657,
      "grad_norm": 0.7046361659107024,
      "learning_rate": 2.2143989036782707e-05,
      "loss": 0.5187,
      "step": 12560
    },
    {
      "epoch": 0.8059653624118024,
      "grad_norm": 0.8108273818757799,
      "learning_rate": 2.2073780217329786e-05,
      "loss": 0.6532,
      "step": 12565
    },
    {
      "epoch": 0.8062860808210391,
      "grad_norm": 0.818379710541348,
      "learning_rate": 2.2003669064476706e-05,
      "loss": 0.6059,
      "step": 12570
    },
    {
      "epoch": 0.8066067992302758,
      "grad_norm": 0.984654681269158,
      "learning_rate": 2.1933655666095275e-05,
      "loss": 0.6525,
      "step": 12575
    },
    {
      "epoch": 0.8069275176395125,
      "grad_norm": 0.9567899833609597,
      "learning_rate": 2.186374010993476e-05,
      "loss": 0.7311,
      "step": 12580
    },
    {
      "epoch": 0.8072482360487492,
      "grad_norm": 0.7463705769882709,
      "learning_rate": 2.1793922483621876e-05,
      "loss": 0.6196,
      "step": 12585
    },
    {
      "epoch": 0.8075689544579859,
      "grad_norm": 0.9733520585461265,
      "learning_rate": 2.1724202874660492e-05,
      "loss": 0.7193,
      "step": 12590
    },
    {
      "epoch": 0.8078896728672226,
      "grad_norm": 0.7681175464199929,
      "learning_rate": 2.165458137043175e-05,
      "loss": 0.6522,
      "step": 12595
    },
    {
      "epoch": 0.8082103912764592,
      "grad_norm": 0.6886221085607587,
      "learning_rate": 2.158505805819374e-05,
      "loss": 0.6666,
      "step": 12600
    },
    {
      "epoch": 0.808531109685696,
      "grad_norm": 0.603328263564938,
      "learning_rate": 2.1515633025081484e-05,
      "loss": 0.667,
      "step": 12605
    },
    {
      "epoch": 0.8088518280949326,
      "grad_norm": 0.8470975793567042,
      "learning_rate": 2.1446306358106927e-05,
      "loss": 0.6453,
      "step": 12610
    },
    {
      "epoch": 0.8091725465041694,
      "grad_norm": 1.0220077328521942,
      "learning_rate": 2.1377078144158603e-05,
      "loss": 0.6582,
      "step": 12615
    },
    {
      "epoch": 0.809493264913406,
      "grad_norm": 0.7129620704949545,
      "learning_rate": 2.1307948470001782e-05,
      "loss": 0.5496,
      "step": 12620
    },
    {
      "epoch": 0.8098139833226428,
      "grad_norm": 0.6343852911809139,
      "learning_rate": 2.1238917422278116e-05,
      "loss": 0.5455,
      "step": 12625
    },
    {
      "epoch": 0.8101347017318794,
      "grad_norm": 0.36707540294038493,
      "learning_rate": 2.1169985087505694e-05,
      "loss": 0.6399,
      "step": 12630
    },
    {
      "epoch": 0.810455420141116,
      "grad_norm": 0.813228299713834,
      "learning_rate": 2.1101151552078944e-05,
      "loss": 0.6842,
      "step": 12635
    },
    {
      "epoch": 0.8107761385503528,
      "grad_norm": 0.6267132658473076,
      "learning_rate": 2.1032416902268314e-05,
      "loss": 0.5479,
      "step": 12640
    },
    {
      "epoch": 0.8110968569595894,
      "grad_norm": 1.275645304461915,
      "learning_rate": 2.0963781224220503e-05,
      "loss": 0.6785,
      "step": 12645
    },
    {
      "epoch": 0.8114175753688262,
      "grad_norm": 0.8576850457893269,
      "learning_rate": 2.0895244603957998e-05,
      "loss": 0.7868,
      "step": 12650
    },
    {
      "epoch": 0.8117382937780628,
      "grad_norm": 0.5639578214670323,
      "learning_rate": 2.082680712737929e-05,
      "loss": 0.5559,
      "step": 12655
    },
    {
      "epoch": 0.8120590121872996,
      "grad_norm": 1.1440696942831554,
      "learning_rate": 2.0758468880258486e-05,
      "loss": 0.7089,
      "step": 12660
    },
    {
      "epoch": 0.8123797305965362,
      "grad_norm": 0.8070604839659317,
      "learning_rate": 2.0690229948245365e-05,
      "loss": 0.6695,
      "step": 12665
    },
    {
      "epoch": 0.812700449005773,
      "grad_norm": 0.6244747169984161,
      "learning_rate": 2.0622090416865293e-05,
      "loss": 0.5854,
      "step": 12670
    },
    {
      "epoch": 0.8130211674150096,
      "grad_norm": 0.506375535891638,
      "learning_rate": 2.055405037151894e-05,
      "loss": 0.6383,
      "step": 12675
    },
    {
      "epoch": 0.8133418858242463,
      "grad_norm": 1.183001348716755,
      "learning_rate": 2.0486109897482407e-05,
      "loss": 0.6203,
      "step": 12680
    },
    {
      "epoch": 0.813662604233483,
      "grad_norm": 0.6143509135493088,
      "learning_rate": 2.0418269079906936e-05,
      "loss": 0.5593,
      "step": 12685
    },
    {
      "epoch": 0.8139833226427197,
      "grad_norm": 0.6234718472183463,
      "learning_rate": 2.0350528003818825e-05,
      "loss": 0.6459,
      "step": 12690
    },
    {
      "epoch": 0.8143040410519564,
      "grad_norm": 1.8693845624658407,
      "learning_rate": 2.0282886754119478e-05,
      "loss": 0.7211,
      "step": 12695
    },
    {
      "epoch": 0.8146247594611931,
      "grad_norm": 0.8258541488205007,
      "learning_rate": 2.0215345415585107e-05,
      "loss": 0.5976,
      "step": 12700
    },
    {
      "epoch": 0.8149454778704298,
      "grad_norm": 0.914739265249098,
      "learning_rate": 2.0147904072866695e-05,
      "loss": 0.6308,
      "step": 12705
    },
    {
      "epoch": 0.8152661962796665,
      "grad_norm": 0.7090505847389847,
      "learning_rate": 2.0080562810489935e-05,
      "loss": 0.727,
      "step": 12710
    },
    {
      "epoch": 0.8155869146889031,
      "grad_norm": 0.9339182937300688,
      "learning_rate": 2.001332171285505e-05,
      "loss": 0.6809,
      "step": 12715
    },
    {
      "epoch": 0.8159076330981399,
      "grad_norm": 0.925613865395883,
      "learning_rate": 1.9946180864236797e-05,
      "loss": 0.7004,
      "step": 12720
    },
    {
      "epoch": 0.8162283515073765,
      "grad_norm": 0.874166373614285,
      "learning_rate": 1.9879140348784177e-05,
      "loss": 0.6623,
      "step": 12725
    },
    {
      "epoch": 0.8165490699166132,
      "grad_norm": 0.8313132986404351,
      "learning_rate": 1.981220025052056e-05,
      "loss": 0.6177,
      "step": 12730
    },
    {
      "epoch": 0.8168697883258499,
      "grad_norm": 0.6383078710564455,
      "learning_rate": 1.9745360653343393e-05,
      "loss": 0.6089,
      "step": 12735
    },
    {
      "epoch": 0.8171905067350866,
      "grad_norm": 0.5929159065490891,
      "learning_rate": 1.9678621641024132e-05,
      "loss": 0.5833,
      "step": 12740
    },
    {
      "epoch": 0.8175112251443233,
      "grad_norm": 0.6839908339425101,
      "learning_rate": 1.961198329720827e-05,
      "loss": 0.6513,
      "step": 12745
    },
    {
      "epoch": 0.8178319435535599,
      "grad_norm": 0.43381578975254104,
      "learning_rate": 1.9545445705415012e-05,
      "loss": 0.655,
      "step": 12750
    },
    {
      "epoch": 0.8181526619627967,
      "grad_norm": 0.666728316560307,
      "learning_rate": 1.947900894903739e-05,
      "loss": 0.5284,
      "step": 12755
    },
    {
      "epoch": 0.8184733803720333,
      "grad_norm": 1.0911535549941562,
      "learning_rate": 1.9412673111342018e-05,
      "loss": 0.6534,
      "step": 12760
    },
    {
      "epoch": 0.8187940987812701,
      "grad_norm": 0.8721963911370444,
      "learning_rate": 1.934643827546899e-05,
      "loss": 0.7718,
      "step": 12765
    },
    {
      "epoch": 0.8191148171905067,
      "grad_norm": 0.9043104390757369,
      "learning_rate": 1.928030452443187e-05,
      "loss": 0.7249,
      "step": 12770
    },
    {
      "epoch": 0.8194355355997435,
      "grad_norm": 0.6520308339900129,
      "learning_rate": 1.9214271941117458e-05,
      "loss": 0.569,
      "step": 12775
    },
    {
      "epoch": 0.8197562540089801,
      "grad_norm": 1.0081351400932888,
      "learning_rate": 1.9148340608285863e-05,
      "loss": 0.6623,
      "step": 12780
    },
    {
      "epoch": 0.8200769724182168,
      "grad_norm": 0.6541686083293314,
      "learning_rate": 1.908251060857019e-05,
      "loss": 0.6006,
      "step": 12785
    },
    {
      "epoch": 0.8203976908274535,
      "grad_norm": 0.6996268349045872,
      "learning_rate": 1.901678202447663e-05,
      "loss": 0.6209,
      "step": 12790
    },
    {
      "epoch": 0.8207184092366901,
      "grad_norm": 0.6137399071233165,
      "learning_rate": 1.8951154938384207e-05,
      "loss": 0.7341,
      "step": 12795
    },
    {
      "epoch": 0.8210391276459269,
      "grad_norm": 0.6979894249139232,
      "learning_rate": 1.8885629432544717e-05,
      "loss": 0.6331,
      "step": 12800
    },
    {
      "epoch": 0.8213598460551635,
      "grad_norm": 1.4876520614972237,
      "learning_rate": 1.882020558908274e-05,
      "loss": 0.5262,
      "step": 12805
    },
    {
      "epoch": 0.8216805644644003,
      "grad_norm": 1.1310428300822517,
      "learning_rate": 1.8754883489995335e-05,
      "loss": 0.6548,
      "step": 12810
    },
    {
      "epoch": 0.8220012828736369,
      "grad_norm": 0.819858534428383,
      "learning_rate": 1.868966321715212e-05,
      "loss": 0.6514,
      "step": 12815
    },
    {
      "epoch": 0.8223220012828736,
      "grad_norm": 0.9699270159513138,
      "learning_rate": 1.8624544852295046e-05,
      "loss": 0.6668,
      "step": 12820
    },
    {
      "epoch": 0.8226427196921103,
      "grad_norm": 1.1171340784169779,
      "learning_rate": 1.8559528477038325e-05,
      "loss": 0.7466,
      "step": 12825
    },
    {
      "epoch": 0.822963438101347,
      "grad_norm": 0.9010920277558152,
      "learning_rate": 1.849461417286843e-05,
      "loss": 0.5722,
      "step": 12830
    },
    {
      "epoch": 0.8232841565105837,
      "grad_norm": 0.8446632185572971,
      "learning_rate": 1.8429802021143816e-05,
      "loss": 0.7673,
      "step": 12835
    },
    {
      "epoch": 0.8236048749198204,
      "grad_norm": 0.8445623736137308,
      "learning_rate": 1.8365092103094938e-05,
      "loss": 0.6343,
      "step": 12840
    },
    {
      "epoch": 0.8239255933290571,
      "grad_norm": 1.3224256501204117,
      "learning_rate": 1.83004844998241e-05,
      "loss": 0.6446,
      "step": 12845
    },
    {
      "epoch": 0.8242463117382938,
      "grad_norm": 1.2509505443818558,
      "learning_rate": 1.8235979292305448e-05,
      "loss": 0.5908,
      "step": 12850
    },
    {
      "epoch": 0.8245670301475305,
      "grad_norm": 1.045236864985607,
      "learning_rate": 1.8171576561384718e-05,
      "loss": 0.6833,
      "step": 12855
    },
    {
      "epoch": 0.8248877485567672,
      "grad_norm": 0.8131230488754208,
      "learning_rate": 1.8107276387779194e-05,
      "loss": 0.6713,
      "step": 12860
    },
    {
      "epoch": 0.8252084669660038,
      "grad_norm": 0.9987203815522278,
      "learning_rate": 1.8043078852077723e-05,
      "loss": 0.6382,
      "step": 12865
    },
    {
      "epoch": 0.8255291853752406,
      "grad_norm": 0.8378880198765352,
      "learning_rate": 1.797898403474041e-05,
      "loss": 0.651,
      "step": 12870
    },
    {
      "epoch": 0.8258499037844772,
      "grad_norm": 0.69860101125052,
      "learning_rate": 1.7914992016098652e-05,
      "loss": 0.6678,
      "step": 12875
    },
    {
      "epoch": 0.826170622193714,
      "grad_norm": 0.7906981356515638,
      "learning_rate": 1.7851102876355064e-05,
      "loss": 0.7724,
      "step": 12880
    },
    {
      "epoch": 0.8264913406029506,
      "grad_norm": 0.7220660188316776,
      "learning_rate": 1.778731669558322e-05,
      "loss": 0.7528,
      "step": 12885
    },
    {
      "epoch": 0.8268120590121874,
      "grad_norm": 0.8602114436332251,
      "learning_rate": 1.772363355372776e-05,
      "loss": 0.7355,
      "step": 12890
    },
    {
      "epoch": 0.827132777421424,
      "grad_norm": 0.7936909578079667,
      "learning_rate": 1.7660053530604103e-05,
      "loss": 0.5939,
      "step": 12895
    },
    {
      "epoch": 0.8274534958306606,
      "grad_norm": 0.7386556230325233,
      "learning_rate": 1.759657670589844e-05,
      "loss": 0.7065,
      "step": 12900
    },
    {
      "epoch": 0.8277742142398974,
      "grad_norm": 0.7508393958424202,
      "learning_rate": 1.7533203159167653e-05,
      "loss": 0.7995,
      "step": 12905
    },
    {
      "epoch": 0.828094932649134,
      "grad_norm": 1.484996895062748,
      "learning_rate": 1.7469932969839133e-05,
      "loss": 0.5822,
      "step": 12910
    },
    {
      "epoch": 0.8284156510583708,
      "grad_norm": 0.7889368806667416,
      "learning_rate": 1.7406766217210813e-05,
      "loss": 0.6915,
      "step": 12915
    },
    {
      "epoch": 0.8287363694676074,
      "grad_norm": 1.043078354293378,
      "learning_rate": 1.7343702980450882e-05,
      "loss": 0.6678,
      "step": 12920
    },
    {
      "epoch": 0.8290570878768442,
      "grad_norm": 0.5235441869984315,
      "learning_rate": 1.7280743338597903e-05,
      "loss": 0.6732,
      "step": 12925
    },
    {
      "epoch": 0.8293778062860808,
      "grad_norm": 0.9827303368182867,
      "learning_rate": 1.7217887370560527e-05,
      "loss": 0.5817,
      "step": 12930
    },
    {
      "epoch": 0.8296985246953175,
      "grad_norm": 0.8919025135393817,
      "learning_rate": 1.715513515511743e-05,
      "loss": 0.5394,
      "step": 12935
    },
    {
      "epoch": 0.8300192431045542,
      "grad_norm": 0.8422357074138689,
      "learning_rate": 1.7092486770917382e-05,
      "loss": 0.7755,
      "step": 12940
    },
    {
      "epoch": 0.8303399615137909,
      "grad_norm": 0.9473245373995116,
      "learning_rate": 1.7029942296478885e-05,
      "loss": 0.6846,
      "step": 12945
    },
    {
      "epoch": 0.8306606799230276,
      "grad_norm": 0.6373840068433619,
      "learning_rate": 1.6967501810190323e-05,
      "loss": 0.6543,
      "step": 12950
    },
    {
      "epoch": 0.8309813983322643,
      "grad_norm": 0.7843610971634594,
      "learning_rate": 1.6905165390309665e-05,
      "loss": 0.6431,
      "step": 12955
    },
    {
      "epoch": 0.831302116741501,
      "grad_norm": 1.1652096610055944,
      "learning_rate": 1.6842933114964466e-05,
      "loss": 0.8221,
      "step": 12960
    },
    {
      "epoch": 0.8316228351507376,
      "grad_norm": 0.8194937278113069,
      "learning_rate": 1.6780805062151816e-05,
      "loss": 0.5232,
      "step": 12965
    },
    {
      "epoch": 0.8319435535599743,
      "grad_norm": 1.188666287581691,
      "learning_rate": 1.6718781309738073e-05,
      "loss": 0.6604,
      "step": 12970
    },
    {
      "epoch": 0.832264271969211,
      "grad_norm": 0.8641382912001553,
      "learning_rate": 1.665686193545898e-05,
      "loss": 0.5844,
      "step": 12975
    },
    {
      "epoch": 0.8325849903784477,
      "grad_norm": 0.7062740744596516,
      "learning_rate": 1.6595047016919373e-05,
      "loss": 0.6843,
      "step": 12980
    },
    {
      "epoch": 0.8329057087876844,
      "grad_norm": 1.7666107387397485,
      "learning_rate": 1.6533336631593276e-05,
      "loss": 0.5533,
      "step": 12985
    },
    {
      "epoch": 0.8332264271969211,
      "grad_norm": 0.6713809127329562,
      "learning_rate": 1.6471730856823587e-05,
      "loss": 0.5803,
      "step": 12990
    },
    {
      "epoch": 0.8335471456061578,
      "grad_norm": 0.789870715650865,
      "learning_rate": 1.6410229769822137e-05,
      "loss": 0.5722,
      "step": 12995
    },
    {
      "epoch": 0.8338678640153945,
      "grad_norm": 0.694543681011162,
      "learning_rate": 1.6348833447669596e-05,
      "loss": 0.7518,
      "step": 13000
    },
    {
      "epoch": 0.8341885824246311,
      "grad_norm": 0.9060155570486944,
      "learning_rate": 1.6287541967315246e-05,
      "loss": 0.6968,
      "step": 13005
    },
    {
      "epoch": 0.8345093008338679,
      "grad_norm": 0.7521276185282114,
      "learning_rate": 1.6226355405577052e-05,
      "loss": 0.7398,
      "step": 13010
    },
    {
      "epoch": 0.8348300192431045,
      "grad_norm": 0.6239824879078599,
      "learning_rate": 1.6165273839141425e-05,
      "loss": 0.5993,
      "step": 13015
    },
    {
      "epoch": 0.8351507376523413,
      "grad_norm": 0.8788280197433859,
      "learning_rate": 1.610429734456317e-05,
      "loss": 0.5281,
      "step": 13020
    },
    {
      "epoch": 0.8354714560615779,
      "grad_norm": 0.5708218830810341,
      "learning_rate": 1.604342599826548e-05,
      "loss": 0.6636,
      "step": 13025
    },
    {
      "epoch": 0.8357921744708147,
      "grad_norm": 0.9995506015609548,
      "learning_rate": 1.5982659876539706e-05,
      "loss": 0.6224,
      "step": 13030
    },
    {
      "epoch": 0.8361128928800513,
      "grad_norm": 0.6985670528256153,
      "learning_rate": 1.5921999055545322e-05,
      "loss": 0.7875,
      "step": 13035
    },
    {
      "epoch": 0.8364336112892881,
      "grad_norm": 1.1017729058211603,
      "learning_rate": 1.5861443611309836e-05,
      "loss": 0.5689,
      "step": 13040
    },
    {
      "epoch": 0.8367543296985247,
      "grad_norm": 0.6102105059220153,
      "learning_rate": 1.5800993619728645e-05,
      "loss": 0.6071,
      "step": 13045
    },
    {
      "epoch": 0.8370750481077613,
      "grad_norm": 1.0918121069567406,
      "learning_rate": 1.574064915656508e-05,
      "loss": 0.6389,
      "step": 13050
    },
    {
      "epoch": 0.8373957665169981,
      "grad_norm": 0.8119509757109902,
      "learning_rate": 1.5680410297450097e-05,
      "loss": 0.6904,
      "step": 13055
    },
    {
      "epoch": 0.8377164849262347,
      "grad_norm": 1.0654010067070523,
      "learning_rate": 1.56202771178824e-05,
      "loss": 0.6806,
      "step": 13060
    },
    {
      "epoch": 0.8380372033354715,
      "grad_norm": 1.003140917229182,
      "learning_rate": 1.5560249693228167e-05,
      "loss": 0.7506,
      "step": 13065
    },
    {
      "epoch": 0.8383579217447081,
      "grad_norm": 0.8104009198927022,
      "learning_rate": 1.5500328098721017e-05,
      "loss": 0.6771,
      "step": 13070
    },
    {
      "epoch": 0.8386786401539449,
      "grad_norm": 0.6505916854083006,
      "learning_rate": 1.5440512409462027e-05,
      "loss": 0.4606,
      "step": 13075
    },
    {
      "epoch": 0.8389993585631815,
      "grad_norm": 0.8172274238106711,
      "learning_rate": 1.5380802700419437e-05,
      "loss": 0.6273,
      "step": 13080
    },
    {
      "epoch": 0.8393200769724182,
      "grad_norm": 0.8412486560565198,
      "learning_rate": 1.5321199046428748e-05,
      "loss": 0.6232,
      "step": 13085
    },
    {
      "epoch": 0.8396407953816549,
      "grad_norm": 1.2677355193498017,
      "learning_rate": 1.526170152219246e-05,
      "loss": 0.6965,
      "step": 13090
    },
    {
      "epoch": 0.8399615137908916,
      "grad_norm": 1.1729148810404941,
      "learning_rate": 1.520231020228008e-05,
      "loss": 0.6742,
      "step": 13095
    },
    {
      "epoch": 0.8402822322001283,
      "grad_norm": 0.9492910072998716,
      "learning_rate": 1.51430251611281e-05,
      "loss": 0.6427,
      "step": 13100
    },
    {
      "epoch": 0.840602950609365,
      "grad_norm": 0.9485664054113067,
      "learning_rate": 1.508384647303962e-05,
      "loss": 0.7599,
      "step": 13105
    },
    {
      "epoch": 0.8409236690186017,
      "grad_norm": 0.7710450909617227,
      "learning_rate": 1.5024774212184644e-05,
      "loss": 0.7211,
      "step": 13110
    },
    {
      "epoch": 0.8412443874278384,
      "grad_norm": 1.4732302257890362,
      "learning_rate": 1.496580845259965e-05,
      "loss": 0.5757,
      "step": 13115
    },
    {
      "epoch": 0.841565105837075,
      "grad_norm": 0.815748738677427,
      "learning_rate": 1.4906949268187731e-05,
      "loss": 0.7202,
      "step": 13120
    },
    {
      "epoch": 0.8418858242463118,
      "grad_norm": 0.7569265134733956,
      "learning_rate": 1.4848196732718333e-05,
      "loss": 0.5067,
      "step": 13125
    },
    {
      "epoch": 0.8422065426555484,
      "grad_norm": 0.7019350874014,
      "learning_rate": 1.4789550919827255e-05,
      "loss": 0.6555,
      "step": 13130
    },
    {
      "epoch": 0.8425272610647851,
      "grad_norm": 1.070502908495116,
      "learning_rate": 1.4731011903016589e-05,
      "loss": 0.5612,
      "step": 13135
    },
    {
      "epoch": 0.8428479794740218,
      "grad_norm": 0.8746378057344433,
      "learning_rate": 1.4672579755654492e-05,
      "loss": 0.6644,
      "step": 13140
    },
    {
      "epoch": 0.8431686978832585,
      "grad_norm": 0.9069204901759049,
      "learning_rate": 1.4614254550975282e-05,
      "loss": 0.6041,
      "step": 13145
    },
    {
      "epoch": 0.8434894162924952,
      "grad_norm": 0.6286045045253976,
      "learning_rate": 1.455603636207915e-05,
      "loss": 0.573,
      "step": 13150
    },
    {
      "epoch": 0.8438101347017318,
      "grad_norm": 0.8046184953958996,
      "learning_rate": 1.4497925261932188e-05,
      "loss": 0.7031,
      "step": 13155
    },
    {
      "epoch": 0.8441308531109686,
      "grad_norm": 0.9289022471342262,
      "learning_rate": 1.4439921323366323e-05,
      "loss": 0.6532,
      "step": 13160
    },
    {
      "epoch": 0.8444515715202052,
      "grad_norm": 0.8155940315800527,
      "learning_rate": 1.4382024619079105e-05,
      "loss": 0.6537,
      "step": 13165
    },
    {
      "epoch": 0.844772289929442,
      "grad_norm": 1.064740365786613,
      "learning_rate": 1.432423522163372e-05,
      "loss": 0.598,
      "step": 13170
    },
    {
      "epoch": 0.8450930083386786,
      "grad_norm": 0.7962110283298796,
      "learning_rate": 1.4266553203458831e-05,
      "loss": 0.7714,
      "step": 13175
    },
    {
      "epoch": 0.8454137267479154,
      "grad_norm": 0.9891349725088471,
      "learning_rate": 1.4208978636848591e-05,
      "loss": 0.666,
      "step": 13180
    },
    {
      "epoch": 0.845734445157152,
      "grad_norm": 0.4894444101288945,
      "learning_rate": 1.4151511593962418e-05,
      "loss": 0.5697,
      "step": 13185
    },
    {
      "epoch": 0.8460551635663887,
      "grad_norm": 0.9733970578229911,
      "learning_rate": 1.4094152146824969e-05,
      "loss": 0.639,
      "step": 13190
    },
    {
      "epoch": 0.8463758819756254,
      "grad_norm": 0.7185939555951706,
      "learning_rate": 1.40369003673261e-05,
      "loss": 0.6608,
      "step": 13195
    },
    {
      "epoch": 0.846696600384862,
      "grad_norm": 0.9770382341303654,
      "learning_rate": 1.3979756327220683e-05,
      "loss": 0.5714,
      "step": 13200
    },
    {
      "epoch": 0.8470173187940988,
      "grad_norm": 0.9521286519290345,
      "learning_rate": 1.3922720098128527e-05,
      "loss": 0.7672,
      "step": 13205
    },
    {
      "epoch": 0.8473380372033354,
      "grad_norm": 1.0026426650083589,
      "learning_rate": 1.3865791751534418e-05,
      "loss": 0.589,
      "step": 13210
    },
    {
      "epoch": 0.8476587556125722,
      "grad_norm": 0.7945648077908503,
      "learning_rate": 1.3808971358787837e-05,
      "loss": 0.5791,
      "step": 13215
    },
    {
      "epoch": 0.8479794740218088,
      "grad_norm": 0.6890626619071494,
      "learning_rate": 1.3752258991103018e-05,
      "loss": 0.7313,
      "step": 13220
    },
    {
      "epoch": 0.8483001924310456,
      "grad_norm": 0.8523591274592248,
      "learning_rate": 1.369565471955878e-05,
      "loss": 0.7,
      "step": 13225
    },
    {
      "epoch": 0.8486209108402822,
      "grad_norm": 0.6661468510777631,
      "learning_rate": 1.3639158615098457e-05,
      "loss": 0.681,
      "step": 13230
    },
    {
      "epoch": 0.8489416292495189,
      "grad_norm": 0.498183926121059,
      "learning_rate": 1.3582770748529839e-05,
      "loss": 0.6238,
      "step": 13235
    },
    {
      "epoch": 0.8492623476587556,
      "grad_norm": 0.9855613055277577,
      "learning_rate": 1.3526491190525025e-05,
      "loss": 0.7218,
      "step": 13240
    },
    {
      "epoch": 0.8495830660679923,
      "grad_norm": 0.42718056670086024,
      "learning_rate": 1.3470320011620418e-05,
      "loss": 0.5768,
      "step": 13245
    },
    {
      "epoch": 0.849903784477229,
      "grad_norm": 1.0362257024186183,
      "learning_rate": 1.3414257282216535e-05,
      "loss": 0.6332,
      "step": 13250
    },
    {
      "epoch": 0.8502245028864657,
      "grad_norm": 0.8990446366365678,
      "learning_rate": 1.3358303072578027e-05,
      "loss": 0.6709,
      "step": 13255
    },
    {
      "epoch": 0.8505452212957024,
      "grad_norm": 0.7211479323078617,
      "learning_rate": 1.3302457452833484e-05,
      "loss": 0.5878,
      "step": 13260
    },
    {
      "epoch": 0.8508659397049391,
      "grad_norm": 1.1924552884788637,
      "learning_rate": 1.3246720492975396e-05,
      "loss": 0.7302,
      "step": 13265
    },
    {
      "epoch": 0.8511866581141757,
      "grad_norm": 0.6589451039855936,
      "learning_rate": 1.3191092262860127e-05,
      "loss": 0.6891,
      "step": 13270
    },
    {
      "epoch": 0.8515073765234125,
      "grad_norm": 0.6379938202383435,
      "learning_rate": 1.3135572832207699e-05,
      "loss": 0.5751,
      "step": 13275
    },
    {
      "epoch": 0.8518280949326491,
      "grad_norm": 0.6107227064835382,
      "learning_rate": 1.3080162270601826e-05,
      "loss": 0.6705,
      "step": 13280
    },
    {
      "epoch": 0.8521488133418859,
      "grad_norm": 0.7796857101023206,
      "learning_rate": 1.3024860647489756e-05,
      "loss": 0.595,
      "step": 13285
    },
    {
      "epoch": 0.8524695317511225,
      "grad_norm": 1.067556813441523,
      "learning_rate": 1.2969668032182147e-05,
      "loss": 0.6906,
      "step": 13290
    },
    {
      "epoch": 0.8527902501603593,
      "grad_norm": 0.7705240841097785,
      "learning_rate": 1.2914584493853144e-05,
      "loss": 0.6176,
      "step": 13295
    },
    {
      "epoch": 0.8531109685695959,
      "grad_norm": 0.9227266859657003,
      "learning_rate": 1.285961010154011e-05,
      "loss": 0.6479,
      "step": 13300
    },
    {
      "epoch": 0.8534316869788325,
      "grad_norm": 1.0189541311376396,
      "learning_rate": 1.2804744924143608e-05,
      "loss": 0.728,
      "step": 13305
    },
    {
      "epoch": 0.8537524053880693,
      "grad_norm": 0.8401997316168908,
      "learning_rate": 1.2749989030427344e-05,
      "loss": 0.7617,
      "step": 13310
    },
    {
      "epoch": 0.8540731237973059,
      "grad_norm": 0.9093797714776795,
      "learning_rate": 1.269534248901807e-05,
      "loss": 0.5851,
      "step": 13315
    },
    {
      "epoch": 0.8543938422065427,
      "grad_norm": 1.1037034088263697,
      "learning_rate": 1.2640805368405462e-05,
      "loss": 0.6118,
      "step": 13320
    },
    {
      "epoch": 0.8547145606157793,
      "grad_norm": 0.7177427685245759,
      "learning_rate": 1.2586377736942034e-05,
      "loss": 0.7042,
      "step": 13325
    },
    {
      "epoch": 0.8550352790250161,
      "grad_norm": 0.9633359403241921,
      "learning_rate": 1.2532059662843144e-05,
      "loss": 0.7182,
      "step": 13330
    },
    {
      "epoch": 0.8553559974342527,
      "grad_norm": 0.8564133887667676,
      "learning_rate": 1.2477851214186754e-05,
      "loss": 0.5807,
      "step": 13335
    },
    {
      "epoch": 0.8556767158434894,
      "grad_norm": 1.0067512789243385,
      "learning_rate": 1.2423752458913518e-05,
      "loss": 0.6689,
      "step": 13340
    },
    {
      "epoch": 0.8559974342527261,
      "grad_norm": 0.6740456644820353,
      "learning_rate": 1.2369763464826533e-05,
      "loss": 0.5505,
      "step": 13345
    },
    {
      "epoch": 0.8563181526619628,
      "grad_norm": 0.7485205146558563,
      "learning_rate": 1.2315884299591362e-05,
      "loss": 0.7485,
      "step": 13350
    },
    {
      "epoch": 0.8566388710711995,
      "grad_norm": 0.9943455478406926,
      "learning_rate": 1.2262115030735944e-05,
      "loss": 0.7464,
      "step": 13355
    },
    {
      "epoch": 0.8569595894804362,
      "grad_norm": 0.7832997459113116,
      "learning_rate": 1.2208455725650436e-05,
      "loss": 0.6956,
      "step": 13360
    },
    {
      "epoch": 0.8572803078896729,
      "grad_norm": 0.9569726126068407,
      "learning_rate": 1.2154906451587189e-05,
      "loss": 0.7132,
      "step": 13365
    },
    {
      "epoch": 0.8576010262989096,
      "grad_norm": 0.7447552856015294,
      "learning_rate": 1.2101467275660661e-05,
      "loss": 0.4959,
      "step": 13370
    },
    {
      "epoch": 0.8579217447081462,
      "grad_norm": 0.8033856598382162,
      "learning_rate": 1.2048138264847297e-05,
      "loss": 0.8208,
      "step": 13375
    },
    {
      "epoch": 0.858242463117383,
      "grad_norm": 1.2548309542667209,
      "learning_rate": 1.1994919485985522e-05,
      "loss": 0.5933,
      "step": 13380
    },
    {
      "epoch": 0.8585631815266196,
      "grad_norm": 0.8849084463562876,
      "learning_rate": 1.1941811005775538e-05,
      "loss": 0.7345,
      "step": 13385
    },
    {
      "epoch": 0.8588838999358563,
      "grad_norm": 1.0662756941569218,
      "learning_rate": 1.1888812890779377e-05,
      "loss": 0.672,
      "step": 13390
    },
    {
      "epoch": 0.859204618345093,
      "grad_norm": 1.4484403343446357,
      "learning_rate": 1.1835925207420694e-05,
      "loss": 0.606,
      "step": 13395
    },
    {
      "epoch": 0.8595253367543297,
      "grad_norm": 0.8332555994611591,
      "learning_rate": 1.1783148021984725e-05,
      "loss": 0.692,
      "step": 13400
    },
    {
      "epoch": 0.8598460551635664,
      "grad_norm": 0.7857634142558743,
      "learning_rate": 1.1730481400618299e-05,
      "loss": 0.8791,
      "step": 13405
    },
    {
      "epoch": 0.8601667735728031,
      "grad_norm": 1.0726454797623632,
      "learning_rate": 1.167792540932957e-05,
      "loss": 0.6978,
      "step": 13410
    },
    {
      "epoch": 0.8604874919820398,
      "grad_norm": 0.662627507867472,
      "learning_rate": 1.162548011398814e-05,
      "loss": 0.6655,
      "step": 13415
    },
    {
      "epoch": 0.8608082103912764,
      "grad_norm": 0.7427411411925819,
      "learning_rate": 1.1573145580324785e-05,
      "loss": 0.7019,
      "step": 13420
    },
    {
      "epoch": 0.8611289288005132,
      "grad_norm": 0.8465518983483786,
      "learning_rate": 1.1520921873931489e-05,
      "loss": 0.7452,
      "step": 13425
    },
    {
      "epoch": 0.8614496472097498,
      "grad_norm": 0.5455286801662246,
      "learning_rate": 1.1468809060261399e-05,
      "loss": 0.652,
      "step": 13430
    },
    {
      "epoch": 0.8617703656189866,
      "grad_norm": 0.8972113556345591,
      "learning_rate": 1.1416807204628533e-05,
      "loss": 0.5988,
      "step": 13435
    },
    {
      "epoch": 0.8620910840282232,
      "grad_norm": 0.6854697322056322,
      "learning_rate": 1.1364916372208e-05,
      "loss": 0.696,
      "step": 13440
    },
    {
      "epoch": 0.86241180243746,
      "grad_norm": 0.678150343614853,
      "learning_rate": 1.1313136628035647e-05,
      "loss": 0.5252,
      "step": 13445
    },
    {
      "epoch": 0.8627325208466966,
      "grad_norm": 0.6285060401132421,
      "learning_rate": 1.1261468037008172e-05,
      "loss": 0.4725,
      "step": 13450
    },
    {
      "epoch": 0.8630532392559332,
      "grad_norm": 0.6510845504498061,
      "learning_rate": 1.1209910663882916e-05,
      "loss": 0.5565,
      "step": 13455
    },
    {
      "epoch": 0.86337395766517,
      "grad_norm": 1.2698183413935256,
      "learning_rate": 1.1158464573277816e-05,
      "loss": 0.7544,
      "step": 13460
    },
    {
      "epoch": 0.8636946760744066,
      "grad_norm": 1.048484623181104,
      "learning_rate": 1.1107129829671393e-05,
      "loss": 0.6762,
      "step": 13465
    },
    {
      "epoch": 0.8640153944836434,
      "grad_norm": 0.8197138470113798,
      "learning_rate": 1.1055906497402534e-05,
      "loss": 0.7671,
      "step": 13470
    },
    {
      "epoch": 0.86433611289288,
      "grad_norm": 0.8060735013585868,
      "learning_rate": 1.1004794640670602e-05,
      "loss": 0.7412,
      "step": 13475
    },
    {
      "epoch": 0.8646568313021168,
      "grad_norm": 0.5202202198681646,
      "learning_rate": 1.0953794323535138e-05,
      "loss": 0.617,
      "step": 13480
    },
    {
      "epoch": 0.8649775497113534,
      "grad_norm": 0.9060221838859691,
      "learning_rate": 1.0902905609915925e-05,
      "loss": 0.6724,
      "step": 13485
    },
    {
      "epoch": 0.8652982681205901,
      "grad_norm": 0.9948896143875089,
      "learning_rate": 1.0852128563592911e-05,
      "loss": 0.6916,
      "step": 13490
    },
    {
      "epoch": 0.8656189865298268,
      "grad_norm": 0.6185205159442889,
      "learning_rate": 1.0801463248206012e-05,
      "loss": 0.6155,
      "step": 13495
    },
    {
      "epoch": 0.8659397049390635,
      "grad_norm": 0.8621415617622489,
      "learning_rate": 1.0750909727255231e-05,
      "loss": 0.5641,
      "step": 13500
    },
    {
      "epoch": 0.8662604233483002,
      "grad_norm": 1.0099987644568347,
      "learning_rate": 1.0700468064100278e-05,
      "loss": 0.5874,
      "step": 13505
    },
    {
      "epoch": 0.8665811417575369,
      "grad_norm": 0.7860625683994522,
      "learning_rate": 1.0650138321960834e-05,
      "loss": 0.6447,
      "step": 13510
    },
    {
      "epoch": 0.8669018601667736,
      "grad_norm": 1.0075130412273372,
      "learning_rate": 1.0599920563916233e-05,
      "loss": 0.6428,
      "step": 13515
    },
    {
      "epoch": 0.8672225785760103,
      "grad_norm": 0.8433746537048423,
      "learning_rate": 1.0549814852905427e-05,
      "loss": 0.6156,
      "step": 13520
    },
    {
      "epoch": 0.8675432969852469,
      "grad_norm": 0.6911458595910109,
      "learning_rate": 1.0499821251727038e-05,
      "loss": 0.7697,
      "step": 13525
    },
    {
      "epoch": 0.8678640153944837,
      "grad_norm": 0.7261479775249019,
      "learning_rate": 1.044993982303909e-05,
      "loss": 0.7353,
      "step": 13530
    },
    {
      "epoch": 0.8681847338037203,
      "grad_norm": 0.5256687873474478,
      "learning_rate": 1.040017062935902e-05,
      "loss": 0.5737,
      "step": 13535
    },
    {
      "epoch": 0.868505452212957,
      "grad_norm": 1.0493206252194889,
      "learning_rate": 1.035051373306366e-05,
      "loss": 0.6215,
      "step": 13540
    },
    {
      "epoch": 0.8688261706221937,
      "grad_norm": 0.665208544741004,
      "learning_rate": 1.0300969196389033e-05,
      "loss": 0.6073,
      "step": 13545
    },
    {
      "epoch": 0.8691468890314304,
      "grad_norm": 0.6978534685649864,
      "learning_rate": 1.0251537081430406e-05,
      "loss": 0.5837,
      "step": 13550
    },
    {
      "epoch": 0.8694676074406671,
      "grad_norm": 0.7579892159049441,
      "learning_rate": 1.0202217450142082e-05,
      "loss": 0.5604,
      "step": 13555
    },
    {
      "epoch": 0.8697883258499037,
      "grad_norm": 0.6514517952782195,
      "learning_rate": 1.015301036433739e-05,
      "loss": 0.6971,
      "step": 13560
    },
    {
      "epoch": 0.8701090442591405,
      "grad_norm": 0.4398371785948417,
      "learning_rate": 1.0103915885688686e-05,
      "loss": 0.5459,
      "step": 13565
    },
    {
      "epoch": 0.8704297626683771,
      "grad_norm": 0.6924160948174624,
      "learning_rate": 1.0054934075727062e-05,
      "loss": 0.5386,
      "step": 13570
    },
    {
      "epoch": 0.8707504810776139,
      "grad_norm": 0.9269090072648052,
      "learning_rate": 1.0006064995842513e-05,
      "loss": 0.7547,
      "step": 13575
    },
    {
      "epoch": 0.8710711994868505,
      "grad_norm": 0.788185049843599,
      "learning_rate": 9.957308707283675e-06,
      "loss": 0.6128,
      "step": 13580
    },
    {
      "epoch": 0.8713919178960873,
      "grad_norm": 0.7308595928706564,
      "learning_rate": 9.90866527115788e-06,
      "loss": 0.6036,
      "step": 13585
    },
    {
      "epoch": 0.8717126363053239,
      "grad_norm": 0.7092354057653707,
      "learning_rate": 9.860134748430972e-06,
      "loss": 0.7038,
      "step": 13590
    },
    {
      "epoch": 0.8720333547145607,
      "grad_norm": 1.0470346737728682,
      "learning_rate": 9.811717199927273e-06,
      "loss": 0.73,
      "step": 13595
    },
    {
      "epoch": 0.8723540731237973,
      "grad_norm": 1.2863495939351028,
      "learning_rate": 9.763412686329575e-06,
      "loss": 0.7084,
      "step": 13600
    },
    {
      "epoch": 0.872674791533034,
      "grad_norm": 0.768903275631644,
      "learning_rate": 9.71522126817892e-06,
      "loss": 0.7444,
      "step": 13605
    },
    {
      "epoch": 0.8729955099422707,
      "grad_norm": 0.8561382895899066,
      "learning_rate": 9.667143005874679e-06,
      "loss": 0.6743,
      "step": 13610
    },
    {
      "epoch": 0.8733162283515074,
      "grad_norm": 0.6255033102428371,
      "learning_rate": 9.619177959674353e-06,
      "loss": 0.6357,
      "step": 13615
    },
    {
      "epoch": 0.8736369467607441,
      "grad_norm": 0.8367879131361138,
      "learning_rate": 9.57132618969354e-06,
      "loss": 0.7229,
      "step": 13620
    },
    {
      "epoch": 0.8739576651699807,
      "grad_norm": 0.6292130728042913,
      "learning_rate": 9.523587755905938e-06,
      "loss": 0.6561,
      "step": 13625
    },
    {
      "epoch": 0.8742783835792175,
      "grad_norm": 0.9860204738083063,
      "learning_rate": 9.475962718143106e-06,
      "loss": 0.6323,
      "step": 13630
    },
    {
      "epoch": 0.8745991019884541,
      "grad_norm": 0.841887275726057,
      "learning_rate": 9.428451136094541e-06,
      "loss": 0.6762,
      "step": 13635
    },
    {
      "epoch": 0.8749198203976908,
      "grad_norm": 0.7100122528682058,
      "learning_rate": 9.381053069307499e-06,
      "loss": 0.5494,
      "step": 13640
    },
    {
      "epoch": 0.8752405388069275,
      "grad_norm": 1.1202599763010757,
      "learning_rate": 9.33376857718703e-06,
      "loss": 0.5936,
      "step": 13645
    },
    {
      "epoch": 0.8755612572161642,
      "grad_norm": 1.0773135254923245,
      "learning_rate": 9.286597718995783e-06,
      "loss": 0.5523,
      "step": 13650
    },
    {
      "epoch": 0.8758819756254009,
      "grad_norm": 0.7262011668633317,
      "learning_rate": 9.239540553853987e-06,
      "loss": 0.7559,
      "step": 13655
    },
    {
      "epoch": 0.8762026940346376,
      "grad_norm": 1.1845562776611291,
      "learning_rate": 9.192597140739445e-06,
      "loss": 0.6214,
      "step": 13660
    },
    {
      "epoch": 0.8765234124438743,
      "grad_norm": 1.0832215867500623,
      "learning_rate": 9.145767538487282e-06,
      "loss": 0.6363,
      "step": 13665
    },
    {
      "epoch": 0.876844130853111,
      "grad_norm": 0.8384508766840872,
      "learning_rate": 9.099051805790081e-06,
      "loss": 0.7162,
      "step": 13670
    },
    {
      "epoch": 0.8771648492623476,
      "grad_norm": 0.7886740113805487,
      "learning_rate": 9.052450001197666e-06,
      "loss": 0.5292,
      "step": 13675
    },
    {
      "epoch": 0.8774855676715844,
      "grad_norm": 0.724073412445175,
      "learning_rate": 9.005962183117055e-06,
      "loss": 0.7159,
      "step": 13680
    },
    {
      "epoch": 0.877806286080821,
      "grad_norm": 0.5059344342927663,
      "learning_rate": 8.959588409812458e-06,
      "loss": 0.6316,
      "step": 13685
    },
    {
      "epoch": 0.8781270044900578,
      "grad_norm": 1.2097294273874917,
      "learning_rate": 8.913328739405092e-06,
      "loss": 0.7006,
      "step": 13690
    },
    {
      "epoch": 0.8784477228992944,
      "grad_norm": 0.615032496760421,
      "learning_rate": 8.867183229873211e-06,
      "loss": 0.738,
      "step": 13695
    },
    {
      "epoch": 0.8787684413085312,
      "grad_norm": 0.6135358966273193,
      "learning_rate": 8.821151939051953e-06,
      "loss": 0.6287,
      "step": 13700
    },
    {
      "epoch": 0.8790891597177678,
      "grad_norm": 0.8910522096004475,
      "learning_rate": 8.775234924633301e-06,
      "loss": 0.7301,
      "step": 13705
    },
    {
      "epoch": 0.8794098781270044,
      "grad_norm": 0.7815093977889225,
      "learning_rate": 8.72943224416609e-06,
      "loss": 0.6499,
      "step": 13710
    },
    {
      "epoch": 0.8797305965362412,
      "grad_norm": 0.5607257491266542,
      "learning_rate": 8.683743955055746e-06,
      "loss": 0.6083,
      "step": 13715
    },
    {
      "epoch": 0.8800513149454778,
      "grad_norm": 0.884214002379739,
      "learning_rate": 8.638170114564414e-06,
      "loss": 0.611,
      "step": 13720
    },
    {
      "epoch": 0.8803720333547146,
      "grad_norm": 0.7528314170250561,
      "learning_rate": 8.592710779810765e-06,
      "loss": 0.6921,
      "step": 13725
    },
    {
      "epoch": 0.8806927517639512,
      "grad_norm": 0.9161588988308113,
      "learning_rate": 8.547366007769919e-06,
      "loss": 0.652,
      "step": 13730
    },
    {
      "epoch": 0.881013470173188,
      "grad_norm": 1.4044011636843894,
      "learning_rate": 8.502135855273497e-06,
      "loss": 0.6532,
      "step": 13735
    },
    {
      "epoch": 0.8813341885824246,
      "grad_norm": 0.5311315649019397,
      "learning_rate": 8.457020379009373e-06,
      "loss": 0.5949,
      "step": 13740
    },
    {
      "epoch": 0.8816549069916613,
      "grad_norm": 0.6747473256173435,
      "learning_rate": 8.412019635521784e-06,
      "loss": 0.5982,
      "step": 13745
    },
    {
      "epoch": 0.881975625400898,
      "grad_norm": 0.6539295071967237,
      "learning_rate": 8.367133681211103e-06,
      "loss": 0.4702,
      "step": 13750
    },
    {
      "epoch": 0.8822963438101347,
      "grad_norm": 0.403206890252452,
      "learning_rate": 8.322362572333841e-06,
      "loss": 0.5464,
      "step": 13755
    },
    {
      "epoch": 0.8826170622193714,
      "grad_norm": 0.7780767642995721,
      "learning_rate": 8.277706365002625e-06,
      "loss": 0.6976,
      "step": 13760
    },
    {
      "epoch": 0.8829377806286081,
      "grad_norm": 0.6272304201483566,
      "learning_rate": 8.233165115186003e-06,
      "loss": 0.6613,
      "step": 13765
    },
    {
      "epoch": 0.8832584990378448,
      "grad_norm": 0.8343537172020628,
      "learning_rate": 8.188738878708502e-06,
      "loss": 0.7469,
      "step": 13770
    },
    {
      "epoch": 0.8835792174470815,
      "grad_norm": 0.9345794017556924,
      "learning_rate": 8.144427711250447e-06,
      "loss": 0.7586,
      "step": 13775
    },
    {
      "epoch": 0.8838999358563181,
      "grad_norm": 1.162828611729811,
      "learning_rate": 8.100231668348002e-06,
      "loss": 0.5382,
      "step": 13780
    },
    {
      "epoch": 0.8842206542655549,
      "grad_norm": 1.1205395105885234,
      "learning_rate": 8.056150805392993e-06,
      "loss": 0.6138,
      "step": 13785
    },
    {
      "epoch": 0.8845413726747915,
      "grad_norm": 0.5630057786543724,
      "learning_rate": 8.012185177632914e-06,
      "loss": 0.4977,
      "step": 13790
    },
    {
      "epoch": 0.8848620910840282,
      "grad_norm": 0.8477848139037634,
      "learning_rate": 7.968334840170843e-06,
      "loss": 0.7394,
      "step": 13795
    },
    {
      "epoch": 0.8851828094932649,
      "grad_norm": 0.7207695540829029,
      "learning_rate": 7.92459984796532e-06,
      "loss": 0.7108,
      "step": 13800
    },
    {
      "epoch": 0.8855035279025016,
      "grad_norm": 0.9355747131091594,
      "learning_rate": 7.880980255830372e-06,
      "loss": 0.6971,
      "step": 13805
    },
    {
      "epoch": 0.8858242463117383,
      "grad_norm": 0.8391670611046308,
      "learning_rate": 7.83747611843536e-06,
      "loss": 0.6618,
      "step": 13810
    },
    {
      "epoch": 0.886144964720975,
      "grad_norm": 0.6940296556964382,
      "learning_rate": 7.794087490304935e-06,
      "loss": 0.7303,
      "step": 13815
    },
    {
      "epoch": 0.8864656831302117,
      "grad_norm": 0.5463085826484815,
      "learning_rate": 7.75081442581902e-06,
      "loss": 0.7128,
      "step": 13820
    },
    {
      "epoch": 0.8867864015394483,
      "grad_norm": 0.9553016730601827,
      "learning_rate": 7.707656979212653e-06,
      "loss": 0.5325,
      "step": 13825
    },
    {
      "epoch": 0.8871071199486851,
      "grad_norm": 0.9151217967040441,
      "learning_rate": 7.66461520457602e-06,
      "loss": 0.7276,
      "step": 13830
    },
    {
      "epoch": 0.8874278383579217,
      "grad_norm": 0.7869236135130984,
      "learning_rate": 7.6216891558542395e-06,
      "loss": 0.744,
      "step": 13835
    },
    {
      "epoch": 0.8877485567671585,
      "grad_norm": 0.748585172606016,
      "learning_rate": 7.578878886847507e-06,
      "loss": 0.5891,
      "step": 13840
    },
    {
      "epoch": 0.8880692751763951,
      "grad_norm": 0.7205402378107477,
      "learning_rate": 7.536184451210815e-06,
      "loss": 0.6715,
      "step": 13845
    },
    {
      "epoch": 0.8883899935856319,
      "grad_norm": 0.6198613140638497,
      "learning_rate": 7.493605902454004e-06,
      "loss": 0.7581,
      "step": 13850
    },
    {
      "epoch": 0.8887107119948685,
      "grad_norm": 0.84149727085621,
      "learning_rate": 7.451143293941709e-06,
      "loss": 0.746,
      "step": 13855
    },
    {
      "epoch": 0.8890314304041051,
      "grad_norm": 0.8368846152026573,
      "learning_rate": 7.408796678893226e-06,
      "loss": 0.6687,
      "step": 13860
    },
    {
      "epoch": 0.8893521488133419,
      "grad_norm": 0.7827898269521945,
      "learning_rate": 7.366566110382445e-06,
      "loss": 0.5832,
      "step": 13865
    },
    {
      "epoch": 0.8896728672225785,
      "grad_norm": 0.7300699318830831,
      "learning_rate": 7.324451641337882e-06,
      "loss": 0.6294,
      "step": 13870
    },
    {
      "epoch": 0.8899935856318153,
      "grad_norm": 0.9238241719407477,
      "learning_rate": 7.28245332454246e-06,
      "loss": 0.7083,
      "step": 13875
    },
    {
      "epoch": 0.8903143040410519,
      "grad_norm": 0.7709293686153301,
      "learning_rate": 7.240571212633618e-06,
      "loss": 0.5686,
      "step": 13880
    },
    {
      "epoch": 0.8906350224502887,
      "grad_norm": 1.1869349060713659,
      "learning_rate": 7.198805358103067e-06,
      "loss": 0.728,
      "step": 13885
    },
    {
      "epoch": 0.8909557408595253,
      "grad_norm": 1.0851258551108929,
      "learning_rate": 7.157155813296834e-06,
      "loss": 0.7379,
      "step": 13890
    },
    {
      "epoch": 0.891276459268762,
      "grad_norm": 0.7394708926504447,
      "learning_rate": 7.115622630415253e-06,
      "loss": 0.7321,
      "step": 13895
    },
    {
      "epoch": 0.8915971776779987,
      "grad_norm": 0.816039779235774,
      "learning_rate": 7.0742058615126726e-06,
      "loss": 0.601,
      "step": 13900
    },
    {
      "epoch": 0.8919178960872354,
      "grad_norm": 0.41244323070119415,
      "learning_rate": 7.03290555849766e-06,
      "loss": 0.5809,
      "step": 13905
    },
    {
      "epoch": 0.8922386144964721,
      "grad_norm": 0.8918418533925353,
      "learning_rate": 6.991721773132742e-06,
      "loss": 0.7142,
      "step": 13910
    },
    {
      "epoch": 0.8925593329057088,
      "grad_norm": 0.8732825568065812,
      "learning_rate": 6.950654557034475e-06,
      "loss": 0.6635,
      "step": 13915
    },
    {
      "epoch": 0.8928800513149455,
      "grad_norm": 0.7358325355065991,
      "learning_rate": 6.909703961673253e-06,
      "loss": 0.6412,
      "step": 13920
    },
    {
      "epoch": 0.8932007697241822,
      "grad_norm": 0.7111667197818642,
      "learning_rate": 6.868870038373332e-06,
      "loss": 0.6767,
      "step": 13925
    },
    {
      "epoch": 0.8935214881334188,
      "grad_norm": 1.0721200112803682,
      "learning_rate": 6.828152838312773e-06,
      "loss": 0.5066,
      "step": 13930
    },
    {
      "epoch": 0.8938422065426556,
      "grad_norm": 0.8310238983860934,
      "learning_rate": 6.787552412523279e-06,
      "loss": 0.6764,
      "step": 13935
    },
    {
      "epoch": 0.8941629249518922,
      "grad_norm": 0.6872676077028719,
      "learning_rate": 6.747068811890256e-06,
      "loss": 0.6671,
      "step": 13940
    },
    {
      "epoch": 0.894483643361129,
      "grad_norm": 0.9702633803545438,
      "learning_rate": 6.706702087152661e-06,
      "loss": 0.4624,
      "step": 13945
    },
    {
      "epoch": 0.8948043617703656,
      "grad_norm": 1.375052365512822,
      "learning_rate": 6.666452288902958e-06,
      "loss": 0.7522,
      "step": 13950
    },
    {
      "epoch": 0.8951250801796024,
      "grad_norm": 0.908667367564301,
      "learning_rate": 6.626319467587106e-06,
      "loss": 0.6602,
      "step": 13955
    },
    {
      "epoch": 0.895445798588839,
      "grad_norm": 1.0327055092345554,
      "learning_rate": 6.586303673504412e-06,
      "loss": 0.6192,
      "step": 13960
    },
    {
      "epoch": 0.8957665169980756,
      "grad_norm": 0.7913439515419154,
      "learning_rate": 6.5464049568075615e-06,
      "loss": 0.6883,
      "step": 13965
    },
    {
      "epoch": 0.8960872354073124,
      "grad_norm": 0.9249759944838365,
      "learning_rate": 6.506623367502418e-06,
      "loss": 0.7207,
      "step": 13970
    },
    {
      "epoch": 0.896407953816549,
      "grad_norm": 0.6185623923439777,
      "learning_rate": 6.4669589554481325e-06,
      "loss": 0.7935,
      "step": 13975
    },
    {
      "epoch": 0.8967286722257858,
      "grad_norm": 0.9047502038967159,
      "learning_rate": 6.4274117703569615e-06,
      "loss": 0.523,
      "step": 13980
    },
    {
      "epoch": 0.8970493906350224,
      "grad_norm": 0.5862791588591175,
      "learning_rate": 6.387981861794212e-06,
      "loss": 0.5767,
      "step": 13985
    },
    {
      "epoch": 0.8973701090442592,
      "grad_norm": 0.9286416832372187,
      "learning_rate": 6.348669279178277e-06,
      "loss": 0.5952,
      "step": 13990
    },
    {
      "epoch": 0.8976908274534958,
      "grad_norm": 0.9632286005822661,
      "learning_rate": 6.309474071780408e-06,
      "loss": 0.7512,
      "step": 13995
    },
    {
      "epoch": 0.8980115458627326,
      "grad_norm": 0.6713818773459586,
      "learning_rate": 6.2703962887248444e-06,
      "loss": 0.8033,
      "step": 14000
    },
    {
      "epoch": 0.8983322642719692,
      "grad_norm": 0.43098921146350616,
      "learning_rate": 6.2314359789885756e-06,
      "loss": 0.5506,
      "step": 14005
    },
    {
      "epoch": 0.8986529826812059,
      "grad_norm": 1.029998963102262,
      "learning_rate": 6.192593191401396e-06,
      "loss": 0.6528,
      "step": 14010
    },
    {
      "epoch": 0.8989737010904426,
      "grad_norm": 0.9428065435910548,
      "learning_rate": 6.153867974645833e-06,
      "loss": 0.6822,
      "step": 14015
    },
    {
      "epoch": 0.8992944194996793,
      "grad_norm": 0.6275896637114994,
      "learning_rate": 6.115260377257004e-06,
      "loss": 0.556,
      "step": 14020
    },
    {
      "epoch": 0.899615137908916,
      "grad_norm": 0.6094084941175278,
      "learning_rate": 6.076770447622615e-06,
      "loss": 0.5094,
      "step": 14025
    },
    {
      "epoch": 0.8999358563181526,
      "grad_norm": 0.7526848860794296,
      "learning_rate": 6.038398233982989e-06,
      "loss": 0.678,
      "step": 14030
    },
    {
      "epoch": 0.9002565747273894,
      "grad_norm": 0.5704944797751071,
      "learning_rate": 6.000143784430756e-06,
      "loss": 0.6822,
      "step": 14035
    },
    {
      "epoch": 0.900577293136626,
      "grad_norm": 0.7525424440388754,
      "learning_rate": 5.962007146911109e-06,
      "loss": 0.7008,
      "step": 14040
    },
    {
      "epoch": 0.9008980115458627,
      "grad_norm": 0.961888964093016,
      "learning_rate": 5.923988369221456e-06,
      "loss": 0.6805,
      "step": 14045
    },
    {
      "epoch": 0.9012187299550994,
      "grad_norm": 0.8861288123930613,
      "learning_rate": 5.886087499011594e-06,
      "loss": 0.758,
      "step": 14050
    },
    {
      "epoch": 0.9015394483643361,
      "grad_norm": 0.8032927310909407,
      "learning_rate": 5.8483045837834705e-06,
      "loss": 0.6607,
      "step": 14055
    },
    {
      "epoch": 0.9018601667735728,
      "grad_norm": 0.8087075039644414,
      "learning_rate": 5.810639670891216e-06,
      "loss": 0.7027,
      "step": 14060
    },
    {
      "epoch": 0.9021808851828095,
      "grad_norm": 0.8539578913251452,
      "learning_rate": 5.773092807541092e-06,
      "loss": 0.5801,
      "step": 14065
    },
    {
      "epoch": 0.9025016035920462,
      "grad_norm": 0.7756452243315396,
      "learning_rate": 5.735664040791367e-06,
      "loss": 0.7103,
      "step": 14070
    },
    {
      "epoch": 0.9028223220012829,
      "grad_norm": 1.222999060061691,
      "learning_rate": 5.698353417552327e-06,
      "loss": 0.6017,
      "step": 14075
    },
    {
      "epoch": 0.9031430404105195,
      "grad_norm": 0.7983696291416744,
      "learning_rate": 5.661160984586178e-06,
      "loss": 0.6049,
      "step": 14080
    },
    {
      "epoch": 0.9034637588197563,
      "grad_norm": 0.6490907871037943,
      "learning_rate": 5.624086788506977e-06,
      "loss": 0.5526,
      "step": 14085
    },
    {
      "epoch": 0.9037844772289929,
      "grad_norm": 0.8508537947980717,
      "learning_rate": 5.587130875780633e-06,
      "loss": 0.7109,
      "step": 14090
    },
    {
      "epoch": 0.9041051956382297,
      "grad_norm": 1.2192033565455072,
      "learning_rate": 5.550293292724762e-06,
      "loss": 0.7051,
      "step": 14095
    },
    {
      "epoch": 0.9044259140474663,
      "grad_norm": 0.6587693265105345,
      "learning_rate": 5.51357408550871e-06,
      "loss": 0.6174,
      "step": 14100
    },
    {
      "epoch": 0.9047466324567031,
      "grad_norm": 0.49748231304384327,
      "learning_rate": 5.47697330015341e-06,
      "loss": 0.5172,
      "step": 14105
    },
    {
      "epoch": 0.9050673508659397,
      "grad_norm": 0.7884068967557873,
      "learning_rate": 5.440490982531465e-06,
      "loss": 0.6816,
      "step": 14110
    },
    {
      "epoch": 0.9053880692751763,
      "grad_norm": 1.1069363022735697,
      "learning_rate": 5.404127178366902e-06,
      "loss": 0.6431,
      "step": 14115
    },
    {
      "epoch": 0.9057087876844131,
      "grad_norm": 0.6826869882965035,
      "learning_rate": 5.367881933235275e-06,
      "loss": 0.5101,
      "step": 14120
    },
    {
      "epoch": 0.9060295060936497,
      "grad_norm": 0.7273219199634979,
      "learning_rate": 5.331755292563523e-06,
      "loss": 0.637,
      "step": 14125
    },
    {
      "epoch": 0.9063502245028865,
      "grad_norm": 0.8472794210673035,
      "learning_rate": 5.295747301629917e-06,
      "loss": 0.6022,
      "step": 14130
    },
    {
      "epoch": 0.9066709429121231,
      "grad_norm": 0.8444956562341863,
      "learning_rate": 5.259858005564089e-06,
      "loss": 0.5334,
      "step": 14135
    },
    {
      "epoch": 0.9069916613213599,
      "grad_norm": 0.6732860335353007,
      "learning_rate": 5.224087449346826e-06,
      "loss": 0.7202,
      "step": 14140
    },
    {
      "epoch": 0.9073123797305965,
      "grad_norm": 0.9666322387828169,
      "learning_rate": 5.188435677810133e-06,
      "loss": 0.7559,
      "step": 14145
    },
    {
      "epoch": 0.9076330981398332,
      "grad_norm": 0.9869737805273263,
      "learning_rate": 5.152902735637166e-06,
      "loss": 0.678,
      "step": 14150
    },
    {
      "epoch": 0.9079538165490699,
      "grad_norm": 0.5732371579819191,
      "learning_rate": 5.1174886673620805e-06,
      "loss": 0.5993,
      "step": 14155
    },
    {
      "epoch": 0.9082745349583066,
      "grad_norm": 0.8654988560178682,
      "learning_rate": 5.082193517370127e-06,
      "loss": 0.6813,
      "step": 14160
    },
    {
      "epoch": 0.9085952533675433,
      "grad_norm": 1.0265921567687237,
      "learning_rate": 5.047017329897463e-06,
      "loss": 0.6737,
      "step": 14165
    },
    {
      "epoch": 0.90891597177678,
      "grad_norm": 0.8248152748170539,
      "learning_rate": 5.011960149031137e-06,
      "loss": 0.5857,
      "step": 14170
    },
    {
      "epoch": 0.9092366901860167,
      "grad_norm": 0.7956729093404309,
      "learning_rate": 4.977022018709088e-06,
      "loss": 0.6643,
      "step": 14175
    },
    {
      "epoch": 0.9095574085952534,
      "grad_norm": 1.7578923486790687,
      "learning_rate": 4.94220298271999e-06,
      "loss": 0.7325,
      "step": 14180
    },
    {
      "epoch": 0.9098781270044901,
      "grad_norm": 0.8908535862934428,
      "learning_rate": 4.907503084703335e-06,
      "loss": 0.7003,
      "step": 14185
    },
    {
      "epoch": 0.9101988454137268,
      "grad_norm": 0.5989152273082363,
      "learning_rate": 4.872922368149213e-06,
      "loss": 0.6494,
      "step": 14190
    },
    {
      "epoch": 0.9105195638229634,
      "grad_norm": 1.1947032610011639,
      "learning_rate": 4.838460876398365e-06,
      "loss": 0.712,
      "step": 14195
    },
    {
      "epoch": 0.9108402822322001,
      "grad_norm": 0.8008113658697428,
      "learning_rate": 4.804118652642164e-06,
      "loss": 0.6607,
      "step": 14200
    },
    {
      "epoch": 0.9111610006414368,
      "grad_norm": 0.9092451384048743,
      "learning_rate": 4.769895739922403e-06,
      "loss": 0.532,
      "step": 14205
    },
    {
      "epoch": 0.9114817190506735,
      "grad_norm": 0.9642837868126427,
      "learning_rate": 4.7357921811314374e-06,
      "loss": 0.5875,
      "step": 14210
    },
    {
      "epoch": 0.9118024374599102,
      "grad_norm": 1.2120942953279068,
      "learning_rate": 4.701808019011966e-06,
      "loss": 0.644,
      "step": 14215
    },
    {
      "epoch": 0.9121231558691469,
      "grad_norm": 0.7731779356318255,
      "learning_rate": 4.66794329615704e-06,
      "loss": 0.7528,
      "step": 14220
    },
    {
      "epoch": 0.9124438742783836,
      "grad_norm": 0.8452499221199778,
      "learning_rate": 4.634198055010097e-06,
      "loss": 0.7321,
      "step": 14225
    },
    {
      "epoch": 0.9127645926876202,
      "grad_norm": 0.7660682093886364,
      "learning_rate": 4.600572337864739e-06,
      "loss": 0.58,
      "step": 14230
    },
    {
      "epoch": 0.913085311096857,
      "grad_norm": 0.919577008788518,
      "learning_rate": 4.567066186864799e-06,
      "loss": 0.5792,
      "step": 14235
    },
    {
      "epoch": 0.9134060295060936,
      "grad_norm": 0.7240560589023852,
      "learning_rate": 4.53367964400423e-06,
      "loss": 0.6382,
      "step": 14240
    },
    {
      "epoch": 0.9137267479153304,
      "grad_norm": 0.9404018211860803,
      "learning_rate": 4.500412751127148e-06,
      "loss": 0.6983,
      "step": 14245
    },
    {
      "epoch": 0.914047466324567,
      "grad_norm": 0.9226737613175637,
      "learning_rate": 4.467265549927646e-06,
      "loss": 0.7371,
      "step": 14250
    },
    {
      "epoch": 0.9143681847338038,
      "grad_norm": 0.8674349211052579,
      "learning_rate": 4.434238081949793e-06,
      "loss": 0.715,
      "step": 14255
    },
    {
      "epoch": 0.9146889031430404,
      "grad_norm": 1.0086095744064745,
      "learning_rate": 4.401330388587655e-06,
      "loss": 0.6359,
      "step": 14260
    },
    {
      "epoch": 0.915009621552277,
      "grad_norm": 0.7399699212191572,
      "learning_rate": 4.368542511085127e-06,
      "loss": 0.6856,
      "step": 14265
    },
    {
      "epoch": 0.9153303399615138,
      "grad_norm": 0.7837381511015072,
      "learning_rate": 4.3358744905359845e-06,
      "loss": 0.5355,
      "step": 14270
    },
    {
      "epoch": 0.9156510583707504,
      "grad_norm": 0.7456554819958952,
      "learning_rate": 4.303326367883742e-06,
      "loss": 0.6506,
      "step": 14275
    },
    {
      "epoch": 0.9159717767799872,
      "grad_norm": 0.7504015595604561,
      "learning_rate": 4.2708981839216344e-06,
      "loss": 0.7347,
      "step": 14280
    },
    {
      "epoch": 0.9162924951892238,
      "grad_norm": 0.7872333950088334,
      "learning_rate": 4.238589979292651e-06,
      "loss": 0.7448,
      "step": 14285
    },
    {
      "epoch": 0.9166132135984606,
      "grad_norm": 0.848658406503067,
      "learning_rate": 4.206401794489301e-06,
      "loss": 0.755,
      "step": 14290
    },
    {
      "epoch": 0.9169339320076972,
      "grad_norm": 0.7157699993484576,
      "learning_rate": 4.1743336698537805e-06,
      "loss": 0.6877,
      "step": 14295
    },
    {
      "epoch": 0.9172546504169339,
      "grad_norm": 0.920746793540226,
      "learning_rate": 4.142385645577707e-06,
      "loss": 0.6888,
      "step": 14300
    },
    {
      "epoch": 0.9175753688261706,
      "grad_norm": 0.6845975702530432,
      "learning_rate": 4.110557761702249e-06,
      "loss": 0.754,
      "step": 14305
    },
    {
      "epoch": 0.9178960872354073,
      "grad_norm": 1.1511196348448594,
      "learning_rate": 4.078850058117978e-06,
      "loss": 0.616,
      "step": 14310
    },
    {
      "epoch": 0.918216805644644,
      "grad_norm": 0.6109287776036132,
      "learning_rate": 4.0472625745648144e-06,
      "loss": 0.5921,
      "step": 14315
    },
    {
      "epoch": 0.9185375240538807,
      "grad_norm": 0.5799180489438701,
      "learning_rate": 4.015795350632068e-06,
      "loss": 0.6258,
      "step": 14320
    },
    {
      "epoch": 0.9188582424631174,
      "grad_norm": 1.0588410053870487,
      "learning_rate": 3.984448425758236e-06,
      "loss": 0.6294,
      "step": 14325
    },
    {
      "epoch": 0.9191789608723541,
      "grad_norm": 0.9656078510689677,
      "learning_rate": 3.953221839231125e-06,
      "loss": 0.7232,
      "step": 14330
    },
    {
      "epoch": 0.9194996792815907,
      "grad_norm": 0.7627108781290338,
      "learning_rate": 3.922115630187684e-06,
      "loss": 0.7192,
      "step": 14335
    },
    {
      "epoch": 0.9198203976908275,
      "grad_norm": 0.9118690797348065,
      "learning_rate": 3.8911298376139604e-06,
      "loss": 0.7131,
      "step": 14340
    },
    {
      "epoch": 0.9201411161000641,
      "grad_norm": 0.6032629064325823,
      "learning_rate": 3.860264500345145e-06,
      "loss": 0.701,
      "step": 14345
    },
    {
      "epoch": 0.9204618345093009,
      "grad_norm": 0.7887702725778526,
      "learning_rate": 3.829519657065417e-06,
      "loss": 0.4822,
      "step": 14350
    },
    {
      "epoch": 0.9207825529185375,
      "grad_norm": 0.7138715411195988,
      "learning_rate": 3.798895346307929e-06,
      "loss": 0.6301,
      "step": 14355
    },
    {
      "epoch": 0.9211032713277743,
      "grad_norm": 0.9024603895099268,
      "learning_rate": 3.768391606454824e-06,
      "loss": 0.7522,
      "step": 14360
    },
    {
      "epoch": 0.9214239897370109,
      "grad_norm": 1.0280776294268867,
      "learning_rate": 3.7380084757370427e-06,
      "loss": 0.5146,
      "step": 14365
    },
    {
      "epoch": 0.9217447081462476,
      "grad_norm": 1.2746584097883105,
      "learning_rate": 3.707745992234446e-06,
      "loss": 0.6437,
      "step": 14370
    },
    {
      "epoch": 0.9220654265554843,
      "grad_norm": 0.7420480886663697,
      "learning_rate": 3.677604193875639e-06,
      "loss": 0.7434,
      "step": 14375
    },
    {
      "epoch": 0.9223861449647209,
      "grad_norm": 0.7760260552269074,
      "learning_rate": 3.647583118438003e-06,
      "loss": 0.7314,
      "step": 14380
    },
    {
      "epoch": 0.9227068633739577,
      "grad_norm": 0.5526340026602907,
      "learning_rate": 3.617682803547573e-06,
      "loss": 0.6684,
      "step": 14385
    },
    {
      "epoch": 0.9230275817831943,
      "grad_norm": 0.8601770168248275,
      "learning_rate": 3.587903286679051e-06,
      "loss": 0.7048,
      "step": 14390
    },
    {
      "epoch": 0.9233483001924311,
      "grad_norm": 0.768831329847095,
      "learning_rate": 3.5582446051557694e-06,
      "loss": 0.7109,
      "step": 14395
    },
    {
      "epoch": 0.9236690186016677,
      "grad_norm": 0.7061972963645736,
      "learning_rate": 3.5287067961495613e-06,
      "loss": 0.7226,
      "step": 14400
    },
    {
      "epoch": 0.9239897370109045,
      "grad_norm": 0.9718492483949128,
      "learning_rate": 3.4992898966808128e-06,
      "loss": 0.6096,
      "step": 14405
    },
    {
      "epoch": 0.9243104554201411,
      "grad_norm": 0.6613307717148478,
      "learning_rate": 3.4699939436183548e-06,
      "loss": 0.6359,
      "step": 14410
    },
    {
      "epoch": 0.9246311738293778,
      "grad_norm": 0.48853477777273874,
      "learning_rate": 3.440818973679416e-06,
      "loss": 0.5916,
      "step": 14415
    },
    {
      "epoch": 0.9249518922386145,
      "grad_norm": 2.8872548788201846,
      "learning_rate": 3.411765023429625e-06,
      "loss": 0.6681,
      "step": 14420
    },
    {
      "epoch": 0.9252726106478512,
      "grad_norm": 0.8605678505533776,
      "learning_rate": 3.382832129282909e-06,
      "loss": 0.7061,
      "step": 14425
    },
    {
      "epoch": 0.9255933290570879,
      "grad_norm": 0.8152777611420922,
      "learning_rate": 3.354020327501506e-06,
      "loss": 0.7016,
      "step": 14430
    },
    {
      "epoch": 0.9259140474663246,
      "grad_norm": 0.5720911855352934,
      "learning_rate": 3.32532965419583e-06,
      "loss": 0.6065,
      "step": 14435
    },
    {
      "epoch": 0.9262347658755613,
      "grad_norm": 0.5729769215244488,
      "learning_rate": 3.29676014532454e-06,
      "loss": 0.6385,
      "step": 14440
    },
    {
      "epoch": 0.926555484284798,
      "grad_norm": 0.7971168307254297,
      "learning_rate": 3.2683118366944153e-06,
      "loss": 0.7482,
      "step": 14445
    },
    {
      "epoch": 0.9268762026940346,
      "grad_norm": 0.8082127626355636,
      "learning_rate": 3.2399847639603132e-06,
      "loss": 0.5749,
      "step": 14450
    },
    {
      "epoch": 0.9271969211032713,
      "grad_norm": 0.986366425048449,
      "learning_rate": 3.211778962625178e-06,
      "loss": 0.814,
      "step": 14455
    },
    {
      "epoch": 0.927517639512508,
      "grad_norm": 0.7974470102591675,
      "learning_rate": 3.1836944680399215e-06,
      "loss": 0.6845,
      "step": 14460
    },
    {
      "epoch": 0.9278383579217447,
      "grad_norm": 0.9030012061093406,
      "learning_rate": 3.155731315403465e-06,
      "loss": 0.7462,
      "step": 14465
    },
    {
      "epoch": 0.9281590763309814,
      "grad_norm": 0.8114451125831404,
      "learning_rate": 3.1278895397626295e-06,
      "loss": 0.7289,
      "step": 14470
    },
    {
      "epoch": 0.9284797947402181,
      "grad_norm": 0.7580184369514217,
      "learning_rate": 3.10016917601208e-06,
      "loss": 0.8204,
      "step": 14475
    },
    {
      "epoch": 0.9288005131494548,
      "grad_norm": 0.9028047332034969,
      "learning_rate": 3.0725702588943693e-06,
      "loss": 0.6502,
      "step": 14480
    },
    {
      "epoch": 0.9291212315586914,
      "grad_norm": 0.5328705285389578,
      "learning_rate": 3.0450928229997956e-06,
      "loss": 0.6282,
      "step": 14485
    },
    {
      "epoch": 0.9294419499679282,
      "grad_norm": 0.676301284723922,
      "learning_rate": 3.0177369027664324e-06,
      "loss": 0.6152,
      "step": 14490
    },
    {
      "epoch": 0.9297626683771648,
      "grad_norm": 0.6911219963447808,
      "learning_rate": 2.990502532480033e-06,
      "loss": 0.7075,
      "step": 14495
    },
    {
      "epoch": 0.9300833867864016,
      "grad_norm": 0.8158597361321028,
      "learning_rate": 2.9633897462740035e-06,
      "loss": 0.5278,
      "step": 14500
    },
    {
      "epoch": 0.9304041051956382,
      "grad_norm": 0.8885816510360459,
      "learning_rate": 2.936398578129407e-06,
      "loss": 0.7842,
      "step": 14505
    },
    {
      "epoch": 0.930724823604875,
      "grad_norm": 0.9090481734964072,
      "learning_rate": 2.909529061874816e-06,
      "loss": 0.6346,
      "step": 14510
    },
    {
      "epoch": 0.9310455420141116,
      "grad_norm": 0.6271937382541385,
      "learning_rate": 2.8827812311864044e-06,
      "loss": 0.4965,
      "step": 14515
    },
    {
      "epoch": 0.9313662604233482,
      "grad_norm": 0.8626519977341744,
      "learning_rate": 2.856155119587789e-06,
      "loss": 0.6916,
      "step": 14520
    },
    {
      "epoch": 0.931686978832585,
      "grad_norm": 1.2378284751762905,
      "learning_rate": 2.829650760450031e-06,
      "loss": 0.6573,
      "step": 14525
    },
    {
      "epoch": 0.9320076972418216,
      "grad_norm": 1.2677367998396853,
      "learning_rate": 2.8032681869916366e-06,
      "loss": 0.5755,
      "step": 14530
    },
    {
      "epoch": 0.9323284156510584,
      "grad_norm": 0.5109336107393835,
      "learning_rate": 2.7770074322784334e-06,
      "loss": 0.5688,
      "step": 14535
    },
    {
      "epoch": 0.932649134060295,
      "grad_norm": 0.7042004857736548,
      "learning_rate": 2.7508685292235937e-06,
      "loss": 0.7213,
      "step": 14540
    },
    {
      "epoch": 0.9329698524695318,
      "grad_norm": 0.7309101698002372,
      "learning_rate": 2.7248515105875673e-06,
      "loss": 0.6667,
      "step": 14545
    },
    {
      "epoch": 0.9332905708787684,
      "grad_norm": 0.6908743464424493,
      "learning_rate": 2.6989564089780263e-06,
      "loss": 0.6156,
      "step": 14550
    },
    {
      "epoch": 0.9336112892880052,
      "grad_norm": 0.9549405672325,
      "learning_rate": 2.673183256849876e-06,
      "loss": 0.5705,
      "step": 14555
    },
    {
      "epoch": 0.9339320076972418,
      "grad_norm": 0.8108069141144446,
      "learning_rate": 2.6475320865051444e-06,
      "loss": 0.6301,
      "step": 14560
    },
    {
      "epoch": 0.9342527261064785,
      "grad_norm": 0.7542934406058188,
      "learning_rate": 2.6220029300930037e-06,
      "loss": 0.6081,
      "step": 14565
    },
    {
      "epoch": 0.9345734445157152,
      "grad_norm": 0.8121008842739622,
      "learning_rate": 2.5965958196096706e-06,
      "loss": 0.7333,
      "step": 14570
    },
    {
      "epoch": 0.9348941629249519,
      "grad_norm": 0.7044098978011041,
      "learning_rate": 2.571310786898451e-06,
      "loss": 0.6786,
      "step": 14575
    },
    {
      "epoch": 0.9352148813341886,
      "grad_norm": 0.669296953567193,
      "learning_rate": 2.5461478636496062e-06,
      "loss": 0.6451,
      "step": 14580
    },
    {
      "epoch": 0.9355355997434253,
      "grad_norm": 1.0134964970782947,
      "learning_rate": 2.5211070814003536e-06,
      "loss": 0.7071,
      "step": 14585
    },
    {
      "epoch": 0.935856318152662,
      "grad_norm": 0.8079966960225432,
      "learning_rate": 2.496188471534866e-06,
      "loss": 0.6494,
      "step": 14590
    },
    {
      "epoch": 0.9361770365618987,
      "grad_norm": 0.7980284916096867,
      "learning_rate": 2.4713920652841394e-06,
      "loss": 0.6966,
      "step": 14595
    },
    {
      "epoch": 0.9364977549711353,
      "grad_norm": 1.4182606806536633,
      "learning_rate": 2.4467178937260692e-06,
      "loss": 0.5106,
      "step": 14600
    },
    {
      "epoch": 0.936818473380372,
      "grad_norm": 1.1450293247030983,
      "learning_rate": 2.4221659877853074e-06,
      "loss": 0.6734,
      "step": 14605
    },
    {
      "epoch": 0.9371391917896087,
      "grad_norm": 0.7955638461295016,
      "learning_rate": 2.397736378233284e-06,
      "loss": 0.725,
      "step": 14610
    },
    {
      "epoch": 0.9374599101988454,
      "grad_norm": 0.7397520509486079,
      "learning_rate": 2.3734290956881734e-06,
      "loss": 0.6244,
      "step": 14615
    },
    {
      "epoch": 0.9377806286080821,
      "grad_norm": 0.9732579754101209,
      "learning_rate": 2.349244170614773e-06,
      "loss": 0.6057,
      "step": 14620
    },
    {
      "epoch": 0.9381013470173188,
      "grad_norm": 1.406456086581141,
      "learning_rate": 2.3251816333246025e-06,
      "loss": 0.7182,
      "step": 14625
    },
    {
      "epoch": 0.9384220654265555,
      "grad_norm": 0.8952424347381697,
      "learning_rate": 2.301241513975749e-06,
      "loss": 0.7598,
      "step": 14630
    },
    {
      "epoch": 0.9387427838357921,
      "grad_norm": 1.0218439096331748,
      "learning_rate": 2.2774238425728677e-06,
      "loss": 0.7246,
      "step": 14635
    },
    {
      "epoch": 0.9390635022450289,
      "grad_norm": 0.7685781373474748,
      "learning_rate": 2.2537286489671573e-06,
      "loss": 0.5579,
      "step": 14640
    },
    {
      "epoch": 0.9393842206542655,
      "grad_norm": 0.7182539188714678,
      "learning_rate": 2.2301559628563062e-06,
      "loss": 0.4816,
      "step": 14645
    },
    {
      "epoch": 0.9397049390635023,
      "grad_norm": 0.7271338524133633,
      "learning_rate": 2.206705813784471e-06,
      "loss": 0.7117,
      "step": 14650
    },
    {
      "epoch": 0.9400256574727389,
      "grad_norm": 0.9142892488291297,
      "learning_rate": 2.18337823114223e-06,
      "loss": 0.5035,
      "step": 14655
    },
    {
      "epoch": 0.9403463758819757,
      "grad_norm": 1.1230106908678623,
      "learning_rate": 2.160173244166541e-06,
      "loss": 0.5692,
      "step": 14660
    },
    {
      "epoch": 0.9406670942912123,
      "grad_norm": 0.40796226780607736,
      "learning_rate": 2.1370908819407174e-06,
      "loss": 0.5771,
      "step": 14665
    },
    {
      "epoch": 0.940987812700449,
      "grad_norm": 0.9481608724103522,
      "learning_rate": 2.1141311733943626e-06,
      "loss": 0.5029,
      "step": 14670
    },
    {
      "epoch": 0.9413085311096857,
      "grad_norm": 1.0000026556770782,
      "learning_rate": 2.09129414730338e-06,
      "loss": 0.6156,
      "step": 14675
    },
    {
      "epoch": 0.9416292495189224,
      "grad_norm": 0.521971426032197,
      "learning_rate": 2.0685798322899073e-06,
      "loss": 0.6233,
      "step": 14680
    },
    {
      "epoch": 0.9419499679281591,
      "grad_norm": 0.555113548672577,
      "learning_rate": 2.045988256822273e-06,
      "loss": 0.6226,
      "step": 14685
    },
    {
      "epoch": 0.9422706863373957,
      "grad_norm": 1.0940970203612415,
      "learning_rate": 2.0235194492149832e-06,
      "loss": 0.6603,
      "step": 14690
    },
    {
      "epoch": 0.9425914047466325,
      "grad_norm": 1.0787803604629624,
      "learning_rate": 2.0011734376286896e-06,
      "loss": 0.6915,
      "step": 14695
    },
    {
      "epoch": 0.9429121231558691,
      "grad_norm": 0.603441598329727,
      "learning_rate": 1.978950250070111e-06,
      "loss": 0.7826,
      "step": 14700
    },
    {
      "epoch": 0.9432328415651058,
      "grad_norm": 1.1933790532010597,
      "learning_rate": 1.9568499143920336e-06,
      "loss": 0.6277,
      "step": 14705
    },
    {
      "epoch": 0.9435535599743425,
      "grad_norm": 0.5764914897220961,
      "learning_rate": 1.9348724582933133e-06,
      "loss": 0.6875,
      "step": 14710
    },
    {
      "epoch": 0.9438742783835792,
      "grad_norm": 0.9696889870454197,
      "learning_rate": 1.9130179093187484e-06,
      "loss": 0.8159,
      "step": 14715
    },
    {
      "epoch": 0.9441949967928159,
      "grad_norm": 1.174884517440042,
      "learning_rate": 1.891286294859107e-06,
      "loss": 0.7811,
      "step": 14720
    },
    {
      "epoch": 0.9445157152020526,
      "grad_norm": 0.7432254800663841,
      "learning_rate": 1.869677642151102e-06,
      "loss": 0.8169,
      "step": 14725
    },
    {
      "epoch": 0.9448364336112893,
      "grad_norm": 1.3451481683596176,
      "learning_rate": 1.8481919782773138e-06,
      "loss": 0.6386,
      "step": 14730
    },
    {
      "epoch": 0.945157152020526,
      "grad_norm": 0.8999549303642768,
      "learning_rate": 1.82682933016618e-06,
      "loss": 0.6578,
      "step": 14735
    },
    {
      "epoch": 0.9454778704297627,
      "grad_norm": 0.7535938047620351,
      "learning_rate": 1.8055897245919718e-06,
      "loss": 0.6345,
      "step": 14740
    },
    {
      "epoch": 0.9457985888389994,
      "grad_norm": 0.9031933438522918,
      "learning_rate": 1.78447318817474e-06,
      "loss": 0.6979,
      "step": 14745
    },
    {
      "epoch": 0.946119307248236,
      "grad_norm": 0.5909234139284275,
      "learning_rate": 1.7634797473802922e-06,
      "loss": 0.5283,
      "step": 14750
    },
    {
      "epoch": 0.9464400256574728,
      "grad_norm": 0.7478929356403822,
      "learning_rate": 1.7426094285201478e-06,
      "loss": 0.7548,
      "step": 14755
    },
    {
      "epoch": 0.9467607440667094,
      "grad_norm": 0.7939890902510196,
      "learning_rate": 1.7218622577515496e-06,
      "loss": 0.7005,
      "step": 14760
    },
    {
      "epoch": 0.9470814624759462,
      "grad_norm": 0.6058878555015041,
      "learning_rate": 1.7012382610773315e-06,
      "loss": 0.6766,
      "step": 14765
    },
    {
      "epoch": 0.9474021808851828,
      "grad_norm": 0.848486027790844,
      "learning_rate": 1.6807374643460272e-06,
      "loss": 0.7677,
      "step": 14770
    },
    {
      "epoch": 0.9477228992944196,
      "grad_norm": 0.7595303087988711,
      "learning_rate": 1.6603598932517061e-06,
      "loss": 0.7407,
      "step": 14775
    },
    {
      "epoch": 0.9480436177036562,
      "grad_norm": 0.7579789167134414,
      "learning_rate": 1.6401055733340164e-06,
      "loss": 0.669,
      "step": 14780
    },
    {
      "epoch": 0.9483643361128928,
      "grad_norm": 1.2648466067379471,
      "learning_rate": 1.61997452997813e-06,
      "loss": 0.6469,
      "step": 14785
    },
    {
      "epoch": 0.9486850545221296,
      "grad_norm": 0.797026657881511,
      "learning_rate": 1.5999667884147196e-06,
      "loss": 0.588,
      "step": 14790
    },
    {
      "epoch": 0.9490057729313662,
      "grad_norm": 0.915174796254417,
      "learning_rate": 1.5800823737199156e-06,
      "loss": 0.7036,
      "step": 14795
    },
    {
      "epoch": 0.949326491340603,
      "grad_norm": 0.7014564001359544,
      "learning_rate": 1.5603213108152715e-06,
      "loss": 0.604,
      "step": 14800
    },
    {
      "epoch": 0.9496472097498396,
      "grad_norm": 1.0673933698941918,
      "learning_rate": 1.5406836244677646e-06,
      "loss": 0.6767,
      "step": 14805
    },
    {
      "epoch": 0.9499679281590764,
      "grad_norm": 0.5974581758846627,
      "learning_rate": 1.5211693392897185e-06,
      "loss": 0.6277,
      "step": 14810
    },
    {
      "epoch": 0.950288646568313,
      "grad_norm": 0.76752354413579,
      "learning_rate": 1.5017784797388024e-06,
      "loss": 0.6575,
      "step": 14815
    },
    {
      "epoch": 0.9506093649775497,
      "grad_norm": 0.6302709486833972,
      "learning_rate": 1.482511070118009e-06,
      "loss": 0.5797,
      "step": 14820
    },
    {
      "epoch": 0.9509300833867864,
      "grad_norm": 0.6408626471147529,
      "learning_rate": 1.4633671345755884e-06,
      "loss": 0.6938,
      "step": 14825
    },
    {
      "epoch": 0.9512508017960231,
      "grad_norm": 1.147885938640683,
      "learning_rate": 1.4443466971050367e-06,
      "loss": 0.6631,
      "step": 14830
    },
    {
      "epoch": 0.9515715202052598,
      "grad_norm": 1.2090975514637632,
      "learning_rate": 1.4254497815450852e-06,
      "loss": 0.5987,
      "step": 14835
    },
    {
      "epoch": 0.9518922386144965,
      "grad_norm": 1.4462854589201612,
      "learning_rate": 1.4066764115796328e-06,
      "loss": 0.5496,
      "step": 14840
    },
    {
      "epoch": 0.9522129570237332,
      "grad_norm": 2.2267736323891603,
      "learning_rate": 1.3880266107377581e-06,
      "loss": 0.6236,
      "step": 14845
    },
    {
      "epoch": 0.9525336754329699,
      "grad_norm": 0.9767897268690148,
      "learning_rate": 1.369500402393653e-06,
      "loss": 0.6737,
      "step": 14850
    },
    {
      "epoch": 0.9528543938422065,
      "grad_norm": 0.6597022287518994,
      "learning_rate": 1.3510978097665994e-06,
      "loss": 0.6009,
      "step": 14855
    },
    {
      "epoch": 0.9531751122514432,
      "grad_norm": 0.8352297747099178,
      "learning_rate": 1.332818855920981e-06,
      "loss": 0.6206,
      "step": 14860
    },
    {
      "epoch": 0.9534958306606799,
      "grad_norm": 0.3398468741414835,
      "learning_rate": 1.314663563766172e-06,
      "loss": 0.745,
      "step": 14865
    },
    {
      "epoch": 0.9538165490699166,
      "grad_norm": 0.6650997138673455,
      "learning_rate": 1.2966319560566264e-06,
      "loss": 0.5189,
      "step": 14870
    },
    {
      "epoch": 0.9541372674791533,
      "grad_norm": 0.8495035997423334,
      "learning_rate": 1.2787240553917223e-06,
      "loss": 0.5352,
      "step": 14875
    },
    {
      "epoch": 0.95445798588839,
      "grad_norm": 0.6804679950864659,
      "learning_rate": 1.2609398842158171e-06,
      "loss": 0.5298,
      "step": 14880
    },
    {
      "epoch": 0.9547787042976267,
      "grad_norm": 0.9011394842975389,
      "learning_rate": 1.2432794648181922e-06,
      "loss": 0.6416,
      "step": 14885
    },
    {
      "epoch": 0.9550994227068633,
      "grad_norm": 0.8017624405517991,
      "learning_rate": 1.225742819333031e-06,
      "loss": 0.7683,
      "step": 14890
    },
    {
      "epoch": 0.9554201411161001,
      "grad_norm": 1.0189493989237226,
      "learning_rate": 1.2083299697393968e-06,
      "loss": 0.6712,
      "step": 14895
    },
    {
      "epoch": 0.9557408595253367,
      "grad_norm": 0.8632861800860692,
      "learning_rate": 1.1910409378611653e-06,
      "loss": 0.6677,
      "step": 14900
    },
    {
      "epoch": 0.9560615779345735,
      "grad_norm": 0.8271377018484679,
      "learning_rate": 1.17387574536707e-06,
      "loss": 0.8435,
      "step": 14905
    },
    {
      "epoch": 0.9563822963438101,
      "grad_norm": 1.090763241775662,
      "learning_rate": 1.1568344137706133e-06,
      "loss": 0.751,
      "step": 14910
    },
    {
      "epoch": 0.9567030147530469,
      "grad_norm": 0.8533558406500173,
      "learning_rate": 1.1399169644300323e-06,
      "loss": 0.7627,
      "step": 14915
    },
    {
      "epoch": 0.9570237331622835,
      "grad_norm": 0.7969691903367916,
      "learning_rate": 1.1231234185483663e-06,
      "loss": 0.6599,
      "step": 14920
    },
    {
      "epoch": 0.9573444515715203,
      "grad_norm": 0.6892919393359965,
      "learning_rate": 1.1064537971733124e-06,
      "loss": 0.6862,
      "step": 14925
    },
    {
      "epoch": 0.9576651699807569,
      "grad_norm": 0.8464857234158932,
      "learning_rate": 1.0899081211972584e-06,
      "loss": 0.8058,
      "step": 14930
    },
    {
      "epoch": 0.9579858883899935,
      "grad_norm": 0.5019234017303561,
      "learning_rate": 1.0734864113572606e-06,
      "loss": 0.684,
      "step": 14935
    },
    {
      "epoch": 0.9583066067992303,
      "grad_norm": 0.7995354303661617,
      "learning_rate": 1.057188688234989e-06,
      "loss": 0.577,
      "step": 14940
    },
    {
      "epoch": 0.9586273252084669,
      "grad_norm": 1.053084388323032,
      "learning_rate": 1.0410149722567376e-06,
      "loss": 0.6179,
      "step": 14945
    },
    {
      "epoch": 0.9589480436177037,
      "grad_norm": 0.9473025528524849,
      "learning_rate": 1.0249652836933688e-06,
      "loss": 0.6448,
      "step": 14950
    },
    {
      "epoch": 0.9592687620269403,
      "grad_norm": 0.8867828551638389,
      "learning_rate": 1.0090396426603143e-06,
      "loss": 0.7081,
      "step": 14955
    },
    {
      "epoch": 0.9595894804361771,
      "grad_norm": 0.579392165704179,
      "learning_rate": 9.93238069117508e-07,
      "loss": 0.6266,
      "step": 14960
    },
    {
      "epoch": 0.9599101988454137,
      "grad_norm": 1.3419589121931794,
      "learning_rate": 9.775605828693969e-07,
      "loss": 0.6619,
      "step": 14965
    },
    {
      "epoch": 0.9602309172546504,
      "grad_norm": 0.9125359836127329,
      "learning_rate": 9.620072035649075e-07,
      "loss": 0.6073,
      "step": 14970
    },
    {
      "epoch": 0.9605516356638871,
      "grad_norm": 1.0860000796878035,
      "learning_rate": 9.465779506974359e-07,
      "loss": 0.5401,
      "step": 14975
    },
    {
      "epoch": 0.9608723540731238,
      "grad_norm": 1.171824681775004,
      "learning_rate": 9.312728436047913e-07,
      "loss": 0.5753,
      "step": 14980
    },
    {
      "epoch": 0.9611930724823605,
      "grad_norm": 0.5643018528812354,
      "learning_rate": 9.160919014691848e-07,
      "loss": 0.5638,
      "step": 14985
    },
    {
      "epoch": 0.9615137908915972,
      "grad_norm": 0.9034235555165777,
      "learning_rate": 9.010351433172304e-07,
      "loss": 0.6334,
      "step": 14990
    },
    {
      "epoch": 0.9618345093008339,
      "grad_norm": 1.1839905897068703,
      "learning_rate": 8.86102588019877e-07,
      "loss": 0.7153,
      "step": 14995
    },
    {
      "epoch": 0.9621552277100706,
      "grad_norm": 0.8180578726272846,
      "learning_rate": 8.712942542923986e-07,
      "loss": 0.5817,
      "step": 15000
    },
    {
      "epoch": 0.9624759461193072,
      "grad_norm": 1.0696335688074747,
      "learning_rate": 8.566101606944266e-07,
      "loss": 0.6736,
      "step": 15005
    },
    {
      "epoch": 0.962796664528544,
      "grad_norm": 0.7303824338994761,
      "learning_rate": 8.420503256298396e-07,
      "loss": 0.6429,
      "step": 15010
    },
    {
      "epoch": 0.9631173829377806,
      "grad_norm": 1.0294755318998579,
      "learning_rate": 8.276147673467849e-07,
      "loss": 0.7188,
      "step": 15015
    },
    {
      "epoch": 0.9634381013470174,
      "grad_norm": 0.9556262852737702,
      "learning_rate": 8.133035039376679e-07,
      "loss": 0.5951,
      "step": 15020
    },
    {
      "epoch": 0.963758819756254,
      "grad_norm": 0.9324693251087647,
      "learning_rate": 7.991165533390854e-07,
      "loss": 0.7127,
      "step": 15025
    },
    {
      "epoch": 0.9640795381654907,
      "grad_norm": 0.9591152159542692,
      "learning_rate": 7.850539333318585e-07,
      "loss": 0.6322,
      "step": 15030
    },
    {
      "epoch": 0.9644002565747274,
      "grad_norm": 0.6946002197246557,
      "learning_rate": 7.711156615409665e-07,
      "loss": 0.5755,
      "step": 15035
    },
    {
      "epoch": 0.964720974983964,
      "grad_norm": 1.3334758098994104,
      "learning_rate": 7.573017554355355e-07,
      "loss": 0.6318,
      "step": 15040
    },
    {
      "epoch": 0.9650416933932008,
      "grad_norm": 0.8978971885207064,
      "learning_rate": 7.436122323288497e-07,
      "loss": 0.6035,
      "step": 15045
    },
    {
      "epoch": 0.9653624118024374,
      "grad_norm": 0.8103686748723528,
      "learning_rate": 7.300471093782624e-07,
      "loss": 0.6194,
      "step": 15050
    },
    {
      "epoch": 0.9656831302116742,
      "grad_norm": 0.753034703476334,
      "learning_rate": 7.166064035852405e-07,
      "loss": 0.6241,
      "step": 15055
    },
    {
      "epoch": 0.9660038486209108,
      "grad_norm": 0.8194295630630289,
      "learning_rate": 7.032901317953089e-07,
      "loss": 0.804,
      "step": 15060
    },
    {
      "epoch": 0.9663245670301476,
      "grad_norm": 0.6380479125093319,
      "learning_rate": 6.900983106980396e-07,
      "loss": 0.4591,
      "step": 15065
    },
    {
      "epoch": 0.9666452854393842,
      "grad_norm": 0.6010950679928249,
      "learning_rate": 6.770309568270183e-07,
      "loss": 0.5964,
      "step": 15070
    },
    {
      "epoch": 0.9669660038486209,
      "grad_norm": 0.6142851169104145,
      "learning_rate": 6.640880865598331e-07,
      "loss": 0.515,
      "step": 15075
    },
    {
      "epoch": 0.9672867222578576,
      "grad_norm": 0.5969279751540932,
      "learning_rate": 6.512697161180859e-07,
      "loss": 0.5795,
      "step": 15080
    },
    {
      "epoch": 0.9676074406670943,
      "grad_norm": 1.1554904145083251,
      "learning_rate": 6.38575861567281e-07,
      "loss": 0.7483,
      "step": 15085
    },
    {
      "epoch": 0.967928159076331,
      "grad_norm": 0.7865746542213344,
      "learning_rate": 6.260065388169256e-07,
      "loss": 0.5557,
      "step": 15090
    },
    {
      "epoch": 0.9682488774855676,
      "grad_norm": 1.1050848806521416,
      "learning_rate": 6.135617636204072e-07,
      "loss": 0.5939,
      "step": 15095
    },
    {
      "epoch": 0.9685695958948044,
      "grad_norm": 0.7070536160439901,
      "learning_rate": 6.01241551575027e-07,
      "loss": 0.6985,
      "step": 15100
    },
    {
      "epoch": 0.968890314304041,
      "grad_norm": 1.105194184766872,
      "learning_rate": 5.890459181219776e-07,
      "loss": 0.7083,
      "step": 15105
    },
    {
      "epoch": 0.9692110327132777,
      "grad_norm": 1.2744464352233527,
      "learning_rate": 5.769748785463103e-07,
      "loss": 0.6397,
      "step": 15110
    },
    {
      "epoch": 0.9695317511225144,
      "grad_norm": 0.9272062316818276,
      "learning_rate": 5.650284479769008e-07,
      "loss": 0.7676,
      "step": 15115
    },
    {
      "epoch": 0.9698524695317511,
      "grad_norm": 0.7995773908927787,
      "learning_rate": 5.532066413864834e-07,
      "loss": 0.6971,
      "step": 15120
    },
    {
      "epoch": 0.9701731879409878,
      "grad_norm": 0.38586358236871543,
      "learning_rate": 5.415094735915838e-07,
      "loss": 0.6707,
      "step": 15125
    },
    {
      "epoch": 0.9704939063502245,
      "grad_norm": 0.9134739108193013,
      "learning_rate": 5.299369592524972e-07,
      "loss": 0.7099,
      "step": 15130
    },
    {
      "epoch": 0.9708146247594612,
      "grad_norm": 1.1214413150852183,
      "learning_rate": 5.184891128733216e-07,
      "loss": 0.5773,
      "step": 15135
    },
    {
      "epoch": 0.9711353431686979,
      "grad_norm": 0.9080341063196368,
      "learning_rate": 5.071659488018688e-07,
      "loss": 0.5541,
      "step": 15140
    },
    {
      "epoch": 0.9714560615779346,
      "grad_norm": 0.6396326113379124,
      "learning_rate": 4.959674812297089e-07,
      "loss": 0.7547,
      "step": 15145
    },
    {
      "epoch": 0.9717767799871713,
      "grad_norm": 0.6247330527268826,
      "learning_rate": 4.848937241921369e-07,
      "loss": 0.7347,
      "step": 15150
    },
    {
      "epoch": 0.9720974983964079,
      "grad_norm": 0.7413180396760661,
      "learning_rate": 4.7394469156810674e-07,
      "loss": 0.6324,
      "step": 15155
    },
    {
      "epoch": 0.9724182168056447,
      "grad_norm": 0.8191285127812412,
      "learning_rate": 4.6312039708028553e-07,
      "loss": 0.6501,
      "step": 15160
    },
    {
      "epoch": 0.9727389352148813,
      "grad_norm": 1.5646180696875727,
      "learning_rate": 4.5242085429499923e-07,
      "loss": 0.7018,
      "step": 15165
    },
    {
      "epoch": 0.9730596536241181,
      "grad_norm": 1.05700452006374,
      "learning_rate": 4.4184607662220987e-07,
      "loss": 0.702,
      "step": 15170
    },
    {
      "epoch": 0.9733803720333547,
      "grad_norm": 0.6341783140741876,
      "learning_rate": 4.313960773155046e-07,
      "loss": 0.636,
      "step": 15175
    },
    {
      "epoch": 0.9737010904425915,
      "grad_norm": 0.7888859139283535,
      "learning_rate": 4.2107086947209553e-07,
      "loss": 0.6313,
      "step": 15180
    },
    {
      "epoch": 0.9740218088518281,
      "grad_norm": 0.9191085670941561,
      "learning_rate": 4.1087046603279777e-07,
      "loss": 0.6221,
      "step": 15185
    },
    {
      "epoch": 0.9743425272610647,
      "grad_norm": 0.747755641512419,
      "learning_rate": 4.007948797819738e-07,
      "loss": 0.7214,
      "step": 15190
    },
    {
      "epoch": 0.9746632456703015,
      "grad_norm": 0.977703835187041,
      "learning_rate": 3.90844123347589e-07,
      "loss": 0.6226,
      "step": 15195
    },
    {
      "epoch": 0.9749839640795381,
      "grad_norm": 1.0760333069724886,
      "learning_rate": 3.8101820920114494e-07,
      "loss": 0.5479,
      "step": 15200
    },
    {
      "epoch": 0.9753046824887749,
      "grad_norm": 0.6944511489853861,
      "learning_rate": 3.713171496576573e-07,
      "loss": 0.5499,
      "step": 15205
    },
    {
      "epoch": 0.9756254008980115,
      "grad_norm": 0.8427188819091052,
      "learning_rate": 3.617409568756669e-07,
      "loss": 0.7567,
      "step": 15210
    },
    {
      "epoch": 0.9759461193072483,
      "grad_norm": 0.8552901758457413,
      "learning_rate": 3.5228964285722864e-07,
      "loss": 0.5683,
      "step": 15215
    },
    {
      "epoch": 0.9762668377164849,
      "grad_norm": 1.3132456382472737,
      "learning_rate": 3.429632194478782e-07,
      "loss": 0.6284,
      "step": 15220
    },
    {
      "epoch": 0.9765875561257216,
      "grad_norm": 0.7318279273617357,
      "learning_rate": 3.337616983366321e-07,
      "loss": 0.5582,
      "step": 15225
    },
    {
      "epoch": 0.9769082745349583,
      "grad_norm": 0.6573550653291185,
      "learning_rate": 3.246850910559318e-07,
      "loss": 0.5491,
      "step": 15230
    },
    {
      "epoch": 0.977228992944195,
      "grad_norm": 0.8242113768294678,
      "learning_rate": 3.157334089816888e-07,
      "loss": 0.7255,
      "step": 15235
    },
    {
      "epoch": 0.9775497113534317,
      "grad_norm": 0.9030228435778539,
      "learning_rate": 3.0690666333325067e-07,
      "loss": 0.5873,
      "step": 15240
    },
    {
      "epoch": 0.9778704297626684,
      "grad_norm": 0.565513303166446,
      "learning_rate": 2.9820486517335713e-07,
      "loss": 0.598,
      "step": 15245
    },
    {
      "epoch": 0.9781911481719051,
      "grad_norm": 0.6147817142778307,
      "learning_rate": 2.896280254081618e-07,
      "loss": 0.7145,
      "step": 15250
    },
    {
      "epoch": 0.9785118665811418,
      "grad_norm": 0.8743323298527471,
      "learning_rate": 2.811761547871994e-07,
      "loss": 0.6756,
      "step": 15255
    },
    {
      "epoch": 0.9788325849903784,
      "grad_norm": 1.1307500659483494,
      "learning_rate": 2.728492639033742e-07,
      "loss": 0.6188,
      "step": 15260
    },
    {
      "epoch": 0.9791533033996151,
      "grad_norm": 0.7125463266714677,
      "learning_rate": 2.6464736319297136e-07,
      "loss": 0.6278,
      "step": 15265
    },
    {
      "epoch": 0.9794740218088518,
      "grad_norm": 0.5910469031411075,
      "learning_rate": 2.5657046293560137e-07,
      "loss": 0.6905,
      "step": 15270
    },
    {
      "epoch": 0.9797947402180885,
      "grad_norm": 0.7878661937473239,
      "learning_rate": 2.4861857325421123e-07,
      "loss": 0.7325,
      "step": 15275
    },
    {
      "epoch": 0.9801154586273252,
      "grad_norm": 0.8286733473521487,
      "learning_rate": 2.4079170411507315e-07,
      "loss": 0.7773,
      "step": 15280
    },
    {
      "epoch": 0.9804361770365619,
      "grad_norm": 0.9685767265903029,
      "learning_rate": 2.3308986532778464e-07,
      "loss": 0.646,
      "step": 15285
    },
    {
      "epoch": 0.9807568954457986,
      "grad_norm": 0.9361901486165769,
      "learning_rate": 2.255130665452243e-07,
      "loss": 0.6598,
      "step": 15290
    },
    {
      "epoch": 0.9810776138550352,
      "grad_norm": 0.9598712869867538,
      "learning_rate": 2.180613172635404e-07,
      "loss": 0.5625,
      "step": 15295
    },
    {
      "epoch": 0.981398332264272,
      "grad_norm": 0.53570267588639,
      "learning_rate": 2.1073462682217325e-07,
      "loss": 0.5784,
      "step": 15300
    },
    {
      "epoch": 0.9817190506735086,
      "grad_norm": 0.7566088957917948,
      "learning_rate": 2.0353300440382194e-07,
      "loss": 0.6119,
      "step": 15305
    },
    {
      "epoch": 0.9820397690827454,
      "grad_norm": 1.146329754716512,
      "learning_rate": 1.9645645903444422e-07,
      "loss": 0.7188,
      "step": 15310
    },
    {
      "epoch": 0.982360487491982,
      "grad_norm": 0.8370973588336825,
      "learning_rate": 1.895049995832232e-07,
      "loss": 0.7563,
      "step": 15315
    },
    {
      "epoch": 0.9826812059012188,
      "grad_norm": 0.9434580889772379,
      "learning_rate": 1.8267863476255643e-07,
      "loss": 0.7839,
      "step": 15320
    },
    {
      "epoch": 0.9830019243104554,
      "grad_norm": 0.8804750628505544,
      "learning_rate": 1.7597737312810004e-07,
      "loss": 0.4332,
      "step": 15325
    },
    {
      "epoch": 0.9833226427196922,
      "grad_norm": 0.7320489722005881,
      "learning_rate": 1.694012230786579e-07,
      "loss": 0.7652,
      "step": 15330
    },
    {
      "epoch": 0.9836433611289288,
      "grad_norm": 0.7366362970085942,
      "learning_rate": 1.6295019285628154e-07,
      "loss": 0.7341,
      "step": 15335
    },
    {
      "epoch": 0.9839640795381654,
      "grad_norm": 1.0140709106862729,
      "learning_rate": 1.5662429054618122e-07,
      "loss": 0.4945,
      "step": 15340
    },
    {
      "epoch": 0.9842847979474022,
      "grad_norm": 1.9484887809729772,
      "learning_rate": 1.504235240767371e-07,
      "loss": 0.6308,
      "step": 15345
    },
    {
      "epoch": 0.9846055163566388,
      "grad_norm": 0.9619117197899885,
      "learning_rate": 1.4434790121951036e-07,
      "loss": 0.6099,
      "step": 15350
    },
    {
      "epoch": 0.9849262347658756,
      "grad_norm": 0.9949706333975902,
      "learning_rate": 1.3839742958920987e-07,
      "loss": 0.5725,
      "step": 15355
    },
    {
      "epoch": 0.9852469531751122,
      "grad_norm": 0.9242186083511401,
      "learning_rate": 1.3257211664368106e-07,
      "loss": 0.6308,
      "step": 15360
    },
    {
      "epoch": 0.985567671584349,
      "grad_norm": 1.0782239190960032,
      "learning_rate": 1.2687196968392822e-07,
      "loss": 0.6935,
      "step": 15365
    },
    {
      "epoch": 0.9858883899935856,
      "grad_norm": 0.8111644243864005,
      "learning_rate": 1.2129699585404774e-07,
      "loss": 0.7241,
      "step": 15370
    },
    {
      "epoch": 0.9862091084028223,
      "grad_norm": 0.7276347564310323,
      "learning_rate": 1.1584720214129485e-07,
      "loss": 0.6842,
      "step": 15375
    },
    {
      "epoch": 0.986529826812059,
      "grad_norm": 1.036558622735431,
      "learning_rate": 1.1052259537599474e-07,
      "loss": 0.7109,
      "step": 15380
    },
    {
      "epoch": 0.9868505452212957,
      "grad_norm": 0.8442723448288622,
      "learning_rate": 1.053231822315981e-07,
      "loss": 0.5197,
      "step": 15385
    },
    {
      "epoch": 0.9871712636305324,
      "grad_norm": 0.7755592771907561,
      "learning_rate": 1.0024896922464777e-07,
      "loss": 0.5958,
      "step": 15390
    },
    {
      "epoch": 0.9874919820397691,
      "grad_norm": 1.0235862204819772,
      "learning_rate": 9.529996271475661e-08,
      "loss": 0.7323,
      "step": 15395
    },
    {
      "epoch": 0.9878127004490058,
      "grad_norm": 0.6802556392432448,
      "learning_rate": 9.047616890461852e-08,
      "loss": 0.6661,
      "step": 15400
    },
    {
      "epoch": 0.9881334188582425,
      "grad_norm": 0.7642842609623561,
      "learning_rate": 8.57775938399974e-08,
      "loss": 0.6418,
      "step": 15405
    },
    {
      "epoch": 0.9884541372674791,
      "grad_norm": 0.7629833080692018,
      "learning_rate": 8.1204243409716e-08,
      "loss": 0.71,
      "step": 15410
    },
    {
      "epoch": 0.9887748556767159,
      "grad_norm": 0.8028551912844719,
      "learning_rate": 7.675612334566706e-08,
      "loss": 0.6261,
      "step": 15415
    },
    {
      "epoch": 0.9890955740859525,
      "grad_norm": 0.8568280018874693,
      "learning_rate": 7.24332392227578e-08,
      "loss": 0.7818,
      "step": 15420
    },
    {
      "epoch": 0.9894162924951893,
      "grad_norm": 0.9435010043749265,
      "learning_rate": 6.823559645896538e-08,
      "loss": 0.7135,
      "step": 15425
    },
    {
      "epoch": 0.9897370109044259,
      "grad_norm": 0.8536947904193946,
      "learning_rate": 6.416320031527035e-08,
      "loss": 0.6909,
      "step": 15430
    },
    {
      "epoch": 0.9900577293136626,
      "grad_norm": 0.6375751055715156,
      "learning_rate": 6.02160558957121e-08,
      "loss": 0.7567,
      "step": 15435
    },
    {
      "epoch": 0.9903784477228993,
      "grad_norm": 0.722851635446421,
      "learning_rate": 5.639416814731124e-08,
      "loss": 0.595,
      "step": 15440
    },
    {
      "epoch": 0.9906991661321359,
      "grad_norm": 0.6530835942019998,
      "learning_rate": 5.269754186013609e-08,
      "loss": 0.6185,
      "step": 15445
    },
    {
      "epoch": 0.9910198845413727,
      "grad_norm": 1.0508657841447764,
      "learning_rate": 4.912618166723615e-08,
      "loss": 0.5615,
      "step": 15450
    },
    {
      "epoch": 0.9913406029506093,
      "grad_norm": 0.89657789016663,
      "learning_rate": 4.5680092044686486e-08,
      "loss": 0.686,
      "step": 15455
    },
    {
      "epoch": 0.9916613213598461,
      "grad_norm": 1.0049970608249212,
      "learning_rate": 4.235927731153222e-08,
      "loss": 0.5976,
      "step": 15460
    },
    {
      "epoch": 0.9919820397690827,
      "grad_norm": 0.5955235189985802,
      "learning_rate": 3.916374162983294e-08,
      "loss": 0.4921,
      "step": 15465
    },
    {
      "epoch": 0.9923027581783195,
      "grad_norm": 1.0006472782878193,
      "learning_rate": 3.6093489004618286e-08,
      "loss": 0.6268,
      "step": 15470
    },
    {
      "epoch": 0.9926234765875561,
      "grad_norm": 0.7931648933621266,
      "learning_rate": 3.314852328389906e-08,
      "loss": 0.6005,
      "step": 15475
    },
    {
      "epoch": 0.9929441949967928,
      "grad_norm": 0.9041277771423232,
      "learning_rate": 3.032884815866721e-08,
      "loss": 0.5324,
      "step": 15480
    },
    {
      "epoch": 0.9932649134060295,
      "grad_norm": 0.9494072939119311,
      "learning_rate": 2.7634467162873657e-08,
      "loss": 0.7065,
      "step": 15485
    },
    {
      "epoch": 0.9935856318152662,
      "grad_norm": 0.6280167222373476,
      "learning_rate": 2.506538367345046e-08,
      "loss": 0.6061,
      "step": 15490
    },
    {
      "epoch": 0.9939063502245029,
      "grad_norm": 0.9951079606352037,
      "learning_rate": 2.2621600910288644e-08,
      "loss": 0.6444,
      "step": 15495
    },
    {
      "epoch": 0.9942270686337396,
      "grad_norm": 0.5695988637172767,
      "learning_rate": 2.0303121936227077e-08,
      "loss": 0.5318,
      "step": 15500
    },
    {
      "epoch": 0.9945477870429763,
      "grad_norm": 0.7104107796380682,
      "learning_rate": 1.8109949657074687e-08,
      "loss": 0.584,
      "step": 15505
    },
    {
      "epoch": 0.994868505452213,
      "grad_norm": 0.9519239668806431,
      "learning_rate": 1.6042086821566048e-08,
      "loss": 0.6069,
      "step": 15510
    },
    {
      "epoch": 0.9951892238614497,
      "grad_norm": 1.0531482909821168,
      "learning_rate": 1.409953602140579e-08,
      "loss": 0.6419,
      "step": 15515
    },
    {
      "epoch": 0.9955099422706863,
      "grad_norm": 0.8960669638227693,
      "learning_rate": 1.2282299691235289e-08,
      "loss": 0.6139,
      "step": 15520
    },
    {
      "epoch": 0.995830660679923,
      "grad_norm": 1.4364607207448494,
      "learning_rate": 1.059038010863267e-08,
      "loss": 0.557,
      "step": 15525
    },
    {
      "epoch": 0.9961513790891597,
      "grad_norm": 0.7870715712258225,
      "learning_rate": 9.02377939412391e-09,
      "loss": 0.6829,
      "step": 15530
    },
    {
      "epoch": 0.9964720974983964,
      "grad_norm": 0.6681758560958523,
      "learning_rate": 7.582499511160635e-09,
      "loss": 0.6894,
      "step": 15535
    },
    {
      "epoch": 0.9967928159076331,
      "grad_norm": 0.7692932903463889,
      "learning_rate": 6.266542266120112e-09,
      "loss": 0.6775,
      "step": 15540
    },
    {
      "epoch": 0.9971135343168698,
      "grad_norm": 1.2971219190629335,
      "learning_rate": 5.0759093083385665e-09,
      "loss": 0.6272,
      "step": 15545
    },
    {
      "epoch": 0.9974342527261065,
      "grad_norm": 0.6500496471556959,
      "learning_rate": 4.010602130033458e-09,
      "loss": 0.6068,
      "step": 15550
    },
    {
      "epoch": 0.9977549711353432,
      "grad_norm": 0.686167526298323,
      "learning_rate": 3.0706220664034057e-09,
      "loss": 0.6119,
      "step": 15555
    },
    {
      "epoch": 0.9980756895445798,
      "grad_norm": 0.7818918449822959,
      "learning_rate": 2.255970295539367e-09,
      "loss": 0.6275,
      "step": 15560
    },
    {
      "epoch": 0.9983964079538166,
      "grad_norm": 0.7349503006612832,
      "learning_rate": 1.5666478384579464e-09,
      "loss": 0.7661,
      "step": 15565
    },
    {
      "epoch": 0.9987171263630532,
      "grad_norm": 0.7447321528022689,
      "learning_rate": 1.0026555591013952e-09,
      "loss": 0.7204,
      "step": 15570
    },
    {
      "epoch": 0.99903784477229,
      "grad_norm": 0.5813111087659052,
      "learning_rate": 5.639941643376112e-10,
      "loss": 0.6803,
      "step": 15575
    },
    {
      "epoch": 0.9993585631815266,
      "grad_norm": 0.7896016109360797,
      "learning_rate": 2.5066420393793365e-10,
      "loss": 0.7841,
      "step": 15580
    },
    {
      "epoch": 0.9996792815907634,
      "grad_norm": 0.8103436457382939,
      "learning_rate": 6.266607062155316e-11,
      "loss": 0.5852,
      "step": 15585
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.9796619082932287,
      "learning_rate": 0.0,
      "loss": 0.7238,
      "step": 15590
    },
    {
      "epoch": 1.0,
      "step": 15590,
      "total_flos": 1.6764562374852608e+16,
      "train_loss": 0.0,
      "train_runtime": 0.0156,
      "train_samples_per_second": 6662118.554,
      "train_steps_per_second": 104099.609
    }
  ],
  "logging_steps": 5,
  "max_steps": 1624,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 10,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.6764562374852608e+16,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}