{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9996843204390374,
  "eval_steps": 500,
  "global_step": 2573,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0003885286904154829,
      "grad_norm": 1.0859375,
      "learning_rate": 7.751937984496125e-07,
      "loss": 2.7508,
      "step": 1
    },
    {
      "epoch": 0.0007770573808309658,
      "grad_norm": 0.875,
      "learning_rate": 1.550387596899225e-06,
      "loss": 2.6944,
      "step": 2
    },
    {
      "epoch": 0.0011655860712464486,
      "grad_norm": 0.94921875,
      "learning_rate": 2.325581395348837e-06,
      "loss": 2.6677,
      "step": 3
    },
    {
      "epoch": 0.0015541147616619315,
      "grad_norm": 0.89453125,
      "learning_rate": 3.10077519379845e-06,
      "loss": 2.6969,
      "step": 4
    },
    {
      "epoch": 0.0019426434520774143,
      "grad_norm": 0.81640625,
      "learning_rate": 3.875968992248062e-06,
      "loss": 2.6918,
      "step": 5
    },
    {
      "epoch": 0.002331172142492897,
      "grad_norm": 0.8203125,
      "learning_rate": 4.651162790697674e-06,
      "loss": 2.5546,
      "step": 6
    },
    {
      "epoch": 0.00271970083290838,
      "grad_norm": 0.984375,
      "learning_rate": 5.426356589147287e-06,
      "loss": 2.6413,
      "step": 7
    },
    {
      "epoch": 0.003108229523323863,
      "grad_norm": 0.9453125,
      "learning_rate": 6.2015503875969e-06,
      "loss": 2.7262,
      "step": 8
    },
    {
      "epoch": 0.0034967582137393457,
      "grad_norm": 0.85546875,
      "learning_rate": 6.976744186046512e-06,
      "loss": 2.7526,
      "step": 9
    },
    {
      "epoch": 0.0038852869041548286,
      "grad_norm": 0.82421875,
      "learning_rate": 7.751937984496124e-06,
      "loss": 2.7219,
      "step": 10
    },
    {
      "epoch": 0.004273815594570312,
      "grad_norm": 0.8359375,
      "learning_rate": 8.527131782945736e-06,
      "loss": 2.7289,
      "step": 11
    },
    {
      "epoch": 0.004662344284985794,
      "grad_norm": 0.87109375,
      "learning_rate": 9.302325581395349e-06,
      "loss": 2.6555,
      "step": 12
    },
    {
      "epoch": 0.005050872975401278,
      "grad_norm": 0.79296875,
      "learning_rate": 1.0077519379844961e-05,
      "loss": 2.6741,
      "step": 13
    },
    {
      "epoch": 0.00543940166581676,
      "grad_norm": 0.84375,
      "learning_rate": 1.0852713178294575e-05,
      "loss": 2.6498,
      "step": 14
    },
    {
      "epoch": 0.005827930356232243,
      "grad_norm": 0.87109375,
      "learning_rate": 1.1627906976744187e-05,
      "loss": 2.7851,
      "step": 15
    },
    {
      "epoch": 0.006216459046647726,
      "grad_norm": 0.828125,
      "learning_rate": 1.24031007751938e-05,
      "loss": 2.8024,
      "step": 16
    },
    {
      "epoch": 0.006604987737063209,
      "grad_norm": 0.92578125,
      "learning_rate": 1.3178294573643413e-05,
      "loss": 2.7433,
      "step": 17
    },
    {
      "epoch": 0.006993516427478691,
      "grad_norm": 0.91796875,
      "learning_rate": 1.3953488372093024e-05,
      "loss": 2.687,
      "step": 18
    },
    {
      "epoch": 0.007382045117894175,
      "grad_norm": 0.87109375,
      "learning_rate": 1.4728682170542638e-05,
      "loss": 2.7203,
      "step": 19
    },
    {
      "epoch": 0.007770573808309657,
      "grad_norm": 0.96484375,
      "learning_rate": 1.5503875968992248e-05,
      "loss": 2.6724,
      "step": 20
    },
    {
      "epoch": 0.00815910249872514,
      "grad_norm": 0.8203125,
      "learning_rate": 1.6279069767441862e-05,
      "loss": 2.7522,
      "step": 21
    },
    {
      "epoch": 0.008547631189140623,
      "grad_norm": 0.87890625,
      "learning_rate": 1.7054263565891473e-05,
      "loss": 2.692,
      "step": 22
    },
    {
      "epoch": 0.008936159879556106,
      "grad_norm": 0.8125,
      "learning_rate": 1.7829457364341087e-05,
      "loss": 2.6184,
      "step": 23
    },
    {
      "epoch": 0.009324688569971588,
      "grad_norm": 0.7890625,
      "learning_rate": 1.8604651162790697e-05,
      "loss": 2.6716,
      "step": 24
    },
    {
      "epoch": 0.009713217260387071,
      "grad_norm": 0.81640625,
      "learning_rate": 1.937984496124031e-05,
      "loss": 2.689,
      "step": 25
    },
    {
      "epoch": 0.010101745950802555,
      "grad_norm": 0.81640625,
      "learning_rate": 2.0155038759689922e-05,
      "loss": 2.623,
      "step": 26
    },
    {
      "epoch": 0.010490274641218038,
      "grad_norm": 0.828125,
      "learning_rate": 2.0930232558139536e-05,
      "loss": 2.711,
      "step": 27
    },
    {
      "epoch": 0.01087880333163352,
      "grad_norm": 0.8125,
      "learning_rate": 2.170542635658915e-05,
      "loss": 2.7812,
      "step": 28
    },
    {
      "epoch": 0.011267332022049003,
      "grad_norm": 0.8046875,
      "learning_rate": 2.2480620155038764e-05,
      "loss": 2.7191,
      "step": 29
    },
    {
      "epoch": 0.011655860712464486,
      "grad_norm": 0.79296875,
      "learning_rate": 2.3255813953488374e-05,
      "loss": 2.7255,
      "step": 30
    },
    {
      "epoch": 0.012044389402879968,
      "grad_norm": 0.84375,
      "learning_rate": 2.4031007751937988e-05,
      "loss": 2.6695,
      "step": 31
    },
    {
      "epoch": 0.012432918093295452,
      "grad_norm": 0.82421875,
      "learning_rate": 2.48062015503876e-05,
      "loss": 2.7645,
      "step": 32
    },
    {
      "epoch": 0.012821446783710935,
      "grad_norm": 0.8046875,
      "learning_rate": 2.5581395348837212e-05,
      "loss": 2.7689,
      "step": 33
    },
    {
      "epoch": 0.013209975474126417,
      "grad_norm": 0.8125,
      "learning_rate": 2.6356589147286826e-05,
      "loss": 2.7137,
      "step": 34
    },
    {
      "epoch": 0.0135985041645419,
      "grad_norm": 0.83984375,
      "learning_rate": 2.7131782945736434e-05,
      "loss": 2.6915,
      "step": 35
    },
    {
      "epoch": 0.013987032854957383,
      "grad_norm": 0.76953125,
      "learning_rate": 2.7906976744186048e-05,
      "loss": 2.6823,
      "step": 36
    },
    {
      "epoch": 0.014375561545372867,
      "grad_norm": 0.796875,
      "learning_rate": 2.868217054263566e-05,
      "loss": 2.7233,
      "step": 37
    },
    {
      "epoch": 0.01476409023578835,
      "grad_norm": 0.77734375,
      "learning_rate": 2.9457364341085275e-05,
      "loss": 2.7272,
      "step": 38
    },
    {
      "epoch": 0.015152618926203832,
      "grad_norm": 0.8125,
      "learning_rate": 3.0232558139534883e-05,
      "loss": 2.7218,
      "step": 39
    },
    {
      "epoch": 0.015541147616619315,
      "grad_norm": 0.80078125,
      "learning_rate": 3.1007751937984497e-05,
      "loss": 2.7278,
      "step": 40
    },
    {
      "epoch": 0.015929676307034797,
      "grad_norm": 0.828125,
      "learning_rate": 3.1782945736434114e-05,
      "loss": 2.7345,
      "step": 41
    },
    {
      "epoch": 0.01631820499745028,
      "grad_norm": 0.8125,
      "learning_rate": 3.2558139534883724e-05,
      "loss": 2.6648,
      "step": 42
    },
    {
      "epoch": 0.016706733687865762,
      "grad_norm": 0.86328125,
      "learning_rate": 3.3333333333333335e-05,
      "loss": 2.6722,
      "step": 43
    },
    {
      "epoch": 0.017095262378281247,
      "grad_norm": 0.7734375,
      "learning_rate": 3.4108527131782945e-05,
      "loss": 2.7215,
      "step": 44
    },
    {
      "epoch": 0.017483791068696727,
      "grad_norm": 0.80859375,
      "learning_rate": 3.488372093023256e-05,
      "loss": 2.7325,
      "step": 45
    },
    {
      "epoch": 0.01787231975911221,
      "grad_norm": 0.8359375,
      "learning_rate": 3.565891472868217e-05,
      "loss": 2.7248,
      "step": 46
    },
    {
      "epoch": 0.018260848449527696,
      "grad_norm": 0.81640625,
      "learning_rate": 3.6434108527131784e-05,
      "loss": 2.6442,
      "step": 47
    },
    {
      "epoch": 0.018649377139943177,
      "grad_norm": 0.75390625,
      "learning_rate": 3.7209302325581394e-05,
      "loss": 2.6807,
      "step": 48
    },
    {
      "epoch": 0.01903790583035866,
      "grad_norm": 0.75,
      "learning_rate": 3.798449612403101e-05,
      "loss": 2.5396,
      "step": 49
    },
    {
      "epoch": 0.019426434520774142,
      "grad_norm": 0.8359375,
      "learning_rate": 3.875968992248062e-05,
      "loss": 2.6923,
      "step": 50
    },
    {
      "epoch": 0.019814963211189626,
      "grad_norm": 0.796875,
      "learning_rate": 3.953488372093023e-05,
      "loss": 2.5531,
      "step": 51
    },
    {
      "epoch": 0.02020349190160511,
      "grad_norm": 0.77734375,
      "learning_rate": 4.0310077519379843e-05,
      "loss": 2.6413,
      "step": 52
    },
    {
      "epoch": 0.02059202059202059,
      "grad_norm": 0.79296875,
      "learning_rate": 4.108527131782946e-05,
      "loss": 2.6221,
      "step": 53
    },
    {
      "epoch": 0.020980549282436076,
      "grad_norm": 0.78515625,
      "learning_rate": 4.186046511627907e-05,
      "loss": 2.6596,
      "step": 54
    },
    {
      "epoch": 0.021369077972851556,
      "grad_norm": 0.76953125,
      "learning_rate": 4.263565891472868e-05,
      "loss": 2.6862,
      "step": 55
    },
    {
      "epoch": 0.02175760666326704,
      "grad_norm": 0.78125,
      "learning_rate": 4.34108527131783e-05,
      "loss": 2.7227,
      "step": 56
    },
    {
      "epoch": 0.022146135353682525,
      "grad_norm": 0.7890625,
      "learning_rate": 4.418604651162791e-05,
      "loss": 2.7295,
      "step": 57
    },
    {
      "epoch": 0.022534664044098006,
      "grad_norm": 0.7578125,
      "learning_rate": 4.496124031007753e-05,
      "loss": 2.7031,
      "step": 58
    },
    {
      "epoch": 0.02292319273451349,
      "grad_norm": 0.87890625,
      "learning_rate": 4.573643410852713e-05,
      "loss": 2.7625,
      "step": 59
    },
    {
      "epoch": 0.02331172142492897,
      "grad_norm": 0.76953125,
      "learning_rate": 4.651162790697675e-05,
      "loss": 2.7127,
      "step": 60
    },
    {
      "epoch": 0.023700250115344455,
      "grad_norm": 0.80078125,
      "learning_rate": 4.728682170542636e-05,
      "loss": 2.6414,
      "step": 61
    },
    {
      "epoch": 0.024088778805759936,
      "grad_norm": 0.77734375,
      "learning_rate": 4.8062015503875976e-05,
      "loss": 2.6865,
      "step": 62
    },
    {
      "epoch": 0.02447730749617542,
      "grad_norm": 0.82421875,
      "learning_rate": 4.883720930232558e-05,
      "loss": 2.6609,
      "step": 63
    },
    {
      "epoch": 0.024865836186590905,
      "grad_norm": 0.859375,
      "learning_rate": 4.96124031007752e-05,
      "loss": 2.7198,
      "step": 64
    },
    {
      "epoch": 0.025254364877006386,
      "grad_norm": 0.8125,
      "learning_rate": 5.038759689922481e-05,
      "loss": 2.6678,
      "step": 65
    },
    {
      "epoch": 0.02564289356742187,
      "grad_norm": 0.7578125,
      "learning_rate": 5.1162790697674425e-05,
      "loss": 2.7294,
      "step": 66
    },
    {
      "epoch": 0.02603142225783735,
      "grad_norm": 0.69140625,
      "learning_rate": 5.1937984496124036e-05,
      "loss": 2.6087,
      "step": 67
    },
    {
      "epoch": 0.026419950948252835,
      "grad_norm": 0.75390625,
      "learning_rate": 5.271317829457365e-05,
      "loss": 2.6123,
      "step": 68
    },
    {
      "epoch": 0.02680847963866832,
      "grad_norm": 0.76171875,
      "learning_rate": 5.348837209302326e-05,
      "loss": 2.6227,
      "step": 69
    },
    {
      "epoch": 0.0271970083290838,
      "grad_norm": 0.80859375,
      "learning_rate": 5.426356589147287e-05,
      "loss": 2.6388,
      "step": 70
    },
    {
      "epoch": 0.027585537019499284,
      "grad_norm": 0.79296875,
      "learning_rate": 5.503875968992248e-05,
      "loss": 2.6372,
      "step": 71
    },
    {
      "epoch": 0.027974065709914765,
      "grad_norm": 0.7578125,
      "learning_rate": 5.5813953488372095e-05,
      "loss": 2.6499,
      "step": 72
    },
    {
      "epoch": 0.02836259440033025,
      "grad_norm": 0.79296875,
      "learning_rate": 5.6589147286821706e-05,
      "loss": 2.6805,
      "step": 73
    },
    {
      "epoch": 0.028751123090745734,
      "grad_norm": 0.78125,
      "learning_rate": 5.736434108527132e-05,
      "loss": 2.646,
      "step": 74
    },
    {
      "epoch": 0.029139651781161215,
      "grad_norm": 0.83984375,
      "learning_rate": 5.8139534883720933e-05,
      "loss": 2.6443,
      "step": 75
    },
    {
      "epoch": 0.0295281804715767,
      "grad_norm": 0.8515625,
      "learning_rate": 5.891472868217055e-05,
      "loss": 2.7152,
      "step": 76
    },
    {
      "epoch": 0.02991670916199218,
      "grad_norm": 0.84375,
      "learning_rate": 5.9689922480620155e-05,
      "loss": 2.6011,
      "step": 77
    },
    {
      "epoch": 0.030305237852407664,
      "grad_norm": 0.84765625,
      "learning_rate": 6.0465116279069765e-05,
      "loss": 2.5998,
      "step": 78
    },
    {
      "epoch": 0.030693766542823145,
      "grad_norm": 0.8671875,
      "learning_rate": 6.124031007751938e-05,
      "loss": 2.6207,
      "step": 79
    },
    {
      "epoch": 0.03108229523323863,
      "grad_norm": 0.75390625,
      "learning_rate": 6.201550387596899e-05,
      "loss": 2.616,
      "step": 80
    },
    {
      "epoch": 0.03147082392365411,
      "grad_norm": 0.796875,
      "learning_rate": 6.27906976744186e-05,
      "loss": 2.6722,
      "step": 81
    },
    {
      "epoch": 0.031859352614069594,
      "grad_norm": 0.78125,
      "learning_rate": 6.356589147286823e-05,
      "loss": 2.5915,
      "step": 82
    },
    {
      "epoch": 0.032247881304485075,
      "grad_norm": 0.80078125,
      "learning_rate": 6.434108527131784e-05,
      "loss": 2.6935,
      "step": 83
    },
    {
      "epoch": 0.03263640999490056,
      "grad_norm": 0.80859375,
      "learning_rate": 6.511627906976745e-05,
      "loss": 2.5366,
      "step": 84
    },
    {
      "epoch": 0.033024938685316044,
      "grad_norm": 0.70703125,
      "learning_rate": 6.589147286821705e-05,
      "loss": 2.5981,
      "step": 85
    },
    {
      "epoch": 0.033413467375731525,
      "grad_norm": 0.765625,
      "learning_rate": 6.666666666666667e-05,
      "loss": 2.598,
      "step": 86
    },
    {
      "epoch": 0.03380199606614701,
      "grad_norm": 0.87109375,
      "learning_rate": 6.744186046511628e-05,
      "loss": 2.7113,
      "step": 87
    },
    {
      "epoch": 0.03419052475656249,
      "grad_norm": 0.83984375,
      "learning_rate": 6.821705426356589e-05,
      "loss": 2.6221,
      "step": 88
    },
    {
      "epoch": 0.034579053446977974,
      "grad_norm": 0.7890625,
      "learning_rate": 6.89922480620155e-05,
      "loss": 2.5659,
      "step": 89
    },
    {
      "epoch": 0.034967582137393455,
      "grad_norm": 0.83203125,
      "learning_rate": 6.976744186046513e-05,
      "loss": 2.6699,
      "step": 90
    },
    {
      "epoch": 0.03535611082780894,
      "grad_norm": 0.77734375,
      "learning_rate": 7.054263565891474e-05,
      "loss": 2.6132,
      "step": 91
    },
    {
      "epoch": 0.03574463951822442,
      "grad_norm": 0.78515625,
      "learning_rate": 7.131782945736435e-05,
      "loss": 2.536,
      "step": 92
    },
    {
      "epoch": 0.036133168208639904,
      "grad_norm": 0.89453125,
      "learning_rate": 7.209302325581396e-05,
      "loss": 2.6658,
      "step": 93
    },
    {
      "epoch": 0.03652169689905539,
      "grad_norm": 0.8359375,
      "learning_rate": 7.286821705426357e-05,
      "loss": 2.5863,
      "step": 94
    },
    {
      "epoch": 0.03691022558947087,
      "grad_norm": 0.734375,
      "learning_rate": 7.364341085271318e-05,
      "loss": 2.5564,
      "step": 95
    },
    {
      "epoch": 0.037298754279886354,
      "grad_norm": 0.8203125,
      "learning_rate": 7.441860465116279e-05,
      "loss": 2.627,
      "step": 96
    },
    {
      "epoch": 0.03768728297030184,
      "grad_norm": 0.94921875,
      "learning_rate": 7.519379844961241e-05,
      "loss": 2.6368,
      "step": 97
    },
    {
      "epoch": 0.03807581166071732,
      "grad_norm": 0.890625,
      "learning_rate": 7.596899224806202e-05,
      "loss": 2.5973,
      "step": 98
    },
    {
      "epoch": 0.0384643403511328,
      "grad_norm": 1.96875,
      "learning_rate": 7.674418604651163e-05,
      "loss": 2.5859,
      "step": 99
    },
    {
      "epoch": 0.038852869041548284,
      "grad_norm": 0.76953125,
      "learning_rate": 7.751937984496124e-05,
      "loss": 2.6385,
      "step": 100
    },
    {
      "epoch": 0.03924139773196377,
      "grad_norm": 0.85546875,
      "learning_rate": 7.829457364341086e-05,
      "loss": 2.6456,
      "step": 101
    },
    {
      "epoch": 0.03962992642237925,
      "grad_norm": 0.75,
      "learning_rate": 7.906976744186047e-05,
      "loss": 2.5322,
      "step": 102
    },
    {
      "epoch": 0.04001845511279473,
      "grad_norm": 0.78125,
      "learning_rate": 7.984496124031008e-05,
      "loss": 2.5148,
      "step": 103
    },
    {
      "epoch": 0.04040698380321022,
      "grad_norm": 0.78125,
      "learning_rate": 8.062015503875969e-05,
      "loss": 2.5003,
      "step": 104
    },
    {
      "epoch": 0.0407955124936257,
      "grad_norm": 0.87890625,
      "learning_rate": 8.139534883720931e-05,
      "loss": 2.6288,
      "step": 105
    },
    {
      "epoch": 0.04118404118404118,
      "grad_norm": 0.73828125,
      "learning_rate": 8.217054263565892e-05,
      "loss": 2.659,
      "step": 106
    },
    {
      "epoch": 0.041572569874456664,
      "grad_norm": 0.83984375,
      "learning_rate": 8.294573643410853e-05,
      "loss": 2.5254,
      "step": 107
    },
    {
      "epoch": 0.04196109856487215,
      "grad_norm": 0.79296875,
      "learning_rate": 8.372093023255814e-05,
      "loss": 2.5423,
      "step": 108
    },
    {
      "epoch": 0.04234962725528763,
      "grad_norm": 0.7890625,
      "learning_rate": 8.449612403100775e-05,
      "loss": 2.549,
      "step": 109
    },
    {
      "epoch": 0.04273815594570311,
      "grad_norm": 0.796875,
      "learning_rate": 8.527131782945736e-05,
      "loss": 2.6419,
      "step": 110
    },
    {
      "epoch": 0.0431266846361186,
      "grad_norm": 0.8671875,
      "learning_rate": 8.604651162790697e-05,
      "loss": 2.5485,
      "step": 111
    },
    {
      "epoch": 0.04351521332653408,
      "grad_norm": 0.8515625,
      "learning_rate": 8.68217054263566e-05,
      "loss": 2.5752,
      "step": 112
    },
    {
      "epoch": 0.04390374201694956,
      "grad_norm": 0.84375,
      "learning_rate": 8.759689922480621e-05,
      "loss": 2.5693,
      "step": 113
    },
    {
      "epoch": 0.04429227070736505,
      "grad_norm": 0.828125,
      "learning_rate": 8.837209302325582e-05,
      "loss": 2.5739,
      "step": 114
    },
    {
      "epoch": 0.04468079939778053,
      "grad_norm": 0.83203125,
      "learning_rate": 8.914728682170543e-05,
      "loss": 2.5961,
      "step": 115
    },
    {
      "epoch": 0.04506932808819601,
      "grad_norm": 0.78125,
      "learning_rate": 8.992248062015505e-05,
      "loss": 2.5351,
      "step": 116
    },
    {
      "epoch": 0.04545785677861149,
      "grad_norm": 0.76953125,
      "learning_rate": 9.069767441860465e-05,
      "loss": 2.5145,
      "step": 117
    },
    {
      "epoch": 0.04584638546902698,
      "grad_norm": 0.796875,
      "learning_rate": 9.147286821705426e-05,
      "loss": 2.5982,
      "step": 118
    },
    {
      "epoch": 0.04623491415944246,
      "grad_norm": 0.75390625,
      "learning_rate": 9.224806201550387e-05,
      "loss": 2.5882,
      "step": 119
    },
    {
      "epoch": 0.04662344284985794,
      "grad_norm": 0.84765625,
      "learning_rate": 9.30232558139535e-05,
      "loss": 2.6582,
      "step": 120
    },
    {
      "epoch": 0.04701197154027343,
      "grad_norm": 0.83203125,
      "learning_rate": 9.379844961240311e-05,
      "loss": 2.5619,
      "step": 121
    },
    {
      "epoch": 0.04740050023068891,
      "grad_norm": 0.90234375,
      "learning_rate": 9.457364341085272e-05,
      "loss": 2.588,
      "step": 122
    },
    {
      "epoch": 0.04778902892110439,
      "grad_norm": 0.7734375,
      "learning_rate": 9.534883720930233e-05,
      "loss": 2.5661,
      "step": 123
    },
    {
      "epoch": 0.04817755761151987,
      "grad_norm": 0.83984375,
      "learning_rate": 9.612403100775195e-05,
      "loss": 2.5462,
      "step": 124
    },
    {
      "epoch": 0.04856608630193536,
      "grad_norm": 0.8203125,
      "learning_rate": 9.689922480620155e-05,
      "loss": 2.5859,
      "step": 125
    },
    {
      "epoch": 0.04895461499235084,
      "grad_norm": 0.89453125,
      "learning_rate": 9.767441860465116e-05,
      "loss": 2.6039,
      "step": 126
    },
    {
      "epoch": 0.04934314368276632,
      "grad_norm": 0.84765625,
      "learning_rate": 9.844961240310078e-05,
      "loss": 2.6323,
      "step": 127
    },
    {
      "epoch": 0.04973167237318181,
      "grad_norm": 0.75390625,
      "learning_rate": 9.92248062015504e-05,
      "loss": 2.5727,
      "step": 128
    },
    {
      "epoch": 0.05012020106359729,
      "grad_norm": 0.81640625,
      "learning_rate": 0.0001,
      "loss": 2.604,
      "step": 129
    },
    {
      "epoch": 0.05050872975401277,
      "grad_norm": 0.7890625,
      "learning_rate": 0.00010077519379844962,
      "loss": 2.5792,
      "step": 130
    },
    {
      "epoch": 0.05089725844442826,
      "grad_norm": 0.7265625,
      "learning_rate": 0.00010155038759689923,
      "loss": 2.5262,
      "step": 131
    },
    {
      "epoch": 0.05128578713484374,
      "grad_norm": 0.82421875,
      "learning_rate": 0.00010232558139534885,
      "loss": 2.6018,
      "step": 132
    },
    {
      "epoch": 0.05167431582525922,
      "grad_norm": 0.875,
      "learning_rate": 0.00010310077519379846,
      "loss": 2.5635,
      "step": 133
    },
    {
      "epoch": 0.0520628445156747,
      "grad_norm": 0.77734375,
      "learning_rate": 0.00010387596899224807,
      "loss": 2.5924,
      "step": 134
    },
    {
      "epoch": 0.05245137320609019,
      "grad_norm": 0.84375,
      "learning_rate": 0.00010465116279069768,
      "loss": 2.6196,
      "step": 135
    },
    {
      "epoch": 0.05283990189650567,
      "grad_norm": 0.8359375,
      "learning_rate": 0.0001054263565891473,
      "loss": 2.5211,
      "step": 136
    },
    {
      "epoch": 0.05322843058692115,
      "grad_norm": 0.78125,
      "learning_rate": 0.00010620155038759692,
      "loss": 2.5739,
      "step": 137
    },
    {
      "epoch": 0.05361695927733664,
      "grad_norm": 0.7890625,
      "learning_rate": 0.00010697674418604651,
      "loss": 2.5201,
      "step": 138
    },
    {
      "epoch": 0.05400548796775212,
      "grad_norm": 0.7578125,
      "learning_rate": 0.00010775193798449612,
      "loss": 2.5191,
      "step": 139
    },
    {
      "epoch": 0.0543940166581676,
      "grad_norm": 0.88671875,
      "learning_rate": 0.00010852713178294573,
      "loss": 2.5712,
      "step": 140
    },
    {
      "epoch": 0.05478254534858308,
      "grad_norm": 0.80859375,
      "learning_rate": 0.00010930232558139534,
      "loss": 2.5579,
      "step": 141
    },
    {
      "epoch": 0.05517107403899857,
      "grad_norm": 0.86328125,
      "learning_rate": 0.00011007751937984496,
      "loss": 2.5769,
      "step": 142
    },
    {
      "epoch": 0.05555960272941405,
      "grad_norm": 0.81640625,
      "learning_rate": 0.00011085271317829458,
      "loss": 2.5025,
      "step": 143
    },
    {
      "epoch": 0.05594813141982953,
      "grad_norm": 0.80859375,
      "learning_rate": 0.00011162790697674419,
      "loss": 2.4641,
      "step": 144
    },
    {
      "epoch": 0.05633666011024502,
      "grad_norm": 0.78125,
      "learning_rate": 0.0001124031007751938,
      "loss": 2.4801,
      "step": 145
    },
    {
      "epoch": 0.0567251888006605,
      "grad_norm": 0.8515625,
      "learning_rate": 0.00011317829457364341,
      "loss": 2.5683,
      "step": 146
    },
    {
      "epoch": 0.05711371749107598,
      "grad_norm": 0.8046875,
      "learning_rate": 0.00011395348837209304,
      "loss": 2.5542,
      "step": 147
    },
    {
      "epoch": 0.05750224618149147,
      "grad_norm": 0.796875,
      "learning_rate": 0.00011472868217054265,
      "loss": 2.4648,
      "step": 148
    },
    {
      "epoch": 0.05789077487190695,
      "grad_norm": 0.8203125,
      "learning_rate": 0.00011550387596899226,
      "loss": 2.548,
      "step": 149
    },
    {
      "epoch": 0.05827930356232243,
      "grad_norm": 0.80078125,
      "learning_rate": 0.00011627906976744187,
      "loss": 2.6214,
      "step": 150
    },
    {
      "epoch": 0.05866783225273791,
      "grad_norm": 0.859375,
      "learning_rate": 0.00011705426356589149,
      "loss": 2.5642,
      "step": 151
    },
    {
      "epoch": 0.0590563609431534,
      "grad_norm": 0.76171875,
      "learning_rate": 0.0001178294573643411,
      "loss": 2.5584,
      "step": 152
    },
    {
      "epoch": 0.05944488963356888,
      "grad_norm": 0.80859375,
      "learning_rate": 0.00011860465116279071,
      "loss": 2.5286,
      "step": 153
    },
    {
      "epoch": 0.05983341832398436,
      "grad_norm": 0.875,
      "learning_rate": 0.00011937984496124031,
      "loss": 2.6014,
      "step": 154
    },
    {
      "epoch": 0.06022194701439985,
      "grad_norm": 0.8671875,
      "learning_rate": 0.00012015503875968992,
      "loss": 2.5721,
      "step": 155
    },
    {
      "epoch": 0.06061047570481533,
      "grad_norm": 0.8359375,
      "learning_rate": 0.00012093023255813953,
      "loss": 2.5731,
      "step": 156
    },
    {
      "epoch": 0.06099900439523081,
      "grad_norm": 0.79296875,
      "learning_rate": 0.00012170542635658914,
      "loss": 2.4835,
      "step": 157
    },
    {
      "epoch": 0.06138753308564629,
      "grad_norm": 0.7890625,
      "learning_rate": 0.00012248062015503876,
      "loss": 2.5875,
      "step": 158
    },
    {
      "epoch": 0.06177606177606178,
      "grad_norm": 0.77734375,
      "learning_rate": 0.00012325581395348836,
      "loss": 2.5394,
      "step": 159
    },
    {
      "epoch": 0.06216459046647726,
      "grad_norm": 0.75390625,
      "learning_rate": 0.00012403100775193799,
      "loss": 2.5477,
      "step": 160
    },
    {
      "epoch": 0.06255311915689274,
      "grad_norm": 0.8359375,
      "learning_rate": 0.0001248062015503876,
      "loss": 2.4915,
      "step": 161
    },
    {
      "epoch": 0.06294164784730823,
      "grad_norm": 0.859375,
      "learning_rate": 0.0001255813953488372,
      "loss": 2.5073,
      "step": 162
    },
    {
      "epoch": 0.0633301765377237,
      "grad_norm": 0.9140625,
      "learning_rate": 0.00012635658914728683,
      "loss": 2.5843,
      "step": 163
    },
    {
      "epoch": 0.06371870522813919,
      "grad_norm": 0.83984375,
      "learning_rate": 0.00012713178294573646,
      "loss": 2.5689,
      "step": 164
    },
    {
      "epoch": 0.06410723391855468,
      "grad_norm": 0.796875,
      "learning_rate": 0.00012790697674418605,
      "loss": 2.5615,
      "step": 165
    },
    {
      "epoch": 0.06449576260897015,
      "grad_norm": 0.8359375,
      "learning_rate": 0.00012868217054263568,
      "loss": 2.519,
      "step": 166
    },
    {
      "epoch": 0.06488429129938564,
      "grad_norm": 0.90625,
      "learning_rate": 0.00012945736434108527,
      "loss": 2.596,
      "step": 167
    },
    {
      "epoch": 0.06527281998980113,
      "grad_norm": 0.8203125,
      "learning_rate": 0.0001302325581395349,
      "loss": 2.5081,
      "step": 168
    },
    {
      "epoch": 0.0656613486802166,
      "grad_norm": 0.80078125,
      "learning_rate": 0.00013100775193798452,
      "loss": 2.5528,
      "step": 169
    },
    {
      "epoch": 0.06604987737063209,
      "grad_norm": 0.828125,
      "learning_rate": 0.0001317829457364341,
      "loss": 2.5174,
      "step": 170
    },
    {
      "epoch": 0.06643840606104758,
      "grad_norm": 0.765625,
      "learning_rate": 0.00013255813953488372,
      "loss": 2.5159,
      "step": 171
    },
    {
      "epoch": 0.06682693475146305,
      "grad_norm": 0.78515625,
      "learning_rate": 0.00013333333333333334,
      "loss": 2.5556,
      "step": 172
    },
    {
      "epoch": 0.06721546344187854,
      "grad_norm": 0.8125,
      "learning_rate": 0.00013410852713178294,
      "loss": 2.5453,
      "step": 173
    },
    {
      "epoch": 0.06760399213229402,
      "grad_norm": 0.8046875,
      "learning_rate": 0.00013488372093023256,
      "loss": 2.5447,
      "step": 174
    },
    {
      "epoch": 0.0679925208227095,
      "grad_norm": 0.79296875,
      "learning_rate": 0.00013565891472868218,
      "loss": 2.5426,
      "step": 175
    },
    {
      "epoch": 0.06838104951312499,
      "grad_norm": 0.765625,
      "learning_rate": 0.00013643410852713178,
      "loss": 2.5165,
      "step": 176
    },
    {
      "epoch": 0.06876957820354047,
      "grad_norm": 0.85546875,
      "learning_rate": 0.0001372093023255814,
      "loss": 2.4734,
      "step": 177
    },
    {
      "epoch": 0.06915810689395595,
      "grad_norm": 0.78515625,
      "learning_rate": 0.000137984496124031,
      "loss": 2.4351,
      "step": 178
    },
    {
      "epoch": 0.06954663558437144,
      "grad_norm": 0.83203125,
      "learning_rate": 0.00013875968992248063,
      "loss": 2.5237,
      "step": 179
    },
    {
      "epoch": 0.06993516427478691,
      "grad_norm": 0.76953125,
      "learning_rate": 0.00013953488372093025,
      "loss": 2.5439,
      "step": 180
    },
    {
      "epoch": 0.0703236929652024,
      "grad_norm": 0.8203125,
      "learning_rate": 0.00014031007751937985,
      "loss": 2.4856,
      "step": 181
    },
    {
      "epoch": 0.07071222165561789,
      "grad_norm": 0.9140625,
      "learning_rate": 0.00014108527131782947,
      "loss": 2.4619,
      "step": 182
    },
    {
      "epoch": 0.07110075034603336,
      "grad_norm": 0.8046875,
      "learning_rate": 0.0001418604651162791,
      "loss": 2.5004,
      "step": 183
    },
    {
      "epoch": 0.07148927903644885,
      "grad_norm": 0.828125,
      "learning_rate": 0.0001426356589147287,
      "loss": 2.4954,
      "step": 184
    },
    {
      "epoch": 0.07187780772686433,
      "grad_norm": 0.8515625,
      "learning_rate": 0.00014341085271317832,
      "loss": 2.5676,
      "step": 185
    },
    {
      "epoch": 0.07226633641727981,
      "grad_norm": 0.86328125,
      "learning_rate": 0.00014418604651162791,
      "loss": 2.5853,
      "step": 186
    },
    {
      "epoch": 0.0726548651076953,
      "grad_norm": 0.76953125,
      "learning_rate": 0.0001449612403100775,
      "loss": 2.4586,
      "step": 187
    },
    {
      "epoch": 0.07304339379811078,
      "grad_norm": 0.9609375,
      "learning_rate": 0.00014573643410852714,
      "loss": 2.4955,
      "step": 188
    },
    {
      "epoch": 0.07343192248852626,
      "grad_norm": 0.9296875,
      "learning_rate": 0.00014651162790697673,
      "loss": 2.5761,
      "step": 189
    },
    {
      "epoch": 0.07382045117894175,
      "grad_norm": 0.796875,
      "learning_rate": 0.00014728682170542636,
      "loss": 2.5159,
      "step": 190
    },
    {
      "epoch": 0.07420897986935723,
      "grad_norm": 0.8828125,
      "learning_rate": 0.00014806201550387598,
      "loss": 2.4549,
      "step": 191
    },
    {
      "epoch": 0.07459750855977271,
      "grad_norm": 0.88671875,
      "learning_rate": 0.00014883720930232558,
      "loss": 2.4911,
      "step": 192
    },
    {
      "epoch": 0.0749860372501882,
      "grad_norm": 0.8515625,
      "learning_rate": 0.0001496124031007752,
      "loss": 2.5391,
      "step": 193
    },
    {
      "epoch": 0.07537456594060368,
      "grad_norm": 0.875,
      "learning_rate": 0.00015038759689922483,
      "loss": 2.5419,
      "step": 194
    },
    {
      "epoch": 0.07576309463101916,
      "grad_norm": 0.91796875,
      "learning_rate": 0.00015116279069767442,
      "loss": 2.5355,
      "step": 195
    },
    {
      "epoch": 0.07615162332143464,
      "grad_norm": 0.8671875,
      "learning_rate": 0.00015193798449612405,
      "loss": 2.5481,
      "step": 196
    },
    {
      "epoch": 0.07654015201185012,
      "grad_norm": 0.8359375,
      "learning_rate": 0.00015271317829457364,
      "loss": 2.5098,
      "step": 197
    },
    {
      "epoch": 0.0769286807022656,
      "grad_norm": 0.81640625,
      "learning_rate": 0.00015348837209302327,
      "loss": 2.47,
      "step": 198
    },
    {
      "epoch": 0.0773172093926811,
      "grad_norm": 0.875,
      "learning_rate": 0.0001542635658914729,
      "loss": 2.5496,
      "step": 199
    },
    {
      "epoch": 0.07770573808309657,
      "grad_norm": 0.921875,
      "learning_rate": 0.0001550387596899225,
      "loss": 2.579,
      "step": 200
    },
    {
      "epoch": 0.07809426677351206,
      "grad_norm": 0.89453125,
      "learning_rate": 0.0001558139534883721,
      "loss": 2.538,
      "step": 201
    },
    {
      "epoch": 0.07848279546392754,
      "grad_norm": 0.83203125,
      "learning_rate": 0.0001565891472868217,
      "loss": 2.4561,
      "step": 202
    },
    {
      "epoch": 0.07887132415434302,
      "grad_norm": 0.88671875,
      "learning_rate": 0.0001573643410852713,
      "loss": 2.4939,
      "step": 203
    },
    {
      "epoch": 0.0792598528447585,
      "grad_norm": 0.93359375,
      "learning_rate": 0.00015813953488372093,
      "loss": 2.5002,
      "step": 204
    },
    {
      "epoch": 0.07964838153517399,
      "grad_norm": 0.81640625,
      "learning_rate": 0.00015891472868217056,
      "loss": 2.4702,
      "step": 205
    },
    {
      "epoch": 0.08003691022558947,
      "grad_norm": 0.8125,
      "learning_rate": 0.00015968992248062015,
      "loss": 2.4821,
      "step": 206
    },
    {
      "epoch": 0.08042543891600495,
      "grad_norm": 0.94140625,
      "learning_rate": 0.00016046511627906978,
      "loss": 2.5318,
      "step": 207
    },
    {
      "epoch": 0.08081396760642044,
      "grad_norm": 0.90234375,
      "learning_rate": 0.00016124031007751937,
      "loss": 2.4792,
      "step": 208
    },
    {
      "epoch": 0.08120249629683592,
      "grad_norm": 0.78515625,
      "learning_rate": 0.000162015503875969,
      "loss": 2.432,
      "step": 209
    },
    {
      "epoch": 0.0815910249872514,
      "grad_norm": 0.90625,
      "learning_rate": 0.00016279069767441862,
      "loss": 2.5145,
      "step": 210
    },
    {
      "epoch": 0.08197955367766689,
      "grad_norm": 0.84765625,
      "learning_rate": 0.00016356589147286822,
      "loss": 2.4505,
      "step": 211
    },
    {
      "epoch": 0.08236808236808237,
      "grad_norm": 0.94921875,
      "learning_rate": 0.00016434108527131784,
      "loss": 2.5375,
      "step": 212
    },
    {
      "epoch": 0.08275661105849785,
      "grad_norm": 0.91796875,
      "learning_rate": 0.00016511627906976747,
      "loss": 2.4642,
      "step": 213
    },
    {
      "epoch": 0.08314513974891333,
      "grad_norm": 0.9140625,
      "learning_rate": 0.00016589147286821706,
      "loss": 2.5279,
      "step": 214
    },
    {
      "epoch": 0.08353366843932881,
      "grad_norm": 0.8828125,
      "learning_rate": 0.0001666666666666667,
      "loss": 2.5083,
      "step": 215
    },
    {
      "epoch": 0.0839221971297443,
      "grad_norm": 0.98046875,
      "learning_rate": 0.00016744186046511629,
      "loss": 2.5472,
      "step": 216
    },
    {
      "epoch": 0.08431072582015978,
      "grad_norm": 0.9296875,
      "learning_rate": 0.0001682170542635659,
      "loss": 2.5635,
      "step": 217
    },
    {
      "epoch": 0.08469925451057526,
      "grad_norm": 0.9296875,
      "learning_rate": 0.0001689922480620155,
      "loss": 2.4878,
      "step": 218
    },
    {
      "epoch": 0.08508778320099075,
      "grad_norm": 0.9296875,
      "learning_rate": 0.0001697674418604651,
      "loss": 2.5143,
      "step": 219
    },
    {
      "epoch": 0.08547631189140623,
      "grad_norm": 0.94921875,
      "learning_rate": 0.00017054263565891473,
      "loss": 2.5344,
      "step": 220
    },
    {
      "epoch": 0.08586484058182171,
      "grad_norm": 0.83984375,
      "learning_rate": 0.00017131782945736435,
      "loss": 2.5229,
      "step": 221
    },
    {
      "epoch": 0.0862533692722372,
      "grad_norm": 0.9375,
      "learning_rate": 0.00017209302325581395,
      "loss": 2.5182,
      "step": 222
    },
    {
      "epoch": 0.08664189796265268,
      "grad_norm": 0.91015625,
      "learning_rate": 0.00017286821705426357,
      "loss": 2.5788,
      "step": 223
    },
    {
      "epoch": 0.08703042665306816,
      "grad_norm": 0.8984375,
      "learning_rate": 0.0001736434108527132,
      "loss": 2.4586,
      "step": 224
    },
    {
      "epoch": 0.08741895534348365,
      "grad_norm": 0.984375,
      "learning_rate": 0.0001744186046511628,
      "loss": 2.4999,
      "step": 225
    },
    {
      "epoch": 0.08780748403389912,
      "grad_norm": 0.84765625,
      "learning_rate": 0.00017519379844961242,
      "loss": 2.4922,
      "step": 226
    },
    {
      "epoch": 0.08819601272431461,
      "grad_norm": 1.15625,
      "learning_rate": 0.00017596899224806201,
      "loss": 2.5033,
      "step": 227
    },
    {
      "epoch": 0.0885845414147301,
      "grad_norm": 0.953125,
      "learning_rate": 0.00017674418604651164,
      "loss": 2.5631,
      "step": 228
    },
    {
      "epoch": 0.08897307010514557,
      "grad_norm": 1.1640625,
      "learning_rate": 0.00017751937984496126,
      "loss": 2.5307,
      "step": 229
    },
    {
      "epoch": 0.08936159879556106,
      "grad_norm": 0.9375,
      "learning_rate": 0.00017829457364341086,
      "loss": 2.5699,
      "step": 230
    },
    {
      "epoch": 0.08975012748597654,
      "grad_norm": 1.0546875,
      "learning_rate": 0.00017906976744186048,
      "loss": 2.5328,
      "step": 231
    },
    {
      "epoch": 0.09013865617639202,
      "grad_norm": 0.9765625,
      "learning_rate": 0.0001798449612403101,
      "loss": 2.4664,
      "step": 232
    },
    {
      "epoch": 0.09052718486680751,
      "grad_norm": 0.9609375,
      "learning_rate": 0.0001806201550387597,
      "loss": 2.4832,
      "step": 233
    },
    {
      "epoch": 0.09091571355722299,
      "grad_norm": 0.921875,
      "learning_rate": 0.0001813953488372093,
      "loss": 2.5115,
      "step": 234
    },
    {
      "epoch": 0.09130424224763847,
      "grad_norm": 0.921875,
      "learning_rate": 0.00018217054263565893,
      "loss": 2.4564,
      "step": 235
    },
    {
      "epoch": 0.09169277093805396,
      "grad_norm": 0.9375,
      "learning_rate": 0.00018294573643410852,
      "loss": 2.4124,
      "step": 236
    },
    {
      "epoch": 0.09208129962846943,
      "grad_norm": 0.85546875,
      "learning_rate": 0.00018372093023255815,
      "loss": 2.4601,
      "step": 237
    },
    {
      "epoch": 0.09246982831888492,
      "grad_norm": 0.9296875,
      "learning_rate": 0.00018449612403100774,
      "loss": 2.5447,
      "step": 238
    },
    {
      "epoch": 0.09285835700930041,
      "grad_norm": 0.90234375,
      "learning_rate": 0.00018527131782945737,
      "loss": 2.4984,
      "step": 239
    },
    {
      "epoch": 0.09324688569971588,
      "grad_norm": 0.90234375,
      "learning_rate": 0.000186046511627907,
      "loss": 2.4448,
      "step": 240
    },
    {
      "epoch": 0.09363541439013137,
      "grad_norm": 0.90234375,
      "learning_rate": 0.0001868217054263566,
      "loss": 2.501,
      "step": 241
    },
    {
      "epoch": 0.09402394308054686,
      "grad_norm": 0.9375,
      "learning_rate": 0.00018759689922480621,
      "loss": 2.4614,
      "step": 242
    },
    {
      "epoch": 0.09441247177096233,
      "grad_norm": 0.8984375,
      "learning_rate": 0.00018837209302325584,
      "loss": 2.5024,
      "step": 243
    },
    {
      "epoch": 0.09480100046137782,
      "grad_norm": 0.87890625,
      "learning_rate": 0.00018914728682170543,
      "loss": 2.4858,
      "step": 244
    },
    {
      "epoch": 0.09518952915179331,
      "grad_norm": 1.015625,
      "learning_rate": 0.00018992248062015506,
      "loss": 2.55,
      "step": 245
    },
    {
      "epoch": 0.09557805784220878,
      "grad_norm": 0.9375,
      "learning_rate": 0.00019069767441860466,
      "loss": 2.5271,
      "step": 246
    },
    {
      "epoch": 0.09596658653262427,
      "grad_norm": 0.875,
      "learning_rate": 0.00019147286821705428,
      "loss": 2.4523,
      "step": 247
    },
    {
      "epoch": 0.09635511522303974,
      "grad_norm": 0.9140625,
      "learning_rate": 0.0001922480620155039,
      "loss": 2.4744,
      "step": 248
    },
    {
      "epoch": 0.09674364391345523,
      "grad_norm": 0.87890625,
      "learning_rate": 0.0001930232558139535,
      "loss": 2.5274,
      "step": 249
    },
    {
      "epoch": 0.09713217260387072,
      "grad_norm": 0.90234375,
      "learning_rate": 0.0001937984496124031,
      "loss": 2.5121,
      "step": 250
    },
    {
      "epoch": 0.0975207012942862,
      "grad_norm": 0.8515625,
      "learning_rate": 0.00019457364341085272,
      "loss": 2.482,
      "step": 251
    },
    {
      "epoch": 0.09790922998470168,
      "grad_norm": 0.84375,
      "learning_rate": 0.00019534883720930232,
      "loss": 2.5032,
      "step": 252
    },
    {
      "epoch": 0.09829775867511717,
      "grad_norm": 0.90234375,
      "learning_rate": 0.00019612403100775194,
      "loss": 2.5395,
      "step": 253
    },
    {
      "epoch": 0.09868628736553264,
      "grad_norm": 0.8984375,
      "learning_rate": 0.00019689922480620157,
      "loss": 2.485,
      "step": 254
    },
    {
      "epoch": 0.09907481605594813,
      "grad_norm": 0.8984375,
      "learning_rate": 0.00019767441860465116,
      "loss": 2.4981,
      "step": 255
    },
    {
      "epoch": 0.09946334474636362,
      "grad_norm": 0.94921875,
      "learning_rate": 0.0001984496124031008,
      "loss": 2.5291,
      "step": 256
    },
    {
      "epoch": 0.09985187343677909,
      "grad_norm": 0.88671875,
      "learning_rate": 0.00019922480620155039,
      "loss": 2.4618,
      "step": 257
    },
    {
      "epoch": 0.10024040212719458,
      "grad_norm": 0.87109375,
      "learning_rate": 0.0002,
      "loss": 2.4954,
      "step": 258
    },
    {
      "epoch": 0.10062893081761007,
      "grad_norm": 0.828125,
      "learning_rate": 0.000199999907919495,
      "loss": 2.5519,
      "step": 259
    },
    {
      "epoch": 0.10101745950802554,
      "grad_norm": 0.9296875,
      "learning_rate": 0.00019999963167814967,
      "loss": 2.5409,
      "step": 260
    },
    {
      "epoch": 0.10140598819844103,
      "grad_norm": 0.96875,
      "learning_rate": 0.0001999991712764726,
      "loss": 2.4918,
      "step": 261
    },
    {
      "epoch": 0.10179451688885652,
      "grad_norm": 0.96875,
      "learning_rate": 0.0001999985267153118,
      "loss": 2.5241,
      "step": 262
    },
    {
      "epoch": 0.10218304557927199,
      "grad_norm": 0.91015625,
      "learning_rate": 0.00019999769799585422,
      "loss": 2.5064,
      "step": 263
    },
    {
      "epoch": 0.10257157426968748,
      "grad_norm": 0.87109375,
      "learning_rate": 0.00019999668511962607,
      "loss": 2.5119,
      "step": 264
    },
    {
      "epoch": 0.10296010296010295,
      "grad_norm": 1.0,
      "learning_rate": 0.00019999548808849268,
      "loss": 2.4937,
      "step": 265
    },
    {
      "epoch": 0.10334863165051844,
      "grad_norm": 1.0546875,
      "learning_rate": 0.00019999410690465852,
      "loss": 2.5707,
      "step": 266
    },
    {
      "epoch": 0.10373716034093393,
      "grad_norm": 0.94140625,
      "learning_rate": 0.00019999254157066716,
      "loss": 2.458,
      "step": 267
    },
    {
      "epoch": 0.1041256890313494,
      "grad_norm": 1.0625,
      "learning_rate": 0.00019999079208940135,
      "loss": 2.5633,
      "step": 268
    },
    {
      "epoch": 0.10451421772176489,
      "grad_norm": 0.91796875,
      "learning_rate": 0.00019998885846408295,
      "loss": 2.5172,
      "step": 269
    },
    {
      "epoch": 0.10490274641218038,
      "grad_norm": 0.96484375,
      "learning_rate": 0.00019998674069827293,
      "loss": 2.531,
      "step": 270
    },
    {
      "epoch": 0.10529127510259585,
      "grad_norm": 0.8671875,
      "learning_rate": 0.00019998443879587144,
      "loss": 2.5153,
      "step": 271
    },
    {
      "epoch": 0.10567980379301134,
      "grad_norm": 0.87890625,
      "learning_rate": 0.00019998195276111762,
      "loss": 2.4719,
      "step": 272
    },
    {
      "epoch": 0.10606833248342683,
      "grad_norm": 0.859375,
      "learning_rate": 0.00019997928259858985,
      "loss": 2.4519,
      "step": 273
    },
    {
      "epoch": 0.1064568611738423,
      "grad_norm": 0.91796875,
      "learning_rate": 0.00019997642831320547,
      "loss": 2.3848,
      "step": 274
    },
    {
      "epoch": 0.10684538986425779,
      "grad_norm": 0.94921875,
      "learning_rate": 0.00019997338991022096,
      "loss": 2.4939,
      "step": 275
    },
    {
      "epoch": 0.10723391855467328,
      "grad_norm": 0.921875,
      "learning_rate": 0.00019997016739523192,
      "loss": 2.4016,
      "step": 276
    },
    {
      "epoch": 0.10762244724508875,
      "grad_norm": 0.9296875,
      "learning_rate": 0.00019996676077417292,
      "loss": 2.5356,
      "step": 277
    },
    {
      "epoch": 0.10801097593550424,
      "grad_norm": 0.91796875,
      "learning_rate": 0.00019996317005331767,
      "loss": 2.5133,
      "step": 278
    },
    {
      "epoch": 0.10839950462591973,
      "grad_norm": 0.96484375,
      "learning_rate": 0.00019995939523927882,
      "loss": 2.4938,
      "step": 279
    },
    {
      "epoch": 0.1087880333163352,
      "grad_norm": 0.87109375,
      "learning_rate": 0.00019995543633900817,
      "loss": 2.4887,
      "step": 280
    },
    {
      "epoch": 0.10917656200675069,
      "grad_norm": 0.96875,
      "learning_rate": 0.00019995129335979644,
      "loss": 2.4348,
      "step": 281
    },
    {
      "epoch": 0.10956509069716616,
      "grad_norm": 0.91796875,
      "learning_rate": 0.00019994696630927337,
      "loss": 2.4854,
      "step": 282
    },
    {
      "epoch": 0.10995361938758165,
      "grad_norm": 1.1484375,
      "learning_rate": 0.0001999424551954077,
      "loss": 2.517,
      "step": 283
    },
    {
      "epoch": 0.11034214807799714,
      "grad_norm": 0.984375,
      "learning_rate": 0.0001999377600265072,
      "loss": 2.4481,
      "step": 284
    },
    {
      "epoch": 0.11073067676841261,
      "grad_norm": 1.046875,
      "learning_rate": 0.00019993288081121845,
      "loss": 2.4257,
      "step": 285
    },
    {
      "epoch": 0.1111192054588281,
      "grad_norm": 0.91796875,
      "learning_rate": 0.00019992781755852712,
      "loss": 2.5133,
      "step": 286
    },
    {
      "epoch": 0.11150773414924359,
      "grad_norm": 0.8828125,
      "learning_rate": 0.00019992257027775777,
      "loss": 2.5131,
      "step": 287
    },
    {
      "epoch": 0.11189626283965906,
      "grad_norm": 0.953125,
      "learning_rate": 0.00019991713897857377,
      "loss": 2.5679,
      "step": 288
    },
    {
      "epoch": 0.11228479153007455,
      "grad_norm": 0.8984375,
      "learning_rate": 0.00019991152367097753,
      "loss": 2.4768,
      "step": 289
    },
    {
      "epoch": 0.11267332022049004,
      "grad_norm": 0.99609375,
      "learning_rate": 0.0001999057243653102,
      "loss": 2.4971,
      "step": 290
    },
    {
      "epoch": 0.11306184891090551,
      "grad_norm": 0.88671875,
      "learning_rate": 0.00019989974107225184,
      "loss": 2.4146,
      "step": 291
    },
    {
      "epoch": 0.113450377601321,
      "grad_norm": 0.90625,
      "learning_rate": 0.0001998935738028214,
      "loss": 2.4849,
      "step": 292
    },
    {
      "epoch": 0.11383890629173649,
      "grad_norm": 0.921875,
      "learning_rate": 0.00019988722256837656,
      "loss": 2.5275,
      "step": 293
    },
    {
      "epoch": 0.11422743498215196,
      "grad_norm": 0.9765625,
      "learning_rate": 0.0001998806873806138,
      "loss": 2.4933,
      "step": 294
    },
    {
      "epoch": 0.11461596367256745,
      "grad_norm": 0.8984375,
      "learning_rate": 0.00019987396825156838,
      "loss": 2.4731,
      "step": 295
    },
    {
      "epoch": 0.11500449236298294,
      "grad_norm": 0.8984375,
      "learning_rate": 0.00019986706519361434,
      "loss": 2.489,
      "step": 296
    },
    {
      "epoch": 0.11539302105339841,
      "grad_norm": 0.9453125,
      "learning_rate": 0.00019985997821946443,
      "loss": 2.4249,
      "step": 297
    },
    {
      "epoch": 0.1157815497438139,
      "grad_norm": 0.91015625,
      "learning_rate": 0.00019985270734217006,
      "loss": 2.4817,
      "step": 298
    },
    {
      "epoch": 0.11617007843422937,
      "grad_norm": 1.0546875,
      "learning_rate": 0.00019984525257512142,
      "loss": 2.5107,
      "step": 299
    },
    {
      "epoch": 0.11655860712464486,
      "grad_norm": 0.953125,
      "learning_rate": 0.00019983761393204717,
      "loss": 2.5681,
      "step": 300
    },
    {
      "epoch": 0.11694713581506035,
      "grad_norm": 0.9609375,
      "learning_rate": 0.0001998297914270148,
      "loss": 2.5334,
      "step": 301
    },
    {
      "epoch": 0.11733566450547582,
      "grad_norm": 0.9453125,
      "learning_rate": 0.00019982178507443028,
      "loss": 2.4393,
      "step": 302
    },
    {
      "epoch": 0.11772419319589131,
      "grad_norm": 0.9140625,
      "learning_rate": 0.0001998135948890382,
      "loss": 2.4662,
      "step": 303
    },
    {
      "epoch": 0.1181127218863068,
      "grad_norm": 0.875,
      "learning_rate": 0.0001998052208859217,
      "loss": 2.4961,
      "step": 304
    },
    {
      "epoch": 0.11850125057672227,
      "grad_norm": 0.90234375,
      "learning_rate": 0.00019979666308050242,
      "loss": 2.4746,
      "step": 305
    },
    {
      "epoch": 0.11888977926713776,
      "grad_norm": 1.015625,
      "learning_rate": 0.0001997879214885405,
      "loss": 2.508,
      "step": 306
    },
    {
      "epoch": 0.11927830795755325,
      "grad_norm": 0.83203125,
      "learning_rate": 0.0001997789961261345,
      "loss": 2.4174,
      "step": 307
    },
    {
      "epoch": 0.11966683664796872,
      "grad_norm": 1.0078125,
      "learning_rate": 0.00019976988700972154,
      "loss": 2.567,
      "step": 308
    },
    {
      "epoch": 0.1200553653383842,
      "grad_norm": 0.875,
      "learning_rate": 0.00019976059415607698,
      "loss": 2.551,
      "step": 309
    },
    {
      "epoch": 0.1204438940287997,
      "grad_norm": 0.93359375,
      "learning_rate": 0.0001997511175823147,
      "loss": 2.5188,
      "step": 310
    },
    {
      "epoch": 0.12083242271921517,
      "grad_norm": 0.88671875,
      "learning_rate": 0.00019974145730588684,
      "loss": 2.5237,
      "step": 311
    },
    {
      "epoch": 0.12122095140963066,
      "grad_norm": 0.94140625,
      "learning_rate": 0.00019973161334458386,
      "loss": 2.4621,
      "step": 312
    },
    {
      "epoch": 0.12160948010004614,
      "grad_norm": 0.9296875,
      "learning_rate": 0.00019972158571653444,
      "loss": 2.4367,
      "step": 313
    },
    {
      "epoch": 0.12199800879046162,
      "grad_norm": 0.890625,
      "learning_rate": 0.00019971137444020563,
      "loss": 2.3794,
      "step": 314
    },
    {
      "epoch": 0.1223865374808771,
      "grad_norm": 0.97265625,
      "learning_rate": 0.00019970097953440262,
      "loss": 2.402,
      "step": 315
    },
    {
      "epoch": 0.12277506617129258,
      "grad_norm": 1.03125,
      "learning_rate": 0.00019969040101826873,
      "loss": 2.4515,
      "step": 316
    },
    {
      "epoch": 0.12316359486170807,
      "grad_norm": 0.96484375,
      "learning_rate": 0.00019967963891128547,
      "loss": 2.4462,
      "step": 317
    },
    {
      "epoch": 0.12355212355212356,
      "grad_norm": 0.9453125,
      "learning_rate": 0.00019966869323327248,
      "loss": 2.4375,
      "step": 318
    },
    {
      "epoch": 0.12394065224253903,
      "grad_norm": 0.84375,
      "learning_rate": 0.00019965756400438739,
      "loss": 2.4505,
      "step": 319
    },
    {
      "epoch": 0.12432918093295452,
      "grad_norm": 1.1171875,
      "learning_rate": 0.00019964625124512596,
      "loss": 2.4783,
      "step": 320
    },
    {
      "epoch": 0.12471770962337,
      "grad_norm": 1.0234375,
      "learning_rate": 0.00019963475497632182,
      "loss": 2.4582,
      "step": 321
    },
    {
      "epoch": 0.12510623831378548,
      "grad_norm": 0.91796875,
      "learning_rate": 0.00019962307521914662,
      "loss": 2.5833,
      "step": 322
    },
    {
      "epoch": 0.12549476700420098,
      "grad_norm": 1.0703125,
      "learning_rate": 0.00019961121199510998,
      "loss": 2.4726,
      "step": 323
    },
    {
      "epoch": 0.12588329569461645,
      "grad_norm": 1.0234375,
      "learning_rate": 0.00019959916532605925,
      "loss": 2.5252,
      "step": 324
    },
    {
      "epoch": 0.12627182438503193,
      "grad_norm": 0.95703125,
      "learning_rate": 0.00019958693523417976,
      "loss": 2.512,
      "step": 325
    },
    {
      "epoch": 0.1266603530754474,
      "grad_norm": 0.9765625,
      "learning_rate": 0.00019957452174199452,
      "loss": 2.4786,
      "step": 326
    },
    {
      "epoch": 0.1270488817658629,
      "grad_norm": 0.9453125,
      "learning_rate": 0.0001995619248723644,
      "loss": 2.5086,
      "step": 327
    },
    {
      "epoch": 0.12743741045627838,
      "grad_norm": 0.91796875,
      "learning_rate": 0.00019954914464848786,
      "loss": 2.5146,
      "step": 328
    },
    {
      "epoch": 0.12782593914669385,
      "grad_norm": 1.1484375,
      "learning_rate": 0.00019953618109390115,
      "loss": 2.4517,
      "step": 329
    },
    {
      "epoch": 0.12821446783710935,
      "grad_norm": 0.94140625,
      "learning_rate": 0.00019952303423247805,
      "loss": 2.4473,
      "step": 330
    },
    {
      "epoch": 0.12860299652752483,
      "grad_norm": 1.1015625,
      "learning_rate": 0.00019950970408842995,
      "loss": 2.4946,
      "step": 331
    },
    {
      "epoch": 0.1289915252179403,
      "grad_norm": 0.9609375,
      "learning_rate": 0.00019949619068630576,
      "loss": 2.3974,
      "step": 332
    },
    {
      "epoch": 0.1293800539083558,
      "grad_norm": 1.0625,
      "learning_rate": 0.00019948249405099197,
      "loss": 2.5094,
      "step": 333
    },
    {
      "epoch": 0.12976858259877128,
      "grad_norm": 1.0078125,
      "learning_rate": 0.00019946861420771237,
      "loss": 2.4857,
      "step": 334
    },
    {
      "epoch": 0.13015711128918675,
      "grad_norm": 0.890625,
      "learning_rate": 0.00019945455118202822,
      "loss": 2.5822,
      "step": 335
    },
    {
      "epoch": 0.13054563997960225,
      "grad_norm": 0.9296875,
      "learning_rate": 0.0001994403049998382,
      "loss": 2.4962,
      "step": 336
    },
    {
      "epoch": 0.13093416867001773,
      "grad_norm": 0.91015625,
      "learning_rate": 0.00019942587568737817,
      "loss": 2.5256,
      "step": 337
    },
    {
      "epoch": 0.1313226973604332,
      "grad_norm": 0.97265625,
      "learning_rate": 0.0001994112632712213,
      "loss": 2.4939,
      "step": 338
    },
    {
      "epoch": 0.1317112260508487,
      "grad_norm": 0.859375,
      "learning_rate": 0.00019939646777827793,
      "loss": 2.4533,
      "step": 339
    },
    {
      "epoch": 0.13209975474126417,
      "grad_norm": 0.8515625,
      "learning_rate": 0.0001993814892357957,
      "loss": 2.476,
      "step": 340
    },
    {
      "epoch": 0.13248828343167965,
      "grad_norm": 0.83203125,
      "learning_rate": 0.0001993663276713591,
      "loss": 2.4901,
      "step": 341
    },
    {
      "epoch": 0.13287681212209515,
      "grad_norm": 0.85546875,
      "learning_rate": 0.00019935098311288995,
      "loss": 2.4806,
      "step": 342
    },
    {
      "epoch": 0.13326534081251062,
      "grad_norm": 0.8984375,
      "learning_rate": 0.00019933545558864686,
      "loss": 2.4668,
      "step": 343
    },
    {
      "epoch": 0.1336538695029261,
      "grad_norm": 0.84375,
      "learning_rate": 0.0001993197451272255,
      "loss": 2.5187,
      "step": 344
    },
    {
      "epoch": 0.1340423981933416,
      "grad_norm": 0.9140625,
      "learning_rate": 0.0001993038517575584,
      "loss": 2.5422,
      "step": 345
    },
    {
      "epoch": 0.13443092688375707,
      "grad_norm": 0.9375,
      "learning_rate": 0.000199287775508915,
      "loss": 2.472,
      "step": 346
    },
    {
      "epoch": 0.13481945557417255,
      "grad_norm": 0.9296875,
      "learning_rate": 0.00019927151641090145,
      "loss": 2.4643,
      "step": 347
    },
    {
      "epoch": 0.13520798426458805,
      "grad_norm": 0.92578125,
      "learning_rate": 0.00019925507449346065,
      "loss": 2.4486,
      "step": 348
    },
    {
      "epoch": 0.13559651295500352,
      "grad_norm": 0.94921875,
      "learning_rate": 0.00019923844978687224,
      "loss": 2.4472,
      "step": 349
    },
    {
      "epoch": 0.135985041645419,
      "grad_norm": 0.95703125,
      "learning_rate": 0.0001992216423217524,
      "loss": 2.3693,
      "step": 350
    },
    {
      "epoch": 0.1363735703358345,
      "grad_norm": 0.9921875,
      "learning_rate": 0.00019920465212905394,
      "loss": 2.4148,
      "step": 351
    },
    {
      "epoch": 0.13676209902624997,
      "grad_norm": 1.046875,
      "learning_rate": 0.00019918747924006623,
      "loss": 2.559,
      "step": 352
    },
    {
      "epoch": 0.13715062771666545,
      "grad_norm": 1.0546875,
      "learning_rate": 0.00019917012368641496,
      "loss": 2.433,
      "step": 353
    },
    {
      "epoch": 0.13753915640708095,
      "grad_norm": 1.234375,
      "learning_rate": 0.00019915258550006235,
      "loss": 2.4333,
      "step": 354
    },
    {
      "epoch": 0.13792768509749642,
      "grad_norm": 0.93359375,
      "learning_rate": 0.00019913486471330685,
      "loss": 2.4635,
      "step": 355
    },
    {
      "epoch": 0.1383162137879119,
      "grad_norm": 0.97265625,
      "learning_rate": 0.0001991169613587833,
      "loss": 2.4907,
      "step": 356
    },
    {
      "epoch": 0.1387047424783274,
      "grad_norm": 1.0,
      "learning_rate": 0.00019909887546946261,
      "loss": 2.4457,
      "step": 357
    },
    {
      "epoch": 0.13909327116874287,
      "grad_norm": 0.86328125,
      "learning_rate": 0.00019908060707865202,
      "loss": 2.4854,
      "step": 358
    },
    {
      "epoch": 0.13948179985915835,
      "grad_norm": 0.9140625,
      "learning_rate": 0.00019906215621999474,
      "loss": 2.4735,
      "step": 359
    },
    {
      "epoch": 0.13987032854957382,
      "grad_norm": 0.9140625,
      "learning_rate": 0.00019904352292747008,
      "loss": 2.537,
      "step": 360
    },
    {
      "epoch": 0.14025885723998932,
      "grad_norm": 0.88671875,
      "learning_rate": 0.00019902470723539328,
      "loss": 2.5154,
      "step": 361
    },
    {
      "epoch": 0.1406473859304048,
      "grad_norm": 0.96875,
      "learning_rate": 0.00019900570917841552,
      "loss": 2.4888,
      "step": 362
    },
    {
      "epoch": 0.14103591462082027,
      "grad_norm": 0.88671875,
      "learning_rate": 0.0001989865287915238,
      "loss": 2.4895,
      "step": 363
    },
    {
      "epoch": 0.14142444331123577,
      "grad_norm": 0.86328125,
      "learning_rate": 0.00019896716611004096,
      "loss": 2.464,
      "step": 364
    },
    {
      "epoch": 0.14181297200165124,
      "grad_norm": 0.8828125,
      "learning_rate": 0.00019894762116962544,
      "loss": 2.4514,
      "step": 365
    },
    {
      "epoch": 0.14220150069206672,
      "grad_norm": 1.03125,
      "learning_rate": 0.00019892789400627147,
      "loss": 2.455,
      "step": 366
    },
    {
      "epoch": 0.14259002938248222,
      "grad_norm": 0.8671875,
      "learning_rate": 0.0001989079846563087,
      "loss": 2.4248,
      "step": 367
    },
    {
      "epoch": 0.1429785580728977,
      "grad_norm": 0.953125,
      "learning_rate": 0.00019888789315640254,
      "loss": 2.5139,
      "step": 368
    },
    {
      "epoch": 0.14336708676331317,
      "grad_norm": 0.84375,
      "learning_rate": 0.00019886761954355354,
      "loss": 2.4619,
      "step": 369
    },
    {
      "epoch": 0.14375561545372867,
      "grad_norm": 0.90234375,
      "learning_rate": 0.00019884716385509785,
      "loss": 2.517,
      "step": 370
    },
    {
      "epoch": 0.14414414414414414,
      "grad_norm": 0.9375,
      "learning_rate": 0.0001988265261287069,
      "loss": 2.474,
      "step": 371
    },
    {
      "epoch": 0.14453267283455962,
      "grad_norm": 0.84375,
      "learning_rate": 0.00019880570640238733,
      "loss": 2.511,
      "step": 372
    },
    {
      "epoch": 0.14492120152497512,
      "grad_norm": 0.89453125,
      "learning_rate": 0.00019878470471448094,
      "loss": 2.4679,
      "step": 373
    },
    {
      "epoch": 0.1453097302153906,
      "grad_norm": 0.85546875,
      "learning_rate": 0.00019876352110366466,
      "loss": 2.4047,
      "step": 374
    },
    {
      "epoch": 0.14569825890580607,
      "grad_norm": 0.93359375,
      "learning_rate": 0.0001987421556089504,
      "loss": 2.5233,
      "step": 375
    },
    {
      "epoch": 0.14608678759622157,
      "grad_norm": 0.88671875,
      "learning_rate": 0.00019872060826968513,
      "loss": 2.4157,
      "step": 376
    },
    {
      "epoch": 0.14647531628663704,
      "grad_norm": 0.94140625,
      "learning_rate": 0.00019869887912555062,
      "loss": 2.4568,
      "step": 377
    },
    {
      "epoch": 0.14686384497705252,
      "grad_norm": 0.87890625,
      "learning_rate": 0.0001986769682165635,
      "loss": 2.4597,
      "step": 378
    },
    {
      "epoch": 0.14725237366746802,
      "grad_norm": 0.890625,
      "learning_rate": 0.0001986548755830751,
      "loss": 2.5093,
      "step": 379
    },
    {
      "epoch": 0.1476409023578835,
      "grad_norm": 0.89453125,
      "learning_rate": 0.00019863260126577146,
      "loss": 2.4912,
      "step": 380
    },
    {
      "epoch": 0.14802943104829897,
      "grad_norm": 0.875,
      "learning_rate": 0.00019861014530567314,
      "loss": 2.5111,
      "step": 381
    },
    {
      "epoch": 0.14841795973871447,
      "grad_norm": 0.83203125,
      "learning_rate": 0.0001985875077441353,
      "loss": 2.5189,
      "step": 382
    },
    {
      "epoch": 0.14880648842912994,
      "grad_norm": 0.85546875,
      "learning_rate": 0.00019856468862284752,
      "loss": 2.4845,
      "step": 383
    },
    {
      "epoch": 0.14919501711954541,
      "grad_norm": 0.88671875,
      "learning_rate": 0.0001985416879838337,
      "loss": 2.5138,
      "step": 384
    },
    {
      "epoch": 0.14958354580996092,
      "grad_norm": 0.9140625,
      "learning_rate": 0.00019851850586945203,
      "loss": 2.459,
      "step": 385
    },
    {
      "epoch": 0.1499720745003764,
      "grad_norm": 0.890625,
      "learning_rate": 0.00019849514232239495,
      "loss": 2.4799,
      "step": 386
    },
    {
      "epoch": 0.15036060319079186,
      "grad_norm": 0.921875,
      "learning_rate": 0.000198471597385689,
      "loss": 2.4408,
      "step": 387
    },
    {
      "epoch": 0.15074913188120737,
      "grad_norm": 0.8046875,
      "learning_rate": 0.00019844787110269478,
      "loss": 2.4064,
      "step": 388
    },
    {
      "epoch": 0.15113766057162284,
      "grad_norm": 0.95703125,
      "learning_rate": 0.00019842396351710685,
      "loss": 2.4311,
      "step": 389
    },
    {
      "epoch": 0.1515261892620383,
      "grad_norm": 0.83984375,
      "learning_rate": 0.00019839987467295362,
      "loss": 2.4982,
      "step": 390
    },
    {
      "epoch": 0.15191471795245381,
      "grad_norm": 0.83203125,
      "learning_rate": 0.00019837560461459744,
      "loss": 2.504,
      "step": 391
    },
    {
      "epoch": 0.1523032466428693,
      "grad_norm": 0.84765625,
      "learning_rate": 0.0001983511533867342,
      "loss": 2.5254,
      "step": 392
    },
    {
      "epoch": 0.15269177533328476,
      "grad_norm": 1.0078125,
      "learning_rate": 0.0001983265210343936,
      "loss": 2.4947,
      "step": 393
    },
    {
      "epoch": 0.15308030402370024,
      "grad_norm": 0.921875,
      "learning_rate": 0.00019830170760293877,
      "loss": 2.4509,
      "step": 394
    },
    {
      "epoch": 0.15346883271411574,
      "grad_norm": 0.90625,
      "learning_rate": 0.00019827671313806642,
      "loss": 2.4467,
      "step": 395
    },
    {
      "epoch": 0.1538573614045312,
      "grad_norm": 0.79296875,
      "learning_rate": 0.0001982515376858066,
      "loss": 2.4584,
      "step": 396
    },
    {
      "epoch": 0.15424589009494669,
      "grad_norm": 0.94140625,
      "learning_rate": 0.00019822618129252263,
      "loss": 2.4464,
      "step": 397
    },
    {
      "epoch": 0.1546344187853622,
      "grad_norm": 0.8515625,
      "learning_rate": 0.00019820064400491116,
      "loss": 2.471,
      "step": 398
    },
    {
      "epoch": 0.15502294747577766,
      "grad_norm": 0.90234375,
      "learning_rate": 0.0001981749258700019,
      "loss": 2.4903,
      "step": 399
    },
    {
      "epoch": 0.15541147616619314,
      "grad_norm": 0.85546875,
      "learning_rate": 0.00019814902693515764,
      "loss": 2.4695,
      "step": 400
    },
    {
      "epoch": 0.15580000485660864,
      "grad_norm": 0.83984375,
      "learning_rate": 0.00019812294724807407,
      "loss": 2.5074,
      "step": 401
    },
    {
      "epoch": 0.1561885335470241,
      "grad_norm": 0.89453125,
      "learning_rate": 0.00019809668685677988,
      "loss": 2.5358,
      "step": 402
    },
    {
      "epoch": 0.15657706223743958,
      "grad_norm": 0.87109375,
      "learning_rate": 0.0001980702458096364,
      "loss": 2.5308,
      "step": 403
    },
    {
      "epoch": 0.1569655909278551,
      "grad_norm": 0.84375,
      "learning_rate": 0.00019804362415533775,
      "loss": 2.4968,
      "step": 404
    },
    {
      "epoch": 0.15735411961827056,
      "grad_norm": 0.81640625,
      "learning_rate": 0.00019801682194291068,
      "loss": 2.478,
      "step": 405
    },
    {
      "epoch": 0.15774264830868603,
      "grad_norm": 0.9140625,
      "learning_rate": 0.00019798983922171437,
      "loss": 2.4332,
      "step": 406
    },
    {
      "epoch": 0.15813117699910154,
      "grad_norm": 0.99609375,
      "learning_rate": 0.00019796267604144045,
      "loss": 2.5514,
      "step": 407
    },
    {
      "epoch": 0.158519705689517,
      "grad_norm": 0.859375,
      "learning_rate": 0.00019793533245211298,
      "loss": 2.4069,
      "step": 408
    },
    {
      "epoch": 0.15890823437993248,
      "grad_norm": 0.8671875,
      "learning_rate": 0.00019790780850408816,
      "loss": 2.508,
      "step": 409
    },
    {
      "epoch": 0.15929676307034799,
      "grad_norm": 0.86328125,
      "learning_rate": 0.00019788010424805433,
      "loss": 2.4841,
      "step": 410
    },
    {
      "epoch": 0.15968529176076346,
      "grad_norm": 0.88671875,
      "learning_rate": 0.00019785221973503197,
      "loss": 2.4688,
      "step": 411
    },
    {
      "epoch": 0.16007382045117893,
      "grad_norm": 0.90625,
      "learning_rate": 0.00019782415501637347,
      "loss": 2.4655,
      "step": 412
    },
    {
      "epoch": 0.16046234914159443,
      "grad_norm": 0.93359375,
      "learning_rate": 0.0001977959101437631,
      "loss": 2.5057,
      "step": 413
    },
    {
      "epoch": 0.1608508778320099,
      "grad_norm": 0.890625,
      "learning_rate": 0.0001977674851692169,
      "loss": 2.4822,
      "step": 414
    },
    {
      "epoch": 0.16123940652242538,
      "grad_norm": 0.94140625,
      "learning_rate": 0.00019773888014508259,
      "loss": 2.4492,
      "step": 415
    },
    {
      "epoch": 0.16162793521284088,
      "grad_norm": 0.91796875,
      "learning_rate": 0.00019771009512403948,
      "loss": 2.4674,
      "step": 416
    },
    {
      "epoch": 0.16201646390325636,
      "grad_norm": 0.9453125,
      "learning_rate": 0.00019768113015909835,
      "loss": 2.489,
      "step": 417
    },
    {
      "epoch": 0.16240499259367183,
      "grad_norm": 0.859375,
      "learning_rate": 0.00019765198530360137,
      "loss": 2.4627,
      "step": 418
    },
    {
      "epoch": 0.16279352128408733,
      "grad_norm": 0.84765625,
      "learning_rate": 0.000197622660611222,
      "loss": 2.4688,
      "step": 419
    },
    {
      "epoch": 0.1631820499745028,
      "grad_norm": 0.859375,
      "learning_rate": 0.00019759315613596492,
      "loss": 2.4542,
      "step": 420
    },
    {
      "epoch": 0.16357057866491828,
      "grad_norm": 0.87890625,
      "learning_rate": 0.0001975634719321658,
      "loss": 2.4351,
      "step": 421
    },
    {
      "epoch": 0.16395910735533378,
      "grad_norm": 0.8828125,
      "learning_rate": 0.00019753360805449145,
      "loss": 2.4476,
      "step": 422
    },
    {
      "epoch": 0.16434763604574926,
      "grad_norm": 0.890625,
      "learning_rate": 0.0001975035645579394,
      "loss": 2.4848,
      "step": 423
    },
    {
      "epoch": 0.16473616473616473,
      "grad_norm": 0.94140625,
      "learning_rate": 0.00019747334149783813,
      "loss": 2.5401,
      "step": 424
    },
    {
      "epoch": 0.16512469342658023,
      "grad_norm": 0.8515625,
      "learning_rate": 0.0001974429389298467,
      "loss": 2.4539,
      "step": 425
    },
    {
      "epoch": 0.1655132221169957,
      "grad_norm": 0.8984375,
      "learning_rate": 0.0001974123569099548,
      "loss": 2.4487,
      "step": 426
    },
    {
      "epoch": 0.16590175080741118,
      "grad_norm": 0.83984375,
      "learning_rate": 0.0001973815954944826,
      "loss": 2.4444,
      "step": 427
    },
    {
      "epoch": 0.16629027949782665,
      "grad_norm": 1.0078125,
      "learning_rate": 0.00019735065474008062,
      "loss": 2.4784,
      "step": 428
    },
    {
      "epoch": 0.16667880818824216,
      "grad_norm": 0.91796875,
      "learning_rate": 0.00019731953470372964,
      "loss": 2.4598,
      "step": 429
    },
    {
      "epoch": 0.16706733687865763,
      "grad_norm": 0.97265625,
      "learning_rate": 0.00019728823544274063,
      "loss": 2.4388,
      "step": 430
    },
    {
      "epoch": 0.1674558655690731,
      "grad_norm": 0.95703125,
      "learning_rate": 0.00019725675701475468,
      "loss": 2.4653,
      "step": 431
    },
    {
      "epoch": 0.1678443942594886,
      "grad_norm": 0.875,
      "learning_rate": 0.00019722509947774276,
      "loss": 2.4829,
      "step": 432
    },
    {
      "epoch": 0.16823292294990408,
      "grad_norm": 0.875,
      "learning_rate": 0.00019719326289000568,
      "loss": 2.4322,
      "step": 433
    },
    {
      "epoch": 0.16862145164031955,
      "grad_norm": 1.0546875,
      "learning_rate": 0.00019716124731017405,
      "loss": 2.4855,
      "step": 434
    },
    {
      "epoch": 0.16900998033073505,
      "grad_norm": 0.8828125,
      "learning_rate": 0.00019712905279720806,
      "loss": 2.4812,
      "step": 435
    },
    {
      "epoch": 0.16939850902115053,
      "grad_norm": 0.859375,
      "learning_rate": 0.0001970966794103975,
      "loss": 2.4207,
      "step": 436
    },
    {
      "epoch": 0.169787037711566,
      "grad_norm": 0.90234375,
      "learning_rate": 0.00019706412720936145,
      "loss": 2.3899,
      "step": 437
    },
    {
      "epoch": 0.1701755664019815,
      "grad_norm": 0.890625,
      "learning_rate": 0.00019703139625404844,
      "loss": 2.4568,
      "step": 438
    },
    {
      "epoch": 0.17056409509239698,
      "grad_norm": 0.90234375,
      "learning_rate": 0.00019699848660473612,
      "loss": 2.495,
      "step": 439
    },
    {
      "epoch": 0.17095262378281245,
      "grad_norm": 0.85546875,
      "learning_rate": 0.0001969653983220312,
      "loss": 2.5276,
      "step": 440
    },
    {
      "epoch": 0.17134115247322795,
      "grad_norm": 0.87109375,
      "learning_rate": 0.0001969321314668694,
      "loss": 2.4646,
      "step": 441
    },
    {
      "epoch": 0.17172968116364343,
      "grad_norm": 0.8984375,
      "learning_rate": 0.0001968986861005153,
      "loss": 2.4508,
      "step": 442
    },
    {
      "epoch": 0.1721182098540589,
      "grad_norm": 0.87890625,
      "learning_rate": 0.00019686506228456225,
      "loss": 2.5221,
      "step": 443
    },
    {
      "epoch": 0.1725067385444744,
      "grad_norm": 0.859375,
      "learning_rate": 0.0001968312600809322,
      "loss": 2.4256,
      "step": 444
    },
    {
      "epoch": 0.17289526723488988,
      "grad_norm": 0.828125,
      "learning_rate": 0.0001967972795518756,
      "loss": 2.4289,
      "step": 445
    },
    {
      "epoch": 0.17328379592530535,
      "grad_norm": 0.91796875,
      "learning_rate": 0.00019676312075997139,
      "loss": 2.374,
      "step": 446
    },
    {
      "epoch": 0.17367232461572085,
      "grad_norm": 0.8515625,
      "learning_rate": 0.00019672878376812667,
      "loss": 2.4398,
      "step": 447
    },
    {
      "epoch": 0.17406085330613633,
      "grad_norm": 0.8828125,
      "learning_rate": 0.00019669426863957686,
      "loss": 2.3595,
      "step": 448
    },
    {
      "epoch": 0.1744493819965518,
      "grad_norm": 0.8515625,
      "learning_rate": 0.00019665957543788532,
      "loss": 2.4631,
      "step": 449
    },
    {
      "epoch": 0.1748379106869673,
      "grad_norm": 0.83984375,
      "learning_rate": 0.00019662470422694342,
      "loss": 2.4148,
      "step": 450
    },
    {
      "epoch": 0.17522643937738278,
      "grad_norm": 0.97265625,
      "learning_rate": 0.0001965896550709704,
      "loss": 2.4994,
      "step": 451
    },
    {
      "epoch": 0.17561496806779825,
      "grad_norm": 0.9375,
      "learning_rate": 0.00019655442803451301,
      "loss": 2.4471,
      "step": 452
    },
    {
      "epoch": 0.17600349675821375,
      "grad_norm": 0.95703125,
      "learning_rate": 0.00019651902318244582,
      "loss": 2.5041,
      "step": 453
    },
    {
      "epoch": 0.17639202544862922,
      "grad_norm": 0.8359375,
      "learning_rate": 0.0001964834405799707,
      "loss": 2.4156,
      "step": 454
    },
    {
      "epoch": 0.1767805541390447,
      "grad_norm": 0.84375,
      "learning_rate": 0.00019644768029261698,
      "loss": 2.5044,
      "step": 455
    },
    {
      "epoch": 0.1771690828294602,
      "grad_norm": 0.890625,
      "learning_rate": 0.00019641174238624115,
      "loss": 2.482,
      "step": 456
    },
    {
      "epoch": 0.17755761151987567,
      "grad_norm": 0.8515625,
      "learning_rate": 0.00019637562692702677,
      "loss": 2.4772,
      "step": 457
    },
    {
      "epoch": 0.17794614021029115,
      "grad_norm": 0.96484375,
      "learning_rate": 0.00019633933398148452,
      "loss": 2.3809,
      "step": 458
    },
    {
      "epoch": 0.17833466890070665,
      "grad_norm": 0.80078125,
      "learning_rate": 0.0001963028636164518,
      "loss": 2.4452,
      "step": 459
    },
    {
      "epoch": 0.17872319759112212,
      "grad_norm": 1.015625,
      "learning_rate": 0.0001962662158990928,
      "loss": 2.4653,
      "step": 460
    },
    {
      "epoch": 0.1791117262815376,
      "grad_norm": 0.921875,
      "learning_rate": 0.00019622939089689837,
      "loss": 2.4565,
      "step": 461
    },
    {
      "epoch": 0.17950025497195307,
      "grad_norm": 0.890625,
      "learning_rate": 0.00019619238867768577,
      "loss": 2.4595,
      "step": 462
    },
    {
      "epoch": 0.17988878366236857,
      "grad_norm": 0.9375,
      "learning_rate": 0.00019615520930959867,
      "loss": 2.4754,
      "step": 463
    },
    {
      "epoch": 0.18027731235278405,
      "grad_norm": 0.8359375,
      "learning_rate": 0.00019611785286110695,
      "loss": 2.4152,
      "step": 464
    },
    {
      "epoch": 0.18066584104319952,
      "grad_norm": 0.90234375,
      "learning_rate": 0.00019608031940100665,
      "loss": 2.4357,
      "step": 465
    },
    {
      "epoch": 0.18105436973361502,
      "grad_norm": 0.94921875,
      "learning_rate": 0.00019604260899841979,
      "loss": 2.5019,
      "step": 466
    },
    {
      "epoch": 0.1814428984240305,
      "grad_norm": 0.8984375,
      "learning_rate": 0.0001960047217227942,
      "loss": 2.5252,
      "step": 467
    },
    {
      "epoch": 0.18183142711444597,
      "grad_norm": 0.83203125,
      "learning_rate": 0.00019596665764390342,
      "loss": 2.3954,
      "step": 468
    },
    {
      "epoch": 0.18221995580486147,
      "grad_norm": 1.03125,
      "learning_rate": 0.0001959284168318467,
      "loss": 2.4657,
      "step": 469
    },
    {
      "epoch": 0.18260848449527695,
      "grad_norm": 0.9140625,
      "learning_rate": 0.00019588999935704876,
      "loss": 2.5376,
      "step": 470
    },
    {
      "epoch": 0.18299701318569242,
      "grad_norm": 0.88671875,
      "learning_rate": 0.0001958514052902595,
      "loss": 2.3757,
      "step": 471
    },
    {
      "epoch": 0.18338554187610792,
      "grad_norm": 0.9921875,
      "learning_rate": 0.00019581263470255417,
      "loss": 2.4966,
      "step": 472
    },
    {
      "epoch": 0.1837740705665234,
      "grad_norm": 0.8515625,
      "learning_rate": 0.0001957736876653331,
      "loss": 2.4341,
      "step": 473
    },
    {
      "epoch": 0.18416259925693887,
      "grad_norm": 0.84375,
      "learning_rate": 0.00019573456425032158,
      "loss": 2.3674,
      "step": 474
    },
    {
      "epoch": 0.18455112794735437,
      "grad_norm": 1.0234375,
      "learning_rate": 0.00019569526452956962,
      "loss": 2.4213,
      "step": 475
    },
    {
      "epoch": 0.18493965663776984,
      "grad_norm": 0.90625,
      "learning_rate": 0.00019565578857545202,
      "loss": 2.4737,
      "step": 476
    },
    {
      "epoch": 0.18532818532818532,
      "grad_norm": 0.8515625,
      "learning_rate": 0.0001956161364606681,
      "loss": 2.4017,
      "step": 477
    },
    {
      "epoch": 0.18571671401860082,
      "grad_norm": 0.90234375,
      "learning_rate": 0.00019557630825824156,
      "loss": 2.515,
      "step": 478
    },
    {
      "epoch": 0.1861052427090163,
      "grad_norm": 1.0703125,
      "learning_rate": 0.00019553630404152043,
      "loss": 2.4351,
      "step": 479
    },
    {
      "epoch": 0.18649377139943177,
      "grad_norm": 0.87890625,
      "learning_rate": 0.00019549612388417692,
      "loss": 2.3931,
      "step": 480
    },
    {
      "epoch": 0.18688230008984727,
      "grad_norm": 0.94921875,
      "learning_rate": 0.00019545576786020717,
      "loss": 2.4537,
      "step": 481
    },
    {
      "epoch": 0.18727082878026274,
      "grad_norm": 0.87890625,
      "learning_rate": 0.00019541523604393128,
      "loss": 2.514,
      "step": 482
    },
    {
      "epoch": 0.18765935747067822,
      "grad_norm": 0.91796875,
      "learning_rate": 0.00019537452850999302,
      "loss": 2.4735,
      "step": 483
    },
    {
      "epoch": 0.18804788616109372,
      "grad_norm": 0.875,
      "learning_rate": 0.0001953336453333598,
      "loss": 2.4783,
      "step": 484
    },
    {
      "epoch": 0.1884364148515092,
      "grad_norm": 1.03125,
      "learning_rate": 0.00019529258658932248,
      "loss": 2.4402,
      "step": 485
    },
    {
      "epoch": 0.18882494354192467,
      "grad_norm": 0.88671875,
      "learning_rate": 0.0001952513523534953,
      "loss": 2.446,
      "step": 486
    },
    {
      "epoch": 0.18921347223234017,
      "grad_norm": 0.87890625,
      "learning_rate": 0.0001952099427018156,
      "loss": 2.4653,
      "step": 487
    },
    {
      "epoch": 0.18960200092275564,
      "grad_norm": 0.9296875,
      "learning_rate": 0.00019516835771054382,
      "loss": 2.439,
      "step": 488
    },
    {
      "epoch": 0.18999052961317112,
      "grad_norm": 0.86328125,
      "learning_rate": 0.00019512659745626334,
      "loss": 2.5075,
      "step": 489
    },
    {
      "epoch": 0.19037905830358662,
      "grad_norm": 0.83203125,
      "learning_rate": 0.0001950846620158802,
      "loss": 2.4529,
      "step": 490
    },
    {
      "epoch": 0.1907675869940021,
      "grad_norm": 0.83203125,
      "learning_rate": 0.00019504255146662321,
      "loss": 2.3809,
      "step": 491
    },
    {
      "epoch": 0.19115611568441757,
      "grad_norm": 0.859375,
      "learning_rate": 0.0001950002658860435,
      "loss": 2.4723,
      "step": 492
    },
    {
      "epoch": 0.19154464437483307,
      "grad_norm": 0.890625,
      "learning_rate": 0.0001949578053520147,
      "loss": 2.4074,
      "step": 493
    },
    {
      "epoch": 0.19193317306524854,
      "grad_norm": 0.88671875,
      "learning_rate": 0.0001949151699427325,
      "loss": 2.4241,
      "step": 494
    },
    {
      "epoch": 0.19232170175566402,
      "grad_norm": 0.859375,
      "learning_rate": 0.00019487235973671471,
      "loss": 2.4672,
      "step": 495
    },
    {
      "epoch": 0.1927102304460795,
      "grad_norm": 0.87109375,
      "learning_rate": 0.0001948293748128011,
      "loss": 2.4237,
      "step": 496
    },
    {
      "epoch": 0.193098759136495,
      "grad_norm": 0.87890625,
      "learning_rate": 0.00019478621525015303,
      "loss": 2.442,
      "step": 497
    },
    {
      "epoch": 0.19348728782691046,
      "grad_norm": 0.94921875,
      "learning_rate": 0.00019474288112825367,
      "loss": 2.4969,
      "step": 498
    },
    {
      "epoch": 0.19387581651732594,
      "grad_norm": 0.86328125,
      "learning_rate": 0.00019469937252690754,
      "loss": 2.4462,
      "step": 499
    },
    {
      "epoch": 0.19426434520774144,
      "grad_norm": 0.94140625,
      "learning_rate": 0.00019465568952624053,
      "loss": 2.3824,
      "step": 500
    },
    {
      "epoch": 0.19465287389815691,
      "grad_norm": 0.984375,
      "learning_rate": 0.00019461183220669974,
      "loss": 2.5187,
      "step": 501
    },
    {
      "epoch": 0.1950414025885724,
      "grad_norm": 0.921875,
      "learning_rate": 0.00019456780064905317,
      "loss": 2.4442,
      "step": 502
    },
    {
      "epoch": 0.1954299312789879,
      "grad_norm": 1.0234375,
      "learning_rate": 0.00019452359493438983,
      "loss": 2.4223,
      "step": 503
    },
    {
      "epoch": 0.19581845996940336,
      "grad_norm": 0.83203125,
      "learning_rate": 0.0001944792151441194,
      "loss": 2.544,
      "step": 504
    },
    {
      "epoch": 0.19620698865981884,
      "grad_norm": 0.98046875,
      "learning_rate": 0.00019443466135997216,
      "loss": 2.4107,
      "step": 505
    },
    {
      "epoch": 0.19659551735023434,
      "grad_norm": 0.9921875,
      "learning_rate": 0.0001943899336639988,
      "loss": 2.4845,
      "step": 506
    },
    {
      "epoch": 0.1969840460406498,
      "grad_norm": 0.80078125,
      "learning_rate": 0.00019434503213857027,
      "loss": 2.4167,
      "step": 507
    },
    {
      "epoch": 0.1973725747310653,
      "grad_norm": 0.98828125,
      "learning_rate": 0.0001942999568663777,
      "loss": 2.4629,
      "step": 508
    },
    {
      "epoch": 0.1977611034214808,
      "grad_norm": 0.94140625,
      "learning_rate": 0.0001942547079304322,
      "loss": 2.4738,
      "step": 509
    },
    {
      "epoch": 0.19814963211189626,
      "grad_norm": 0.94921875,
      "learning_rate": 0.00019420928541406463,
      "loss": 2.4505,
      "step": 510
    },
    {
      "epoch": 0.19853816080231174,
      "grad_norm": 1.5390625,
      "learning_rate": 0.00019416368940092555,
      "loss": 2.4139,
      "step": 511
    },
    {
      "epoch": 0.19892668949272724,
      "grad_norm": 0.97265625,
      "learning_rate": 0.00019411791997498502,
      "loss": 2.4519,
      "step": 512
    },
    {
      "epoch": 0.1993152181831427,
      "grad_norm": 1.0546875,
      "learning_rate": 0.00019407197722053255,
      "loss": 2.4682,
      "step": 513
    },
    {
      "epoch": 0.19970374687355819,
      "grad_norm": 1.0546875,
      "learning_rate": 0.0001940258612221767,
      "loss": 2.4073,
      "step": 514
    },
    {
      "epoch": 0.2000922755639737,
      "grad_norm": 0.9609375,
      "learning_rate": 0.0001939795720648452,
      "loss": 2.4505,
      "step": 515
    },
    {
      "epoch": 0.20048080425438916,
      "grad_norm": 0.8984375,
      "learning_rate": 0.00019393310983378462,
      "loss": 2.4493,
      "step": 516
    },
    {
      "epoch": 0.20086933294480463,
      "grad_norm": 0.9921875,
      "learning_rate": 0.0001938864746145603,
      "loss": 2.4376,
      "step": 517
    },
    {
      "epoch": 0.20125786163522014,
      "grad_norm": 1.046875,
      "learning_rate": 0.00019383966649305607,
      "loss": 2.3207,
      "step": 518
    },
    {
      "epoch": 0.2016463903256356,
      "grad_norm": 0.84765625,
      "learning_rate": 0.00019379268555547432,
      "loss": 2.522,
      "step": 519
    },
    {
      "epoch": 0.20203491901605108,
      "grad_norm": 0.921875,
      "learning_rate": 0.00019374553188833554,
      "loss": 2.491,
      "step": 520
    },
    {
      "epoch": 0.20242344770646659,
      "grad_norm": 0.921875,
      "learning_rate": 0.00019369820557847844,
      "loss": 2.4827,
      "step": 521
    },
    {
      "epoch": 0.20281197639688206,
      "grad_norm": 0.796875,
      "learning_rate": 0.00019365070671305962,
      "loss": 2.4586,
      "step": 522
    },
    {
      "epoch": 0.20320050508729753,
      "grad_norm": 0.8515625,
      "learning_rate": 0.0001936030353795535,
      "loss": 2.4202,
      "step": 523
    },
    {
      "epoch": 0.20358903377771304,
      "grad_norm": 0.90625,
      "learning_rate": 0.00019355519166575205,
      "loss": 2.4167,
      "step": 524
    },
    {
      "epoch": 0.2039775624681285,
      "grad_norm": 0.875,
      "learning_rate": 0.00019350717565976474,
      "loss": 2.4965,
      "step": 525
    },
    {
      "epoch": 0.20436609115854398,
      "grad_norm": 0.81640625,
      "learning_rate": 0.00019345898745001838,
      "loss": 2.4731,
      "step": 526
    },
    {
      "epoch": 0.20475461984895948,
      "grad_norm": 0.87890625,
      "learning_rate": 0.0001934106271252568,
      "loss": 2.4869,
      "step": 527
    },
    {
      "epoch": 0.20514314853937496,
      "grad_norm": 0.83984375,
      "learning_rate": 0.00019336209477454087,
      "loss": 2.3862,
      "step": 528
    },
    {
      "epoch": 0.20553167722979043,
      "grad_norm": 0.8515625,
      "learning_rate": 0.0001933133904872483,
      "loss": 2.4865,
      "step": 529
    },
    {
      "epoch": 0.2059202059202059,
      "grad_norm": 3.5,
      "learning_rate": 0.0001932645143530734,
      "loss": 2.4302,
      "step": 530
    },
    {
      "epoch": 0.2063087346106214,
      "grad_norm": 0.98828125,
      "learning_rate": 0.00019321546646202688,
      "loss": 2.4552,
      "step": 531
    },
    {
      "epoch": 0.20669726330103688,
      "grad_norm": 0.83984375,
      "learning_rate": 0.0001931662469044359,
      "loss": 2.4428,
      "step": 532
    },
    {
      "epoch": 0.20708579199145236,
      "grad_norm": 0.80078125,
      "learning_rate": 0.00019311685577094368,
      "loss": 2.5089,
      "step": 533
    },
    {
      "epoch": 0.20747432068186786,
      "grad_norm": 0.80859375,
      "learning_rate": 0.0001930672931525094,
      "loss": 2.4275,
      "step": 534
    },
    {
      "epoch": 0.20786284937228333,
      "grad_norm": 0.7734375,
      "learning_rate": 0.0001930175591404081,
      "loss": 2.3744,
      "step": 535
    },
    {
      "epoch": 0.2082513780626988,
      "grad_norm": 0.875,
      "learning_rate": 0.00019296765382623049,
      "loss": 2.4922,
      "step": 536
    },
    {
      "epoch": 0.2086399067531143,
      "grad_norm": 0.8359375,
      "learning_rate": 0.0001929175773018826,
      "loss": 2.5342,
      "step": 537
    },
    {
      "epoch": 0.20902843544352978,
      "grad_norm": 1.0234375,
      "learning_rate": 0.00019286732965958592,
      "loss": 2.4245,
      "step": 538
    },
    {
      "epoch": 0.20941696413394525,
      "grad_norm": 0.828125,
      "learning_rate": 0.00019281691099187702,
      "loss": 2.4321,
      "step": 539
    },
    {
      "epoch": 0.20980549282436076,
      "grad_norm": 1.09375,
      "learning_rate": 0.0001927663213916074,
      "loss": 2.4336,
      "step": 540
    },
    {
      "epoch": 0.21019402151477623,
      "grad_norm": 1.0546875,
      "learning_rate": 0.0001927155609519434,
      "loss": 2.4085,
      "step": 541
    },
    {
      "epoch": 0.2105825502051917,
      "grad_norm": 1.109375,
      "learning_rate": 0.00019266462976636598,
      "loss": 2.4764,
      "step": 542
    },
    {
      "epoch": 0.2109710788956072,
      "grad_norm": 0.9296875,
      "learning_rate": 0.00019261352792867046,
      "loss": 2.4533,
      "step": 543
    },
    {
      "epoch": 0.21135960758602268,
      "grad_norm": 0.82421875,
      "learning_rate": 0.00019256225553296655,
      "loss": 2.4508,
      "step": 544
    },
    {
      "epoch": 0.21174813627643815,
      "grad_norm": 0.8671875,
      "learning_rate": 0.00019251081267367807,
      "loss": 2.3913,
      "step": 545
    },
    {
      "epoch": 0.21213666496685366,
      "grad_norm": 0.87890625,
      "learning_rate": 0.00019245919944554257,
      "loss": 2.3765,
      "step": 546
    },
    {
      "epoch": 0.21252519365726913,
      "grad_norm": 0.953125,
      "learning_rate": 0.00019240741594361155,
      "loss": 2.3952,
      "step": 547
    },
    {
      "epoch": 0.2129137223476846,
      "grad_norm": 0.8984375,
      "learning_rate": 0.0001923554622632501,
      "loss": 2.4084,
      "step": 548
    },
    {
      "epoch": 0.2133022510381001,
      "grad_norm": 0.953125,
      "learning_rate": 0.00019230333850013654,
      "loss": 2.3587,
      "step": 549
    },
    {
      "epoch": 0.21369077972851558,
      "grad_norm": 1.0625,
      "learning_rate": 0.00019225104475026258,
      "loss": 2.3831,
      "step": 550
    },
    {
      "epoch": 0.21407930841893105,
      "grad_norm": 0.85546875,
      "learning_rate": 0.00019219858110993288,
      "loss": 2.3703,
      "step": 551
    },
    {
      "epoch": 0.21446783710934655,
      "grad_norm": 1.0078125,
      "learning_rate": 0.00019214594767576508,
      "loss": 2.4301,
      "step": 552
    },
    {
      "epoch": 0.21485636579976203,
      "grad_norm": 1.15625,
      "learning_rate": 0.00019209314454468937,
      "loss": 2.4783,
      "step": 553
    },
    {
      "epoch": 0.2152448944901775,
      "grad_norm": 0.8515625,
      "learning_rate": 0.00019204017181394855,
      "loss": 2.4635,
      "step": 554
    },
    {
      "epoch": 0.215633423180593,
      "grad_norm": 1.0625,
      "learning_rate": 0.00019198702958109776,
      "loss": 2.3172,
      "step": 555
    },
    {
      "epoch": 0.21602195187100848,
      "grad_norm": 0.9609375,
      "learning_rate": 0.00019193371794400425,
      "loss": 2.4102,
      "step": 556
    },
    {
      "epoch": 0.21641048056142395,
      "grad_norm": 0.8828125,
      "learning_rate": 0.00019188023700084726,
      "loss": 2.4023,
      "step": 557
    },
    {
      "epoch": 0.21679900925183945,
      "grad_norm": 0.890625,
      "learning_rate": 0.00019182658685011785,
      "loss": 2.5074,
      "step": 558
    },
    {
      "epoch": 0.21718753794225493,
      "grad_norm": 0.8671875,
      "learning_rate": 0.0001917727675906187,
      "loss": 2.4028,
      "step": 559
    },
    {
      "epoch": 0.2175760666326704,
      "grad_norm": 0.82421875,
      "learning_rate": 0.00019171877932146387,
      "loss": 2.4564,
      "step": 560
    },
    {
      "epoch": 0.2179645953230859,
      "grad_norm": 0.81640625,
      "learning_rate": 0.00019166462214207869,
      "loss": 2.4189,
      "step": 561
    },
    {
      "epoch": 0.21835312401350138,
      "grad_norm": 0.875,
      "learning_rate": 0.00019161029615219962,
      "loss": 2.4836,
      "step": 562
    },
    {
      "epoch": 0.21874165270391685,
      "grad_norm": 0.84765625,
      "learning_rate": 0.0001915558014518739,
      "loss": 2.4663,
      "step": 563
    },
    {
      "epoch": 0.21913018139433232,
      "grad_norm": 0.8515625,
      "learning_rate": 0.00019150113814145957,
      "loss": 2.3867,
      "step": 564
    },
    {
      "epoch": 0.21951871008474783,
      "grad_norm": 0.8671875,
      "learning_rate": 0.00019144630632162508,
      "loss": 2.4438,
      "step": 565
    },
    {
      "epoch": 0.2199072387751633,
      "grad_norm": 0.875,
      "learning_rate": 0.0001913913060933493,
      "loss": 2.4158,
      "step": 566
    },
    {
      "epoch": 0.22029576746557877,
      "grad_norm": 0.90625,
      "learning_rate": 0.0001913361375579212,
      "loss": 2.4088,
      "step": 567
    },
    {
      "epoch": 0.22068429615599428,
      "grad_norm": 0.97265625,
      "learning_rate": 0.0001912808008169397,
      "loss": 2.4266,
      "step": 568
    },
    {
      "epoch": 0.22107282484640975,
      "grad_norm": 0.82421875,
      "learning_rate": 0.00019122529597231352,
      "loss": 2.4021,
      "step": 569
    },
    {
      "epoch": 0.22146135353682522,
      "grad_norm": 1.015625,
      "learning_rate": 0.00019116962312626092,
      "loss": 2.4939,
      "step": 570
    },
    {
      "epoch": 0.22184988222724072,
      "grad_norm": 1.0234375,
      "learning_rate": 0.00019111378238130956,
      "loss": 2.4856,
      "step": 571
    },
    {
      "epoch": 0.2222384109176562,
      "grad_norm": 5.90625,
      "learning_rate": 0.0001910577738402964,
      "loss": 2.5271,
      "step": 572
    },
    {
      "epoch": 0.22262693960807167,
      "grad_norm": 1.0,
      "learning_rate": 0.00019100159760636727,
      "loss": 2.4331,
      "step": 573
    },
    {
      "epoch": 0.22301546829848717,
      "grad_norm": 0.953125,
      "learning_rate": 0.00019094525378297687,
      "loss": 2.4777,
      "step": 574
    },
    {
      "epoch": 0.22340399698890265,
      "grad_norm": 0.828125,
      "learning_rate": 0.00019088874247388864,
      "loss": 2.5293,
      "step": 575
    },
    {
      "epoch": 0.22379252567931812,
      "grad_norm": 0.8515625,
      "learning_rate": 0.0001908320637831743,
      "loss": 2.4537,
      "step": 576
    },
    {
      "epoch": 0.22418105436973362,
      "grad_norm": 1.0390625,
      "learning_rate": 0.00019077521781521388,
      "loss": 2.4579,
      "step": 577
    },
    {
      "epoch": 0.2245695830601491,
      "grad_norm": 0.9140625,
      "learning_rate": 0.00019071820467469558,
      "loss": 2.4129,
      "step": 578
    },
    {
      "epoch": 0.22495811175056457,
      "grad_norm": 0.9140625,
      "learning_rate": 0.0001906610244666153,
      "loss": 2.3975,
      "step": 579
    },
    {
      "epoch": 0.22534664044098007,
      "grad_norm": 0.8671875,
      "learning_rate": 0.00019060367729627673,
      "loss": 2.4488,
      "step": 580
    },
    {
      "epoch": 0.22573516913139555,
      "grad_norm": 0.89453125,
      "learning_rate": 0.000190546163269291,
      "loss": 2.5263,
      "step": 581
    },
    {
      "epoch": 0.22612369782181102,
      "grad_norm": 0.8359375,
      "learning_rate": 0.00019048848249157647,
      "loss": 2.4725,
      "step": 582
    },
    {
      "epoch": 0.22651222651222652,
      "grad_norm": 0.8359375,
      "learning_rate": 0.0001904306350693587,
      "loss": 2.4523,
      "step": 583
    },
    {
      "epoch": 0.226900755202642,
      "grad_norm": 0.79296875,
      "learning_rate": 0.00019037262110917008,
      "loss": 2.5423,
      "step": 584
    },
    {
      "epoch": 0.22728928389305747,
      "grad_norm": 0.8203125,
      "learning_rate": 0.00019031444071784966,
      "loss": 2.4369,
      "step": 585
    },
    {
      "epoch": 0.22767781258347297,
      "grad_norm": 0.875,
      "learning_rate": 0.00019025609400254308,
      "loss": 2.4778,
      "step": 586
    },
    {
      "epoch": 0.22806634127388845,
      "grad_norm": 0.8203125,
      "learning_rate": 0.00019019758107070224,
      "loss": 2.3663,
      "step": 587
    },
    {
      "epoch": 0.22845486996430392,
      "grad_norm": 0.83203125,
      "learning_rate": 0.00019013890203008514,
      "loss": 2.3942,
      "step": 588
    },
    {
      "epoch": 0.22884339865471942,
      "grad_norm": 1.0078125,
      "learning_rate": 0.00019008005698875567,
      "loss": 2.4601,
      "step": 589
    },
    {
      "epoch": 0.2292319273451349,
      "grad_norm": 0.85546875,
      "learning_rate": 0.00019002104605508343,
      "loss": 2.4136,
      "step": 590
    },
    {
      "epoch": 0.22962045603555037,
      "grad_norm": 0.8671875,
      "learning_rate": 0.00018996186933774366,
      "loss": 2.4091,
      "step": 591
    },
    {
      "epoch": 0.23000898472596587,
      "grad_norm": 0.8671875,
      "learning_rate": 0.00018990252694571667,
      "loss": 2.3836,
      "step": 592
    },
    {
      "epoch": 0.23039751341638134,
      "grad_norm": 0.828125,
      "learning_rate": 0.00018984301898828813,
      "loss": 2.4327,
      "step": 593
    },
    {
      "epoch": 0.23078604210679682,
      "grad_norm": 0.87109375,
      "learning_rate": 0.00018978334557504843,
      "loss": 2.4264,
      "step": 594
    },
    {
      "epoch": 0.23117457079721232,
      "grad_norm": 0.8515625,
      "learning_rate": 0.0001897235068158927,
      "loss": 2.4683,
      "step": 595
    },
    {
      "epoch": 0.2315630994876278,
      "grad_norm": 0.8359375,
      "learning_rate": 0.00018966350282102065,
      "loss": 2.4395,
      "step": 596
    },
    {
      "epoch": 0.23195162817804327,
      "grad_norm": 0.91796875,
      "learning_rate": 0.00018960333370093626,
      "loss": 2.4799,
      "step": 597
    },
    {
      "epoch": 0.23234015686845874,
      "grad_norm": 1.125,
      "learning_rate": 0.00018954299956644755,
      "loss": 2.4338,
      "step": 598
    },
    {
      "epoch": 0.23272868555887424,
      "grad_norm": 0.90234375,
      "learning_rate": 0.00018948250052866646,
      "loss": 2.4708,
      "step": 599
    },
    {
      "epoch": 0.23311721424928972,
      "grad_norm": 0.80078125,
      "learning_rate": 0.0001894218366990087,
      "loss": 2.4036,
      "step": 600
    },
    {
      "epoch": 0.2335057429397052,
      "grad_norm": 0.94140625,
      "learning_rate": 0.00018936100818919328,
      "loss": 2.4747,
      "step": 601
    },
    {
      "epoch": 0.2338942716301207,
      "grad_norm": 1.078125,
      "learning_rate": 0.0001893000151112427,
      "loss": 2.5058,
      "step": 602
    },
    {
      "epoch": 0.23428280032053617,
      "grad_norm": 1.2265625,
      "learning_rate": 0.00018923885757748238,
      "loss": 2.4404,
      "step": 603
    },
    {
      "epoch": 0.23467132901095164,
      "grad_norm": 0.94140625,
      "learning_rate": 0.00018917753570054066,
      "loss": 2.4196,
      "step": 604
    },
    {
      "epoch": 0.23505985770136714,
      "grad_norm": 0.8359375,
      "learning_rate": 0.00018911604959334858,
      "loss": 2.4353,
      "step": 605
    },
    {
      "epoch": 0.23544838639178262,
      "grad_norm": 0.8203125,
      "learning_rate": 0.00018905439936913948,
      "loss": 2.476,
      "step": 606
    },
    {
      "epoch": 0.2358369150821981,
      "grad_norm": 1.0078125,
      "learning_rate": 0.00018899258514144907,
      "loss": 2.4251,
      "step": 607
    },
    {
      "epoch": 0.2362254437726136,
      "grad_norm": 0.8359375,
      "learning_rate": 0.00018893060702411508,
      "loss": 2.4725,
      "step": 608
    },
    {
      "epoch": 0.23661397246302907,
      "grad_norm": 0.86328125,
      "learning_rate": 0.000188868465131277,
      "loss": 2.4095,
      "step": 609
    },
    {
      "epoch": 0.23700250115344454,
      "grad_norm": 0.80859375,
      "learning_rate": 0.00018880615957737602,
      "loss": 2.4845,
      "step": 610
    },
    {
      "epoch": 0.23739102984386004,
      "grad_norm": 0.85546875,
      "learning_rate": 0.00018874369047715465,
      "loss": 2.3689,
      "step": 611
    },
    {
      "epoch": 0.23777955853427551,
      "grad_norm": 0.77734375,
      "learning_rate": 0.0001886810579456566,
      "loss": 2.4247,
      "step": 612
    },
    {
      "epoch": 0.238168087224691,
      "grad_norm": 0.79296875,
      "learning_rate": 0.00018861826209822663,
      "loss": 2.3775,
      "step": 613
    },
    {
      "epoch": 0.2385566159151065,
      "grad_norm": 0.90234375,
      "learning_rate": 0.00018855530305051011,
      "loss": 2.4572,
      "step": 614
    },
    {
      "epoch": 0.23894514460552196,
      "grad_norm": 0.85546875,
      "learning_rate": 0.00018849218091845314,
      "loss": 2.4004,
      "step": 615
    },
    {
      "epoch": 0.23933367329593744,
      "grad_norm": 0.8125,
      "learning_rate": 0.00018842889581830206,
      "loss": 2.4783,
      "step": 616
    },
    {
      "epoch": 0.23972220198635294,
      "grad_norm": 0.96875,
      "learning_rate": 0.0001883654478666033,
      "loss": 2.3635,
      "step": 617
    },
    {
      "epoch": 0.2401107306767684,
      "grad_norm": 0.91796875,
      "learning_rate": 0.00018830183718020332,
      "loss": 2.3983,
      "step": 618
    },
    {
      "epoch": 0.2404992593671839,
      "grad_norm": 0.8671875,
      "learning_rate": 0.00018823806387624817,
      "loss": 2.4074,
      "step": 619
    },
    {
      "epoch": 0.2408877880575994,
      "grad_norm": 1.1640625,
      "learning_rate": 0.00018817412807218338,
      "loss": 2.5352,
      "step": 620
    },
    {
      "epoch": 0.24127631674801486,
      "grad_norm": 0.875,
      "learning_rate": 0.00018811002988575382,
      "loss": 2.4445,
      "step": 621
    },
    {
      "epoch": 0.24166484543843034,
      "grad_norm": 0.87109375,
      "learning_rate": 0.00018804576943500333,
      "loss": 2.4991,
      "step": 622
    },
    {
      "epoch": 0.24205337412884584,
      "grad_norm": 0.984375,
      "learning_rate": 0.00018798134683827464,
      "loss": 2.4199,
      "step": 623
    },
    {
      "epoch": 0.2424419028192613,
      "grad_norm": 0.76953125,
      "learning_rate": 0.000187916762214209,
      "loss": 2.3854,
      "step": 624
    },
    {
      "epoch": 0.2428304315096768,
      "grad_norm": 1.0,
      "learning_rate": 0.00018785201568174617,
      "loss": 2.4612,
      "step": 625
    },
    {
      "epoch": 0.2432189602000923,
      "grad_norm": 0.9296875,
      "learning_rate": 0.00018778710736012396,
      "loss": 2.4219,
      "step": 626
    },
    {
      "epoch": 0.24360748889050776,
      "grad_norm": 0.828125,
      "learning_rate": 0.0001877220373688782,
      "loss": 2.4123,
      "step": 627
    },
    {
      "epoch": 0.24399601758092324,
      "grad_norm": 0.83984375,
      "learning_rate": 0.0001876568058278425,
      "loss": 2.4361,
      "step": 628
    },
    {
      "epoch": 0.24438454627133874,
      "grad_norm": 0.828125,
      "learning_rate": 0.00018759141285714783,
      "loss": 2.4355,
      "step": 629
    },
    {
      "epoch": 0.2447730749617542,
      "grad_norm": 0.8515625,
      "learning_rate": 0.00018752585857722266,
      "loss": 2.4675,
      "step": 630
    },
    {
      "epoch": 0.24516160365216969,
      "grad_norm": 0.85546875,
      "learning_rate": 0.00018746014310879228,
      "loss": 2.4521,
      "step": 631
    },
    {
      "epoch": 0.24555013234258516,
      "grad_norm": 0.82421875,
      "learning_rate": 0.00018739426657287907,
      "loss": 2.435,
      "step": 632
    },
    {
      "epoch": 0.24593866103300066,
      "grad_norm": 0.8359375,
      "learning_rate": 0.0001873282290908019,
      "loss": 2.4109,
      "step": 633
    },
    {
      "epoch": 0.24632718972341613,
      "grad_norm": 0.79296875,
      "learning_rate": 0.00018726203078417604,
      "loss": 2.4263,
      "step": 634
    },
    {
      "epoch": 0.2467157184138316,
      "grad_norm": 0.80078125,
      "learning_rate": 0.00018719567177491296,
      "loss": 2.3932,
      "step": 635
    },
    {
      "epoch": 0.2471042471042471,
      "grad_norm": 0.80078125,
      "learning_rate": 0.0001871291521852201,
      "loss": 2.3958,
      "step": 636
    },
    {
      "epoch": 0.24749277579466258,
      "grad_norm": 0.9765625,
      "learning_rate": 0.0001870624721376006,
      "loss": 2.4574,
      "step": 637
    },
    {
      "epoch": 0.24788130448507806,
      "grad_norm": 0.83984375,
      "learning_rate": 0.0001869956317548531,
      "loss": 2.4139,
      "step": 638
    },
    {
      "epoch": 0.24826983317549356,
      "grad_norm": 0.8984375,
      "learning_rate": 0.00018692863116007152,
      "loss": 2.4308,
      "step": 639
    },
    {
      "epoch": 0.24865836186590903,
      "grad_norm": 0.78125,
      "learning_rate": 0.0001868614704766449,
      "loss": 2.3843,
      "step": 640
    },
    {
      "epoch": 0.2490468905563245,
      "grad_norm": 0.84765625,
      "learning_rate": 0.00018679414982825693,
      "loss": 2.4272,
      "step": 641
    },
    {
      "epoch": 0.24943541924674,
      "grad_norm": 0.86328125,
      "learning_rate": 0.00018672666933888603,
      "loss": 2.4451,
      "step": 642
    },
    {
      "epoch": 0.24982394793715548,
      "grad_norm": 0.92578125,
      "learning_rate": 0.00018665902913280496,
      "loss": 2.4515,
      "step": 643
    },
    {
      "epoch": 0.25021247662757096,
      "grad_norm": 0.85546875,
      "learning_rate": 0.00018659122933458065,
      "loss": 2.399,
      "step": 644
    },
    {
      "epoch": 0.25060100531798646,
      "grad_norm": 0.796875,
      "learning_rate": 0.00018652327006907386,
      "loss": 2.5024,
      "step": 645
    },
    {
      "epoch": 0.25098953400840196,
      "grad_norm": 0.83984375,
      "learning_rate": 0.00018645515146143904,
      "loss": 2.3793,
      "step": 646
    },
    {
      "epoch": 0.2513780626988174,
      "grad_norm": 0.80859375,
      "learning_rate": 0.0001863868736371241,
      "loss": 2.4433,
      "step": 647
    },
    {
      "epoch": 0.2517665913892329,
      "grad_norm": 0.88671875,
      "learning_rate": 0.00018631843672187022,
      "loss": 2.4498,
      "step": 648
    },
    {
      "epoch": 0.25215512007964835,
      "grad_norm": 0.859375,
      "learning_rate": 0.00018624984084171148,
      "loss": 2.4227,
      "step": 649
    },
    {
      "epoch": 0.25254364877006386,
      "grad_norm": 0.83203125,
      "learning_rate": 0.00018618108612297474,
      "loss": 2.5026,
      "step": 650
    },
    {
      "epoch": 0.25293217746047936,
      "grad_norm": 0.86328125,
      "learning_rate": 0.0001861121726922794,
      "loss": 2.4045,
      "step": 651
    },
    {
      "epoch": 0.2533207061508948,
      "grad_norm": 0.8671875,
      "learning_rate": 0.00018604310067653712,
      "loss": 2.445,
      "step": 652
    },
    {
      "epoch": 0.2537092348413103,
      "grad_norm": 0.8203125,
      "learning_rate": 0.0001859738702029516,
      "loss": 2.3895,
      "step": 653
    },
    {
      "epoch": 0.2540977635317258,
      "grad_norm": 0.88671875,
      "learning_rate": 0.00018590448139901847,
      "loss": 2.4448,
      "step": 654
    },
    {
      "epoch": 0.25448629222214125,
      "grad_norm": 0.94140625,
      "learning_rate": 0.00018583493439252476,
      "loss": 2.4411,
      "step": 655
    },
    {
      "epoch": 0.25487482091255675,
      "grad_norm": 0.83984375,
      "learning_rate": 0.00018576522931154895,
      "loss": 2.4646,
      "step": 656
    },
    {
      "epoch": 0.25526334960297226,
      "grad_norm": 0.8359375,
      "learning_rate": 0.00018569536628446065,
      "loss": 2.4226,
      "step": 657
    },
    {
      "epoch": 0.2556518782933877,
      "grad_norm": 0.875,
      "learning_rate": 0.0001856253454399203,
      "loss": 2.4317,
      "step": 658
    },
    {
      "epoch": 0.2560404069838032,
      "grad_norm": 0.93359375,
      "learning_rate": 0.000185555166906879,
      "loss": 2.4337,
      "step": 659
    },
    {
      "epoch": 0.2564289356742187,
      "grad_norm": 0.83203125,
      "learning_rate": 0.00018548483081457827,
      "loss": 2.4088,
      "step": 660
    },
    {
      "epoch": 0.25681746436463415,
      "grad_norm": 0.890625,
      "learning_rate": 0.00018541433729254972,
      "loss": 2.4964,
      "step": 661
    },
    {
      "epoch": 0.25720599305504965,
      "grad_norm": 0.93359375,
      "learning_rate": 0.00018534368647061495,
      "loss": 2.4715,
      "step": 662
    },
    {
      "epoch": 0.25759452174546515,
      "grad_norm": 0.8671875,
      "learning_rate": 0.00018527287847888523,
      "loss": 2.4574,
      "step": 663
    },
    {
      "epoch": 0.2579830504358806,
      "grad_norm": 0.80078125,
      "learning_rate": 0.0001852019134477613,
      "loss": 2.4681,
      "step": 664
    },
    {
      "epoch": 0.2583715791262961,
      "grad_norm": 0.9609375,
      "learning_rate": 0.00018513079150793305,
      "loss": 2.4757,
      "step": 665
    },
    {
      "epoch": 0.2587601078167116,
      "grad_norm": 0.83984375,
      "learning_rate": 0.00018505951279037934,
      "loss": 2.4098,
      "step": 666
    },
    {
      "epoch": 0.25914863650712705,
      "grad_norm": 0.76171875,
      "learning_rate": 0.0001849880774263678,
      "loss": 2.353,
      "step": 667
    },
    {
      "epoch": 0.25953716519754255,
      "grad_norm": 0.890625,
      "learning_rate": 0.00018491648554745457,
      "loss": 2.4913,
      "step": 668
    },
    {
      "epoch": 0.25992569388795805,
      "grad_norm": 0.796875,
      "learning_rate": 0.0001848447372854839,
      "loss": 2.4221,
      "step": 669
    },
    {
      "epoch": 0.2603142225783735,
      "grad_norm": 0.81640625,
      "learning_rate": 0.00018477283277258812,
      "loss": 2.3949,
      "step": 670
    },
    {
      "epoch": 0.260702751268789,
      "grad_norm": 0.80859375,
      "learning_rate": 0.00018470077214118732,
      "loss": 2.4428,
      "step": 671
    },
    {
      "epoch": 0.2610912799592045,
      "grad_norm": 0.75390625,
      "learning_rate": 0.0001846285555239891,
      "loss": 2.4141,
      "step": 672
    },
    {
      "epoch": 0.26147980864961995,
      "grad_norm": 0.90625,
      "learning_rate": 0.00018455618305398836,
      "loss": 2.3777,
      "step": 673
    },
    {
      "epoch": 0.26186833734003545,
      "grad_norm": 0.84375,
      "learning_rate": 0.00018448365486446687,
      "loss": 2.412,
      "step": 674
    },
    {
      "epoch": 0.26225686603045095,
      "grad_norm": 0.828125,
      "learning_rate": 0.00018441097108899332,
      "loss": 2.4766,
      "step": 675
    },
    {
      "epoch": 0.2626453947208664,
      "grad_norm": 0.8515625,
      "learning_rate": 0.0001843381318614229,
      "loss": 2.4765,
      "step": 676
    },
    {
      "epoch": 0.2630339234112819,
      "grad_norm": 0.9140625,
      "learning_rate": 0.0001842651373158971,
      "loss": 2.4458,
      "step": 677
    },
    {
      "epoch": 0.2634224521016974,
      "grad_norm": 0.83984375,
      "learning_rate": 0.0001841919875868433,
      "loss": 2.3816,
      "step": 678
    },
    {
      "epoch": 0.26381098079211285,
      "grad_norm": 0.8984375,
      "learning_rate": 0.00018411868280897488,
      "loss": 2.4251,
      "step": 679
    },
    {
      "epoch": 0.26419950948252835,
      "grad_norm": 0.83984375,
      "learning_rate": 0.00018404522311729062,
      "loss": 2.4098,
      "step": 680
    },
    {
      "epoch": 0.26458803817294385,
      "grad_norm": 0.828125,
      "learning_rate": 0.00018397160864707467,
      "loss": 2.4377,
      "step": 681
    },
    {
      "epoch": 0.2649765668633593,
      "grad_norm": 0.91796875,
      "learning_rate": 0.00018389783953389613,
      "loss": 2.4096,
      "step": 682
    },
    {
      "epoch": 0.2653650955537748,
      "grad_norm": 0.81640625,
      "learning_rate": 0.00018382391591360896,
      "loss": 2.4866,
      "step": 683
    },
    {
      "epoch": 0.2657536242441903,
      "grad_norm": 0.8203125,
      "learning_rate": 0.00018374983792235163,
      "loss": 2.4486,
      "step": 684
    },
    {
      "epoch": 0.26614215293460575,
      "grad_norm": 0.78515625,
      "learning_rate": 0.00018367560569654693,
      "loss": 2.4155,
      "step": 685
    },
    {
      "epoch": 0.26653068162502125,
      "grad_norm": 0.7734375,
      "learning_rate": 0.0001836012193729017,
      "loss": 2.3801,
      "step": 686
    },
    {
      "epoch": 0.26691921031543675,
      "grad_norm": 0.77734375,
      "learning_rate": 0.00018352667908840654,
      "loss": 2.3832,
      "step": 687
    },
    {
      "epoch": 0.2673077390058522,
      "grad_norm": 0.80859375,
      "learning_rate": 0.00018345198498033558,
      "loss": 2.4371,
      "step": 688
    },
    {
      "epoch": 0.2676962676962677,
      "grad_norm": 0.796875,
      "learning_rate": 0.00018337713718624623,
      "loss": 2.5273,
      "step": 689
    },
    {
      "epoch": 0.2680847963866832,
      "grad_norm": 0.8046875,
      "learning_rate": 0.00018330213584397896,
      "loss": 2.4359,
      "step": 690
    },
    {
      "epoch": 0.26847332507709865,
      "grad_norm": 0.76953125,
      "learning_rate": 0.000183226981091657,
      "loss": 2.5058,
      "step": 691
    },
    {
      "epoch": 0.26886185376751415,
      "grad_norm": 0.80078125,
      "learning_rate": 0.0001831516730676861,
      "loss": 2.4414,
      "step": 692
    },
    {
      "epoch": 0.26925038245792965,
      "grad_norm": 0.84765625,
      "learning_rate": 0.00018307621191075425,
      "loss": 2.4316,
      "step": 693
    },
    {
      "epoch": 0.2696389111483451,
      "grad_norm": 0.8046875,
      "learning_rate": 0.00018300059775983152,
      "loss": 2.4051,
      "step": 694
    },
    {
      "epoch": 0.2700274398387606,
      "grad_norm": 0.828125,
      "learning_rate": 0.0001829248307541697,
      "loss": 2.4377,
      "step": 695
    },
    {
      "epoch": 0.2704159685291761,
      "grad_norm": 0.78125,
      "learning_rate": 0.000182848911033302,
      "loss": 2.3461,
      "step": 696
    },
    {
      "epoch": 0.27080449721959154,
      "grad_norm": 0.88671875,
      "learning_rate": 0.000182772838737043,
      "loss": 2.4788,
      "step": 697
    },
    {
      "epoch": 0.27119302591000705,
      "grad_norm": 0.78515625,
      "learning_rate": 0.00018269661400548823,
      "loss": 2.4301,
      "step": 698
    },
    {
      "epoch": 0.27158155460042255,
      "grad_norm": 0.8984375,
      "learning_rate": 0.0001826202369790139,
      "loss": 2.2877,
      "step": 699
    },
    {
      "epoch": 0.271970083290838,
      "grad_norm": 0.91796875,
      "learning_rate": 0.00018254370779827668,
      "loss": 2.4765,
      "step": 700
    },
    {
      "epoch": 0.2723586119812535,
      "grad_norm": 0.79296875,
      "learning_rate": 0.00018246702660421356,
      "loss": 2.371,
      "step": 701
    },
    {
      "epoch": 0.272747140671669,
      "grad_norm": 0.86328125,
      "learning_rate": 0.00018239019353804133,
      "loss": 2.4542,
      "step": 702
    },
    {
      "epoch": 0.27313566936208444,
      "grad_norm": 1.2265625,
      "learning_rate": 0.00018231320874125654,
      "loss": 2.4434,
      "step": 703
    },
    {
      "epoch": 0.27352419805249994,
      "grad_norm": 1.890625,
      "learning_rate": 0.00018223607235563524,
      "loss": 2.4492,
      "step": 704
    },
    {
      "epoch": 0.27391272674291545,
      "grad_norm": 0.91015625,
      "learning_rate": 0.0001821587845232325,
      "loss": 2.4152,
      "step": 705
    },
    {
      "epoch": 0.2743012554333309,
      "grad_norm": 1.0625,
      "learning_rate": 0.00018208134538638245,
      "loss": 2.4712,
      "step": 706
    },
    {
      "epoch": 0.2746897841237464,
      "grad_norm": 0.8046875,
      "learning_rate": 0.00018200375508769772,
      "loss": 2.4033,
      "step": 707
    },
    {
      "epoch": 0.2750783128141619,
      "grad_norm": 0.84765625,
      "learning_rate": 0.0001819260137700694,
      "loss": 2.4302,
      "step": 708
    },
    {
      "epoch": 0.27546684150457734,
      "grad_norm": 0.93359375,
      "learning_rate": 0.00018184812157666667,
      "loss": 2.3352,
      "step": 709
    },
    {
      "epoch": 0.27585537019499284,
      "grad_norm": 0.8984375,
      "learning_rate": 0.00018177007865093664,
      "loss": 2.4609,
      "step": 710
    },
    {
      "epoch": 0.27624389888540835,
      "grad_norm": 0.765625,
      "learning_rate": 0.0001816918851366039,
      "loss": 2.4525,
      "step": 711
    },
    {
      "epoch": 0.2766324275758238,
      "grad_norm": 0.9296875,
      "learning_rate": 0.00018161354117767045,
      "loss": 2.4259,
      "step": 712
    },
    {
      "epoch": 0.2770209562662393,
      "grad_norm": 0.8515625,
      "learning_rate": 0.00018153504691841528,
      "loss": 2.4029,
      "step": 713
    },
    {
      "epoch": 0.2774094849566548,
      "grad_norm": 0.80078125,
      "learning_rate": 0.00018145640250339425,
      "loss": 2.3984,
      "step": 714
    },
    {
      "epoch": 0.27779801364707024,
      "grad_norm": 0.83984375,
      "learning_rate": 0.00018137760807743965,
      "loss": 2.4384,
      "step": 715
    },
    {
      "epoch": 0.27818654233748574,
      "grad_norm": 0.8671875,
      "learning_rate": 0.00018129866378566013,
      "loss": 2.427,
      "step": 716
    },
    {
      "epoch": 0.27857507102790124,
      "grad_norm": 0.87109375,
      "learning_rate": 0.00018121956977344033,
      "loss": 2.4698,
      "step": 717
    },
    {
      "epoch": 0.2789635997183167,
      "grad_norm": 0.81640625,
      "learning_rate": 0.00018114032618644053,
      "loss": 2.3839,
      "step": 718
    },
    {
      "epoch": 0.2793521284087322,
      "grad_norm": 0.8203125,
      "learning_rate": 0.0001810609331705965,
      "loss": 2.4457,
      "step": 719
    },
    {
      "epoch": 0.27974065709914764,
      "grad_norm": 1.0,
      "learning_rate": 0.00018098139087211927,
      "loss": 2.4193,
      "step": 720
    },
    {
      "epoch": 0.28012918578956314,
      "grad_norm": 1.09375,
      "learning_rate": 0.00018090169943749476,
      "loss": 2.4061,
      "step": 721
    },
    {
      "epoch": 0.28051771447997864,
      "grad_norm": 0.91796875,
      "learning_rate": 0.00018082185901348346,
      "loss": 2.3813,
      "step": 722
    },
    {
      "epoch": 0.2809062431703941,
      "grad_norm": 0.94140625,
      "learning_rate": 0.00018074186974712032,
      "loss": 2.4133,
      "step": 723
    },
    {
      "epoch": 0.2812947718608096,
      "grad_norm": 0.94921875,
      "learning_rate": 0.0001806617317857144,
      "loss": 2.4118,
      "step": 724
    },
    {
      "epoch": 0.2816833005512251,
      "grad_norm": 0.80859375,
      "learning_rate": 0.00018058144527684854,
      "loss": 2.4778,
      "step": 725
    },
    {
      "epoch": 0.28207182924164054,
      "grad_norm": 1.09375,
      "learning_rate": 0.00018050101036837926,
      "loss": 2.4505,
      "step": 726
    },
    {
      "epoch": 0.28246035793205604,
      "grad_norm": 1.078125,
      "learning_rate": 0.00018042042720843623,
      "loss": 2.3984,
      "step": 727
    },
    {
      "epoch": 0.28284888662247154,
      "grad_norm": 0.8828125,
      "learning_rate": 0.0001803396959454222,
      "loss": 2.4154,
      "step": 728
    },
    {
      "epoch": 0.283237415312887,
      "grad_norm": 1.078125,
      "learning_rate": 0.00018025881672801273,
      "loss": 2.4028,
      "step": 729
    },
    {
      "epoch": 0.2836259440033025,
      "grad_norm": 0.9140625,
      "learning_rate": 0.0001801777897051558,
      "loss": 2.4469,
      "step": 730
    },
    {
      "epoch": 0.284014472693718,
      "grad_norm": 0.9609375,
      "learning_rate": 0.00018009661502607158,
      "loss": 2.2968,
      "step": 731
    },
    {
      "epoch": 0.28440300138413344,
      "grad_norm": 1.03125,
      "learning_rate": 0.0001800152928402522,
      "loss": 2.4668,
      "step": 732
    },
    {
      "epoch": 0.28479153007454894,
      "grad_norm": 1.0546875,
      "learning_rate": 0.00017993382329746133,
      "loss": 2.4243,
      "step": 733
    },
    {
      "epoch": 0.28518005876496444,
      "grad_norm": 0.7265625,
      "learning_rate": 0.0001798522065477342,
      "loss": 2.3054,
      "step": 734
    },
    {
      "epoch": 0.2855685874553799,
      "grad_norm": 0.96875,
      "learning_rate": 0.00017977044274137703,
      "loss": 2.3318,
      "step": 735
    },
    {
      "epoch": 0.2859571161457954,
      "grad_norm": 0.96875,
      "learning_rate": 0.00017968853202896684,
      "loss": 2.3392,
      "step": 736
    },
    {
      "epoch": 0.2863456448362109,
      "grad_norm": 0.796875,
      "learning_rate": 0.00017960647456135125,
      "loss": 2.3525,
      "step": 737
    },
    {
      "epoch": 0.28673417352662633,
      "grad_norm": 0.92578125,
      "learning_rate": 0.0001795242704896481,
      "loss": 2.4782,
      "step": 738
    },
    {
      "epoch": 0.28712270221704184,
      "grad_norm": 0.8828125,
      "learning_rate": 0.00017944191996524525,
      "loss": 2.4221,
      "step": 739
    },
    {
      "epoch": 0.28751123090745734,
      "grad_norm": 0.86328125,
      "learning_rate": 0.00017935942313980022,
      "loss": 2.4797,
      "step": 740
    },
    {
      "epoch": 0.2878997595978728,
      "grad_norm": 0.79296875,
      "learning_rate": 0.00017927678016524005,
      "loss": 2.4934,
      "step": 741
    },
    {
      "epoch": 0.2882882882882883,
      "grad_norm": 0.8984375,
      "learning_rate": 0.00017919399119376087,
      "loss": 2.4664,
      "step": 742
    },
    {
      "epoch": 0.2886768169787038,
      "grad_norm": 1.140625,
      "learning_rate": 0.00017911105637782767,
      "loss": 2.4354,
      "step": 743
    },
    {
      "epoch": 0.28906534566911923,
      "grad_norm": 0.8125,
      "learning_rate": 0.00017902797587017406,
      "loss": 2.4726,
      "step": 744
    },
    {
      "epoch": 0.28945387435953474,
      "grad_norm": 1.0,
      "learning_rate": 0.00017894474982380193,
      "loss": 2.4309,
      "step": 745
    },
    {
      "epoch": 0.28984240304995024,
      "grad_norm": 0.95703125,
      "learning_rate": 0.0001788613783919812,
      "loss": 2.4065,
      "step": 746
    },
    {
      "epoch": 0.2902309317403657,
      "grad_norm": 0.74609375,
      "learning_rate": 0.00017877786172824952,
      "loss": 2.399,
      "step": 747
    },
    {
      "epoch": 0.2906194604307812,
      "grad_norm": 0.82421875,
      "learning_rate": 0.0001786941999864121,
      "loss": 2.4409,
      "step": 748
    },
    {
      "epoch": 0.2910079891211967,
      "grad_norm": 0.87890625,
      "learning_rate": 0.0001786103933205412,
      "loss": 2.3703,
      "step": 749
    },
    {
      "epoch": 0.29139651781161213,
      "grad_norm": 0.97265625,
      "learning_rate": 0.000178526441884976,
      "loss": 2.389,
      "step": 750
    },
    {
      "epoch": 0.29178504650202763,
      "grad_norm": 0.8984375,
      "learning_rate": 0.00017844234583432232,
      "loss": 2.4237,
      "step": 751
    },
    {
      "epoch": 0.29217357519244314,
      "grad_norm": 0.83984375,
      "learning_rate": 0.00017835810532345231,
      "loss": 2.4474,
      "step": 752
    },
    {
      "epoch": 0.2925621038828586,
      "grad_norm": 0.8671875,
      "learning_rate": 0.00017827372050750414,
      "loss": 2.3757,
      "step": 753
    },
    {
      "epoch": 0.2929506325732741,
      "grad_norm": 0.83203125,
      "learning_rate": 0.00017818919154188179,
      "loss": 2.4017,
      "step": 754
    },
    {
      "epoch": 0.2933391612636896,
      "grad_norm": 0.87109375,
      "learning_rate": 0.00017810451858225459,
      "loss": 2.4833,
      "step": 755
    },
    {
      "epoch": 0.29372768995410503,
      "grad_norm": 0.83203125,
      "learning_rate": 0.0001780197017845571,
      "loss": 2.4328,
      "step": 756
    },
    {
      "epoch": 0.29411621864452053,
      "grad_norm": 0.78515625,
      "learning_rate": 0.0001779347413049889,
      "loss": 2.3754,
      "step": 757
    },
    {
      "epoch": 0.29450474733493603,
      "grad_norm": 0.77734375,
      "learning_rate": 0.00017784963730001395,
      "loss": 2.4475,
      "step": 758
    },
    {
      "epoch": 0.2948932760253515,
      "grad_norm": 0.7890625,
      "learning_rate": 0.00017776438992636072,
      "loss": 2.4114,
      "step": 759
    },
    {
      "epoch": 0.295281804715767,
      "grad_norm": 0.78515625,
      "learning_rate": 0.00017767899934102154,
      "loss": 2.4167,
      "step": 760
    },
    {
      "epoch": 0.2956703334061825,
      "grad_norm": 0.8046875,
      "learning_rate": 0.0001775934657012527,
      "loss": 2.4213,
      "step": 761
    },
    {
      "epoch": 0.29605886209659793,
      "grad_norm": 0.87109375,
      "learning_rate": 0.00017750778916457373,
      "loss": 2.3988,
      "step": 762
    },
    {
      "epoch": 0.29644739078701343,
      "grad_norm": 0.77734375,
      "learning_rate": 0.00017742196988876748,
      "loss": 2.416,
      "step": 763
    },
    {
      "epoch": 0.29683591947742893,
      "grad_norm": 0.859375,
      "learning_rate": 0.00017733600803187952,
      "loss": 2.4384,
      "step": 764
    },
    {
      "epoch": 0.2972244481678444,
      "grad_norm": 0.83984375,
      "learning_rate": 0.0001772499037522181,
      "loss": 2.3419,
      "step": 765
    },
    {
      "epoch": 0.2976129768582599,
      "grad_norm": 0.7421875,
      "learning_rate": 0.0001771636572083537,
      "loss": 2.4816,
      "step": 766
    },
    {
      "epoch": 0.2980015055486754,
      "grad_norm": 0.87890625,
      "learning_rate": 0.00017707726855911888,
      "loss": 2.4267,
      "step": 767
    },
    {
      "epoch": 0.29839003423909083,
      "grad_norm": 0.80859375,
      "learning_rate": 0.0001769907379636078,
      "loss": 2.3589,
      "step": 768
    },
    {
      "epoch": 0.29877856292950633,
      "grad_norm": 0.8359375,
      "learning_rate": 0.00017690406558117615,
      "loss": 2.4548,
      "step": 769
    },
    {
      "epoch": 0.29916709161992183,
      "grad_norm": 0.83984375,
      "learning_rate": 0.0001768172515714406,
      "loss": 2.4251,
      "step": 770
    },
    {
      "epoch": 0.2995556203103373,
      "grad_norm": 0.828125,
      "learning_rate": 0.0001767302960942787,
      "loss": 2.3883,
      "step": 771
    },
    {
      "epoch": 0.2999441490007528,
      "grad_norm": 0.87109375,
      "learning_rate": 0.00017664319930982863,
      "loss": 2.4494,
      "step": 772
    },
    {
      "epoch": 0.3003326776911683,
      "grad_norm": 0.8046875,
      "learning_rate": 0.00017655596137848857,
      "loss": 2.4177,
      "step": 773
    },
    {
      "epoch": 0.3007212063815837,
      "grad_norm": 0.8515625,
      "learning_rate": 0.00017646858246091692,
      "loss": 2.393,
      "step": 774
    },
    {
      "epoch": 0.30110973507199923,
      "grad_norm": 0.90625,
      "learning_rate": 0.00017638106271803143,
      "loss": 2.5158,
      "step": 775
    },
    {
      "epoch": 0.30149826376241473,
      "grad_norm": 0.8359375,
      "learning_rate": 0.00017629340231100943,
      "loss": 2.4587,
      "step": 776
    },
    {
      "epoch": 0.3018867924528302,
      "grad_norm": 1.0859375,
      "learning_rate": 0.00017620560140128722,
      "loss": 2.4661,
      "step": 777
    },
    {
      "epoch": 0.3022753211432457,
      "grad_norm": 0.8671875,
      "learning_rate": 0.0001761176601505598,
      "loss": 2.4737,
      "step": 778
    },
    {
      "epoch": 0.3026638498336612,
      "grad_norm": 0.9296875,
      "learning_rate": 0.0001760295787207807,
      "loss": 2.4462,
      "step": 779
    },
    {
      "epoch": 0.3030523785240766,
      "grad_norm": 0.765625,
      "learning_rate": 0.00017594135727416155,
      "loss": 2.4242,
      "step": 780
    },
    {
      "epoch": 0.30344090721449213,
      "grad_norm": 1.0234375,
      "learning_rate": 0.00017585299597317185,
      "loss": 2.4995,
      "step": 781
    },
    {
      "epoch": 0.30382943590490763,
      "grad_norm": 0.8515625,
      "learning_rate": 0.00017576449498053867,
      "loss": 2.4151,
      "step": 782
    },
    {
      "epoch": 0.3042179645953231,
      "grad_norm": 0.8515625,
      "learning_rate": 0.00017567585445924633,
      "loss": 2.3866,
      "step": 783
    },
    {
      "epoch": 0.3046064932857386,
      "grad_norm": 0.90625,
      "learning_rate": 0.00017558707457253613,
      "loss": 2.4381,
      "step": 784
    },
    {
      "epoch": 0.3049950219761541,
      "grad_norm": 0.83203125,
      "learning_rate": 0.000175498155483906,
      "loss": 2.4386,
      "step": 785
    },
    {
      "epoch": 0.3053835506665695,
      "grad_norm": 0.8203125,
      "learning_rate": 0.0001754090973571102,
      "loss": 2.4855,
      "step": 786
    },
    {
      "epoch": 0.305772079356985,
      "grad_norm": 0.80859375,
      "learning_rate": 0.00017531990035615915,
      "loss": 2.3814,
      "step": 787
    },
    {
      "epoch": 0.3061606080474005,
      "grad_norm": 0.8046875,
      "learning_rate": 0.00017523056464531885,
      "loss": 2.4152,
      "step": 788
    },
    {
      "epoch": 0.306549136737816,
      "grad_norm": 0.87890625,
      "learning_rate": 0.0001751410903891109,
      "loss": 2.4139,
      "step": 789
    },
    {
      "epoch": 0.3069376654282315,
      "grad_norm": 0.83203125,
      "learning_rate": 0.000175051477752312,
      "loss": 2.4099,
      "step": 790
    },
    {
      "epoch": 0.3073261941186469,
      "grad_norm": 0.84765625,
      "learning_rate": 0.00017496172689995368,
      "loss": 2.4541,
      "step": 791
    },
    {
      "epoch": 0.3077147228090624,
      "grad_norm": 0.84375,
      "learning_rate": 0.00017487183799732203,
      "loss": 2.5013,
      "step": 792
    },
    {
      "epoch": 0.3081032514994779,
      "grad_norm": 0.796875,
      "learning_rate": 0.0001747818112099573,
      "loss": 2.413,
      "step": 793
    },
    {
      "epoch": 0.30849178018989337,
      "grad_norm": 0.80859375,
      "learning_rate": 0.0001746916467036538,
      "loss": 2.4217,
      "step": 794
    },
    {
      "epoch": 0.3088803088803089,
      "grad_norm": 0.87109375,
      "learning_rate": 0.00017460134464445935,
      "loss": 2.4751,
      "step": 795
    },
    {
      "epoch": 0.3092688375707244,
      "grad_norm": 0.875,
      "learning_rate": 0.00017451090519867517,
      "loss": 2.4224,
      "step": 796
    },
    {
      "epoch": 0.3096573662611398,
      "grad_norm": 0.81640625,
      "learning_rate": 0.00017442032853285543,
      "loss": 2.3641,
      "step": 797
    },
    {
      "epoch": 0.3100458949515553,
      "grad_norm": 0.94921875,
      "learning_rate": 0.00017432961481380707,
      "loss": 2.5269,
      "step": 798
    },
    {
      "epoch": 0.3104344236419708,
      "grad_norm": 0.76953125,
      "learning_rate": 0.00017423876420858932,
      "loss": 2.3866,
      "step": 799
    },
    {
      "epoch": 0.31082295233238627,
      "grad_norm": 0.7890625,
      "learning_rate": 0.00017414777688451368,
      "loss": 2.4445,
      "step": 800
    },
    {
      "epoch": 0.3112114810228018,
      "grad_norm": 0.91015625,
      "learning_rate": 0.0001740566530091432,
      "loss": 2.4237,
      "step": 801
    },
    {
      "epoch": 0.3116000097132173,
      "grad_norm": 0.87109375,
      "learning_rate": 0.00017396539275029262,
      "loss": 2.4056,
      "step": 802
    },
    {
      "epoch": 0.3119885384036327,
      "grad_norm": 0.91796875,
      "learning_rate": 0.00017387399627602772,
      "loss": 2.4517,
      "step": 803
    },
    {
      "epoch": 0.3123770670940482,
      "grad_norm": 0.9375,
      "learning_rate": 0.00017378246375466513,
      "loss": 2.4313,
      "step": 804
    },
    {
      "epoch": 0.3127655957844637,
      "grad_norm": 0.8203125,
      "learning_rate": 0.00017369079535477217,
      "loss": 2.4828,
      "step": 805
    },
    {
      "epoch": 0.31315412447487917,
      "grad_norm": 0.89453125,
      "learning_rate": 0.00017359899124516623,
      "loss": 2.4158,
      "step": 806
    },
    {
      "epoch": 0.31354265316529467,
      "grad_norm": 0.921875,
      "learning_rate": 0.00017350705159491464,
      "loss": 2.3548,
      "step": 807
    },
    {
      "epoch": 0.3139311818557102,
      "grad_norm": 0.83984375,
      "learning_rate": 0.00017341497657333448,
      "loss": 2.427,
      "step": 808
    },
    {
      "epoch": 0.3143197105461256,
      "grad_norm": 0.76171875,
      "learning_rate": 0.000173322766349992,
      "loss": 2.4104,
      "step": 809
    },
    {
      "epoch": 0.3147082392365411,
      "grad_norm": 0.859375,
      "learning_rate": 0.00017323042109470245,
      "loss": 2.4454,
      "step": 810
    },
    {
      "epoch": 0.3150967679269566,
      "grad_norm": 0.91796875,
      "learning_rate": 0.0001731379409775298,
      "loss": 2.4989,
      "step": 811
    },
    {
      "epoch": 0.31548529661737207,
      "grad_norm": 0.87890625,
      "learning_rate": 0.0001730453261687864,
      "loss": 2.4115,
      "step": 812
    },
    {
      "epoch": 0.31587382530778757,
      "grad_norm": 0.84375,
      "learning_rate": 0.00017295257683903257,
      "loss": 2.3898,
      "step": 813
    },
    {
      "epoch": 0.31626235399820307,
      "grad_norm": 0.96875,
      "learning_rate": 0.00017285969315907648,
      "loss": 2.3612,
      "step": 814
    },
    {
      "epoch": 0.3166508826886185,
      "grad_norm": 0.86328125,
      "learning_rate": 0.00017276667529997355,
      "loss": 2.4465,
      "step": 815
    },
    {
      "epoch": 0.317039411379034,
      "grad_norm": 0.80859375,
      "learning_rate": 0.0001726735234330265,
      "loss": 2.4731,
      "step": 816
    },
    {
      "epoch": 0.3174279400694495,
      "grad_norm": 0.82421875,
      "learning_rate": 0.0001725802377297847,
      "loss": 2.4403,
      "step": 817
    },
    {
      "epoch": 0.31781646875986497,
      "grad_norm": 0.89453125,
      "learning_rate": 0.0001724868183620441,
      "loss": 2.3637,
      "step": 818
    },
    {
      "epoch": 0.31820499745028047,
      "grad_norm": 0.78515625,
      "learning_rate": 0.00017239326550184668,
      "loss": 2.3778,
      "step": 819
    },
    {
      "epoch": 0.31859352614069597,
      "grad_norm": 0.8046875,
      "learning_rate": 0.00017229957932148035,
      "loss": 2.451,
      "step": 820
    },
    {
      "epoch": 0.3189820548311114,
      "grad_norm": 0.79296875,
      "learning_rate": 0.00017220575999347856,
      "loss": 2.3804,
      "step": 821
    },
    {
      "epoch": 0.3193705835215269,
      "grad_norm": 0.84765625,
      "learning_rate": 0.0001721118076906199,
      "loss": 2.3504,
      "step": 822
    },
    {
      "epoch": 0.3197591122119424,
      "grad_norm": 0.796875,
      "learning_rate": 0.0001720177225859279,
      "loss": 2.4297,
      "step": 823
    },
    {
      "epoch": 0.32014764090235787,
      "grad_norm": 0.78515625,
      "learning_rate": 0.00017192350485267064,
      "loss": 2.3792,
      "step": 824
    },
    {
      "epoch": 0.32053616959277337,
      "grad_norm": 0.82421875,
      "learning_rate": 0.00017182915466436045,
      "loss": 2.3826,
      "step": 825
    },
    {
      "epoch": 0.32092469828318887,
      "grad_norm": 0.7890625,
      "learning_rate": 0.00017173467219475352,
      "loss": 2.3815,
      "step": 826
    },
    {
      "epoch": 0.3213132269736043,
      "grad_norm": 0.82421875,
      "learning_rate": 0.00017164005761784984,
      "loss": 2.4616,
      "step": 827
    },
    {
      "epoch": 0.3217017556640198,
      "grad_norm": 0.84765625,
      "learning_rate": 0.00017154531110789248,
      "loss": 2.3989,
      "step": 828
    },
    {
      "epoch": 0.3220902843544353,
      "grad_norm": 0.86328125,
      "learning_rate": 0.0001714504328393676,
      "loss": 2.4294,
      "step": 829
    },
    {
      "epoch": 0.32247881304485077,
      "grad_norm": 0.89453125,
      "learning_rate": 0.00017135542298700397,
      "loss": 2.4438,
      "step": 830
    },
    {
      "epoch": 0.32286734173526627,
      "grad_norm": 0.76953125,
      "learning_rate": 0.00017126028172577274,
      "loss": 2.4648,
      "step": 831
    },
    {
      "epoch": 0.32325587042568177,
      "grad_norm": 0.8984375,
      "learning_rate": 0.00017116500923088697,
      "loss": 2.5006,
      "step": 832
    },
    {
      "epoch": 0.3236443991160972,
      "grad_norm": 0.84765625,
      "learning_rate": 0.0001710696056778014,
      "loss": 2.4287,
      "step": 833
    },
    {
      "epoch": 0.3240329278065127,
      "grad_norm": 0.83984375,
      "learning_rate": 0.0001709740712422123,
      "loss": 2.3856,
      "step": 834
    },
    {
      "epoch": 0.3244214564969282,
      "grad_norm": 0.80078125,
      "learning_rate": 0.00017087840610005675,
      "loss": 2.4137,
      "step": 835
    },
    {
      "epoch": 0.32480998518734366,
      "grad_norm": 0.921875,
      "learning_rate": 0.0001707826104275127,
      "loss": 2.5085,
      "step": 836
    },
    {
      "epoch": 0.32519851387775917,
      "grad_norm": 0.85546875,
      "learning_rate": 0.0001706866844009984,
      "loss": 2.3359,
      "step": 837
    },
    {
      "epoch": 0.32558704256817467,
      "grad_norm": 0.703125,
      "learning_rate": 0.00017059062819717218,
      "loss": 2.3356,
      "step": 838
    },
    {
      "epoch": 0.3259755712585901,
      "grad_norm": 0.8671875,
      "learning_rate": 0.00017049444199293215,
      "loss": 2.4243,
      "step": 839
    },
    {
      "epoch": 0.3263640999490056,
      "grad_norm": 0.91015625,
      "learning_rate": 0.00017039812596541574,
      "loss": 2.4393,
      "step": 840
    },
    {
      "epoch": 0.3267526286394211,
      "grad_norm": 0.82421875,
      "learning_rate": 0.00017030168029199958,
      "loss": 2.3736,
      "step": 841
    },
    {
      "epoch": 0.32714115732983656,
      "grad_norm": 0.875,
      "learning_rate": 0.00017020510515029894,
      "loss": 2.4372,
      "step": 842
    },
    {
      "epoch": 0.32752968602025206,
      "grad_norm": 0.88671875,
      "learning_rate": 0.00017010840071816764,
      "loss": 2.4257,
      "step": 843
    },
    {
      "epoch": 0.32791821471066757,
      "grad_norm": 0.85546875,
      "learning_rate": 0.0001700115671736975,
      "loss": 2.3996,
      "step": 844
    },
    {
      "epoch": 0.328306743401083,
      "grad_norm": 0.85546875,
      "learning_rate": 0.00016991460469521817,
      "loss": 2.3186,
      "step": 845
    },
    {
      "epoch": 0.3286952720914985,
      "grad_norm": 1.15625,
      "learning_rate": 0.00016981751346129668,
      "loss": 2.4109,
      "step": 846
    },
    {
      "epoch": 0.329083800781914,
      "grad_norm": 0.76953125,
      "learning_rate": 0.0001697202936507373,
      "loss": 2.4756,
      "step": 847
    },
    {
      "epoch": 0.32947232947232946,
      "grad_norm": 0.9375,
      "learning_rate": 0.00016962294544258096,
      "loss": 2.4485,
      "step": 848
    },
    {
      "epoch": 0.32986085816274496,
      "grad_norm": 0.91796875,
      "learning_rate": 0.00016952546901610513,
      "loss": 2.3982,
      "step": 849
    },
    {
      "epoch": 0.33024938685316046,
      "grad_norm": 0.76953125,
      "learning_rate": 0.0001694278645508234,
      "loss": 2.4209,
      "step": 850
    },
    {
      "epoch": 0.3306379155435759,
      "grad_norm": 0.82421875,
      "learning_rate": 0.00016933013222648508,
      "loss": 2.4256,
      "step": 851
    },
    {
      "epoch": 0.3310264442339914,
      "grad_norm": 0.8359375,
      "learning_rate": 0.00016923227222307506,
      "loss": 2.4372,
      "step": 852
    },
    {
      "epoch": 0.3314149729244069,
      "grad_norm": 0.8046875,
      "learning_rate": 0.00016913428472081326,
      "loss": 2.366,
      "step": 853
    },
    {
      "epoch": 0.33180350161482236,
      "grad_norm": 0.8125,
      "learning_rate": 0.00016903616990015453,
      "loss": 2.3984,
      "step": 854
    },
    {
      "epoch": 0.33219203030523786,
      "grad_norm": 0.92578125,
      "learning_rate": 0.00016893792794178805,
      "loss": 2.3797,
      "step": 855
    },
    {
      "epoch": 0.3325805589956533,
      "grad_norm": 0.82421875,
      "learning_rate": 0.0001688395590266372,
      "loss": 2.4639,
      "step": 856
    },
    {
      "epoch": 0.3329690876860688,
      "grad_norm": 0.828125,
      "learning_rate": 0.0001687410633358592,
      "loss": 2.378,
      "step": 857
    },
    {
      "epoch": 0.3333576163764843,
      "grad_norm": 0.87890625,
      "learning_rate": 0.00016864244105084473,
      "loss": 2.4141,
      "step": 858
    },
    {
      "epoch": 0.33374614506689976,
      "grad_norm": 0.80078125,
      "learning_rate": 0.00016854369235321754,
      "loss": 2.4147,
      "step": 859
    },
    {
      "epoch": 0.33413467375731526,
      "grad_norm": 0.78125,
      "learning_rate": 0.00016844481742483424,
      "loss": 2.3797,
      "step": 860
    },
    {
      "epoch": 0.33452320244773076,
      "grad_norm": 0.8359375,
      "learning_rate": 0.0001683458164477839,
      "loss": 2.3711,
      "step": 861
    },
    {
      "epoch": 0.3349117311381462,
      "grad_norm": 0.85546875,
      "learning_rate": 0.00016824668960438771,
      "loss": 2.4823,
      "step": 862
    },
    {
      "epoch": 0.3353002598285617,
      "grad_norm": 0.875,
      "learning_rate": 0.00016814743707719868,
      "loss": 2.464,
      "step": 863
    },
    {
      "epoch": 0.3356887885189772,
      "grad_norm": 0.88671875,
      "learning_rate": 0.00016804805904900127,
      "loss": 2.4662,
      "step": 864
    },
    {
      "epoch": 0.33607731720939266,
      "grad_norm": 1.0078125,
      "learning_rate": 0.00016794855570281106,
      "loss": 2.3954,
      "step": 865
    },
    {
      "epoch": 0.33646584589980816,
      "grad_norm": 0.9140625,
      "learning_rate": 0.00016784892722187438,
      "loss": 2.4792,
      "step": 866
    },
    {
      "epoch": 0.33685437459022366,
      "grad_norm": 0.87890625,
      "learning_rate": 0.0001677491737896681,
      "loss": 2.3987,
      "step": 867
    },
    {
      "epoch": 0.3372429032806391,
      "grad_norm": 0.78125,
      "learning_rate": 0.00016764929558989907,
      "loss": 2.3545,
      "step": 868
    },
    {
      "epoch": 0.3376314319710546,
      "grad_norm": 0.91015625,
      "learning_rate": 0.0001675492928065041,
      "loss": 2.3862,
      "step": 869
    },
    {
      "epoch": 0.3380199606614701,
      "grad_norm": 0.84375,
      "learning_rate": 0.00016744916562364928,
      "loss": 2.4111,
      "step": 870
    },
    {
      "epoch": 0.33840848935188556,
      "grad_norm": 0.8828125,
      "learning_rate": 0.0001673489142257298,
      "loss": 2.4104,
      "step": 871
    },
    {
      "epoch": 0.33879701804230106,
      "grad_norm": 0.8046875,
      "learning_rate": 0.0001672485387973697,
      "loss": 2.3908,
      "step": 872
    },
    {
      "epoch": 0.33918554673271656,
      "grad_norm": 0.8203125,
      "learning_rate": 0.00016714803952342134,
      "loss": 2.3626,
      "step": 873
    },
    {
      "epoch": 0.339574075423132,
      "grad_norm": 0.84765625,
      "learning_rate": 0.00016704741658896526,
      "loss": 2.4738,
      "step": 874
    },
    {
      "epoch": 0.3399626041135475,
      "grad_norm": 0.80859375,
      "learning_rate": 0.0001669466701793096,
      "loss": 2.515,
      "step": 875
    },
    {
      "epoch": 0.340351132803963,
      "grad_norm": 0.78125,
      "learning_rate": 0.00016684580047999,
      "loss": 2.4536,
      "step": 876
    },
    {
      "epoch": 0.34073966149437845,
      "grad_norm": 0.765625,
      "learning_rate": 0.0001667448076767691,
      "loss": 2.4085,
      "step": 877
    },
    {
      "epoch": 0.34112819018479396,
      "grad_norm": 0.83203125,
      "learning_rate": 0.00016664369195563633,
      "loss": 2.4197,
      "step": 878
    },
    {
      "epoch": 0.34151671887520946,
      "grad_norm": 0.7734375,
      "learning_rate": 0.00016654245350280737,
      "loss": 2.3588,
      "step": 879
    },
    {
      "epoch": 0.3419052475656249,
      "grad_norm": 0.86328125,
      "learning_rate": 0.00016644109250472396,
      "loss": 2.4536,
      "step": 880
    },
    {
      "epoch": 0.3422937762560404,
      "grad_norm": 0.796875,
      "learning_rate": 0.00016633960914805358,
      "loss": 2.4565,
      "step": 881
    },
    {
      "epoch": 0.3426823049464559,
      "grad_norm": 0.7421875,
      "learning_rate": 0.00016623800361968894,
      "loss": 2.4165,
      "step": 882
    },
    {
      "epoch": 0.34307083363687135,
      "grad_norm": 0.74609375,
      "learning_rate": 0.0001661362761067479,
      "loss": 2.3367,
      "step": 883
    },
    {
      "epoch": 0.34345936232728685,
      "grad_norm": 0.7890625,
      "learning_rate": 0.0001660344267965728,
      "loss": 2.3624,
      "step": 884
    },
    {
      "epoch": 0.34384789101770236,
      "grad_norm": 0.75,
      "learning_rate": 0.0001659324558767304,
      "loss": 2.4241,
      "step": 885
    },
    {
      "epoch": 0.3442364197081178,
      "grad_norm": 0.80859375,
      "learning_rate": 0.00016583036353501134,
      "loss": 2.4773,
      "step": 886
    },
    {
      "epoch": 0.3446249483985333,
      "grad_norm": 0.7578125,
      "learning_rate": 0.00016572814995942993,
      "loss": 2.4632,
      "step": 887
    },
    {
      "epoch": 0.3450134770889488,
      "grad_norm": 0.7578125,
      "learning_rate": 0.00016562581533822375,
      "loss": 2.3947,
      "step": 888
    },
    {
      "epoch": 0.34540200577936425,
      "grad_norm": 0.796875,
      "learning_rate": 0.0001655233598598532,
      "loss": 2.4371,
      "step": 889
    },
    {
      "epoch": 0.34579053446977975,
      "grad_norm": 0.734375,
      "learning_rate": 0.0001654207837130014,
      "loss": 2.377,
      "step": 890
    },
    {
      "epoch": 0.34617906316019526,
      "grad_norm": 0.765625,
      "learning_rate": 0.0001653180870865736,
      "loss": 2.3894,
      "step": 891
    },
    {
      "epoch": 0.3465675918506107,
      "grad_norm": 0.83984375,
      "learning_rate": 0.00016521527016969689,
      "loss": 2.427,
      "step": 892
    },
    {
      "epoch": 0.3469561205410262,
      "grad_norm": 0.78125,
      "learning_rate": 0.00016511233315172,
      "loss": 2.4122,
      "step": 893
    },
    {
      "epoch": 0.3473446492314417,
      "grad_norm": 0.78515625,
      "learning_rate": 0.00016500927622221275,
      "loss": 2.4176,
      "step": 894
    },
    {
      "epoch": 0.34773317792185715,
      "grad_norm": 0.765625,
      "learning_rate": 0.00016490609957096588,
      "loss": 2.4046,
      "step": 895
    },
    {
      "epoch": 0.34812170661227265,
      "grad_norm": 0.84375,
      "learning_rate": 0.0001648028033879905,
      "loss": 2.3702,
      "step": 896
    },
    {
      "epoch": 0.34851023530268815,
      "grad_norm": 0.82421875,
      "learning_rate": 0.00016469938786351786,
      "loss": 2.4124,
      "step": 897
    },
    {
      "epoch": 0.3488987639931036,
      "grad_norm": 0.7734375,
      "learning_rate": 0.00016459585318799914,
      "loss": 2.3884,
      "step": 898
    },
    {
      "epoch": 0.3492872926835191,
      "grad_norm": 0.765625,
      "learning_rate": 0.00016449219955210476,
      "loss": 2.38,
      "step": 899
    },
    {
      "epoch": 0.3496758213739346,
      "grad_norm": 0.86328125,
      "learning_rate": 0.00016438842714672436,
      "loss": 2.4462,
      "step": 900
    },
    {
      "epoch": 0.35006435006435005,
      "grad_norm": 0.78125,
      "learning_rate": 0.0001642845361629662,
      "loss": 2.39,
      "step": 901
    },
    {
      "epoch": 0.35045287875476555,
      "grad_norm": 0.77734375,
      "learning_rate": 0.000164180526792157,
      "loss": 2.4532,
      "step": 902
    },
    {
      "epoch": 0.35084140744518105,
      "grad_norm": 0.9140625,
      "learning_rate": 0.0001640763992258415,
      "loss": 2.4183,
      "step": 903
    },
    {
      "epoch": 0.3512299361355965,
      "grad_norm": 0.828125,
      "learning_rate": 0.00016397215365578197,
      "loss": 2.4412,
      "step": 904
    },
    {
      "epoch": 0.351618464826012,
      "grad_norm": 0.83984375,
      "learning_rate": 0.00016386779027395822,
      "loss": 2.4002,
      "step": 905
    },
    {
      "epoch": 0.3520069935164275,
      "grad_norm": 0.83984375,
      "learning_rate": 0.00016376330927256682,
      "loss": 2.4019,
      "step": 906
    },
    {
      "epoch": 0.35239552220684295,
      "grad_norm": 0.765625,
      "learning_rate": 0.00016365871084402108,
      "loss": 2.4112,
      "step": 907
    },
    {
      "epoch": 0.35278405089725845,
      "grad_norm": 0.80078125,
      "learning_rate": 0.00016355399518095052,
      "loss": 2.3778,
      "step": 908
    },
    {
      "epoch": 0.35317257958767395,
      "grad_norm": 0.8203125,
      "learning_rate": 0.0001634491624762006,
      "loss": 2.4154,
      "step": 909
    },
    {
      "epoch": 0.3535611082780894,
      "grad_norm": 0.8828125,
      "learning_rate": 0.0001633442129228322,
      "loss": 2.4527,
      "step": 910
    },
    {
      "epoch": 0.3539496369685049,
      "grad_norm": 0.72265625,
      "learning_rate": 0.00016323914671412154,
      "loss": 2.3572,
      "step": 911
    },
    {
      "epoch": 0.3543381656589204,
      "grad_norm": 0.93359375,
      "learning_rate": 0.0001631339640435596,
      "loss": 2.4506,
      "step": 912
    },
    {
      "epoch": 0.35472669434933585,
      "grad_norm": 0.79296875,
      "learning_rate": 0.00016302866510485182,
      "loss": 2.3681,
      "step": 913
    },
    {
      "epoch": 0.35511522303975135,
      "grad_norm": 0.7890625,
      "learning_rate": 0.00016292325009191784,
      "loss": 2.4713,
      "step": 914
    },
    {
      "epoch": 0.35550375173016685,
      "grad_norm": 0.8125,
      "learning_rate": 0.00016281771919889098,
      "loss": 2.4329,
      "step": 915
    },
    {
      "epoch": 0.3558922804205823,
      "grad_norm": 0.93359375,
      "learning_rate": 0.000162712072620118,
      "loss": 2.4268,
      "step": 916
    },
    {
      "epoch": 0.3562808091109978,
      "grad_norm": 0.828125,
      "learning_rate": 0.0001626063105501587,
      "loss": 2.4348,
      "step": 917
    },
    {
      "epoch": 0.3566693378014133,
      "grad_norm": 0.74609375,
      "learning_rate": 0.00016250043318378563,
      "loss": 2.3795,
      "step": 918
    },
    {
      "epoch": 0.35705786649182875,
      "grad_norm": 0.7890625,
      "learning_rate": 0.00016239444071598354,
      "loss": 2.4631,
      "step": 919
    },
    {
      "epoch": 0.35744639518224425,
      "grad_norm": 0.76171875,
      "learning_rate": 0.00016228833334194927,
      "loss": 2.3645,
      "step": 920
    },
    {
      "epoch": 0.35783492387265975,
      "grad_norm": 0.734375,
      "learning_rate": 0.00016218211125709124,
      "loss": 2.4475,
      "step": 921
    },
    {
      "epoch": 0.3582234525630752,
      "grad_norm": 0.76171875,
      "learning_rate": 0.00016207577465702908,
      "loss": 2.4651,
      "step": 922
    },
    {
      "epoch": 0.3586119812534907,
      "grad_norm": 0.74609375,
      "learning_rate": 0.00016196932373759338,
      "loss": 2.416,
      "step": 923
    },
    {
      "epoch": 0.35900050994390614,
      "grad_norm": 0.8515625,
      "learning_rate": 0.0001618627586948252,
      "loss": 2.3977,
      "step": 924
    },
    {
      "epoch": 0.35938903863432164,
      "grad_norm": 0.76171875,
      "learning_rate": 0.00016175607972497585,
      "loss": 2.3937,
      "step": 925
    },
    {
      "epoch": 0.35977756732473715,
      "grad_norm": 0.73046875,
      "learning_rate": 0.00016164928702450633,
      "loss": 2.3936,
      "step": 926
    },
    {
      "epoch": 0.3601660960151526,
      "grad_norm": 0.74609375,
      "learning_rate": 0.00016154238079008719,
      "loss": 2.3655,
      "step": 927
    },
    {
      "epoch": 0.3605546247055681,
      "grad_norm": 0.7734375,
      "learning_rate": 0.00016143536121859803,
      "loss": 2.4051,
      "step": 928
    },
    {
      "epoch": 0.3609431533959836,
      "grad_norm": 0.7734375,
      "learning_rate": 0.0001613282285071272,
      "loss": 2.3786,
      "step": 929
    },
    {
      "epoch": 0.36133168208639904,
      "grad_norm": 0.79296875,
      "learning_rate": 0.00016122098285297132,
      "loss": 2.4234,
      "step": 930
    },
    {
      "epoch": 0.36172021077681454,
      "grad_norm": 0.8125,
      "learning_rate": 0.00016111362445363512,
      "loss": 2.3992,
      "step": 931
    },
    {
      "epoch": 0.36210873946723005,
      "grad_norm": 0.71484375,
      "learning_rate": 0.0001610061535068309,
      "loss": 2.3751,
      "step": 932
    },
    {
      "epoch": 0.3624972681576455,
      "grad_norm": 0.8046875,
      "learning_rate": 0.00016089857021047823,
      "loss": 2.4276,
      "step": 933
    },
    {
      "epoch": 0.362885796848061,
      "grad_norm": 0.82421875,
      "learning_rate": 0.0001607908747627036,
      "loss": 2.3446,
      "step": 934
    },
    {
      "epoch": 0.3632743255384765,
      "grad_norm": 0.95703125,
      "learning_rate": 0.00016068306736184004,
      "loss": 2.3987,
      "step": 935
    },
    {
      "epoch": 0.36366285422889194,
      "grad_norm": 0.76953125,
      "learning_rate": 0.00016057514820642677,
      "loss": 2.4452,
      "step": 936
    },
    {
      "epoch": 0.36405138291930744,
      "grad_norm": 0.8515625,
      "learning_rate": 0.00016046711749520876,
      "loss": 2.4444,
      "step": 937
    },
    {
      "epoch": 0.36443991160972294,
      "grad_norm": 0.78515625,
      "learning_rate": 0.00016035897542713648,
      "loss": 2.3775,
      "step": 938
    },
    {
      "epoch": 0.3648284403001384,
      "grad_norm": 0.8671875,
      "learning_rate": 0.00016025072220136543,
      "loss": 2.4025,
      "step": 939
    },
    {
      "epoch": 0.3652169689905539,
      "grad_norm": 0.81640625,
      "learning_rate": 0.00016014235801725587,
      "loss": 2.4327,
      "step": 940
    },
    {
      "epoch": 0.3656054976809694,
      "grad_norm": 0.82421875,
      "learning_rate": 0.00016003388307437238,
      "loss": 2.4564,
      "step": 941
    },
    {
      "epoch": 0.36599402637138484,
      "grad_norm": 0.84765625,
      "learning_rate": 0.00015992529757248347,
      "loss": 2.4282,
      "step": 942
    },
    {
      "epoch": 0.36638255506180034,
      "grad_norm": 0.7578125,
      "learning_rate": 0.00015981660171156134,
      "loss": 2.3523,
      "step": 943
    },
    {
      "epoch": 0.36677108375221584,
      "grad_norm": 0.78125,
      "learning_rate": 0.00015970779569178138,
      "loss": 2.4013,
      "step": 944
    },
    {
      "epoch": 0.3671596124426313,
      "grad_norm": 0.78515625,
      "learning_rate": 0.00015959887971352184,
      "loss": 2.3586,
      "step": 945
    },
    {
      "epoch": 0.3675481411330468,
      "grad_norm": 0.84375,
      "learning_rate": 0.00015948985397736348,
      "loss": 2.4387,
      "step": 946
    },
    {
      "epoch": 0.3679366698234623,
      "grad_norm": 0.8125,
      "learning_rate": 0.00015938071868408922,
      "loss": 2.3555,
      "step": 947
    },
    {
      "epoch": 0.36832519851387774,
      "grad_norm": 0.75,
      "learning_rate": 0.00015927147403468369,
      "loss": 2.3974,
      "step": 948
    },
    {
      "epoch": 0.36871372720429324,
      "grad_norm": 0.7890625,
      "learning_rate": 0.00015916212023033298,
      "loss": 2.4057,
      "step": 949
    },
    {
      "epoch": 0.36910225589470874,
      "grad_norm": 0.75,
      "learning_rate": 0.00015905265747242413,
      "loss": 2.4402,
      "step": 950
    },
    {
      "epoch": 0.3694907845851242,
      "grad_norm": 0.75390625,
      "learning_rate": 0.00015894308596254485,
      "loss": 2.4567,
      "step": 951
    },
    {
      "epoch": 0.3698793132755397,
      "grad_norm": 0.80078125,
      "learning_rate": 0.00015883340590248314,
      "loss": 2.4268,
      "step": 952
    },
    {
      "epoch": 0.3702678419659552,
      "grad_norm": 0.80078125,
      "learning_rate": 0.00015872361749422694,
      "loss": 2.379,
      "step": 953
    },
    {
      "epoch": 0.37065637065637064,
      "grad_norm": 0.75390625,
      "learning_rate": 0.0001586137209399637,
      "loss": 2.4159,
      "step": 954
    },
    {
      "epoch": 0.37104489934678614,
      "grad_norm": 0.80859375,
      "learning_rate": 0.00015850371644207995,
      "loss": 2.4021,
      "step": 955
    },
    {
      "epoch": 0.37143342803720164,
      "grad_norm": 0.734375,
      "learning_rate": 0.00015839360420316116,
      "loss": 2.4081,
      "step": 956
    },
    {
      "epoch": 0.3718219567276171,
      "grad_norm": 0.78515625,
      "learning_rate": 0.00015828338442599112,
      "loss": 2.4259,
      "step": 957
    },
    {
      "epoch": 0.3722104854180326,
      "grad_norm": 0.76953125,
      "learning_rate": 0.00015817305731355167,
      "loss": 2.4069,
      "step": 958
    },
    {
      "epoch": 0.3725990141084481,
      "grad_norm": 0.77734375,
      "learning_rate": 0.00015806262306902234,
      "loss": 2.4049,
      "step": 959
    },
    {
      "epoch": 0.37298754279886354,
      "grad_norm": 0.8125,
      "learning_rate": 0.00015795208189577995,
      "loss": 2.412,
      "step": 960
    },
    {
      "epoch": 0.37337607148927904,
      "grad_norm": 0.796875,
      "learning_rate": 0.00015784143399739822,
      "loss": 2.4012,
      "step": 961
    },
    {
      "epoch": 0.37376460017969454,
      "grad_norm": 0.8515625,
      "learning_rate": 0.00015773067957764752,
      "loss": 2.3994,
      "step": 962
    },
    {
      "epoch": 0.37415312887011,
      "grad_norm": 0.80078125,
      "learning_rate": 0.0001576198188404942,
      "loss": 2.4166,
      "step": 963
    },
    {
      "epoch": 0.3745416575605255,
      "grad_norm": 0.7421875,
      "learning_rate": 0.0001575088519901006,
      "loss": 2.3468,
      "step": 964
    },
    {
      "epoch": 0.374930186250941,
      "grad_norm": 0.76953125,
      "learning_rate": 0.0001573977792308243,
      "loss": 2.4512,
      "step": 965
    },
    {
      "epoch": 0.37531871494135643,
      "grad_norm": 0.7734375,
      "learning_rate": 0.00015728660076721812,
      "loss": 2.3884,
      "step": 966
    },
    {
      "epoch": 0.37570724363177194,
      "grad_norm": 0.83203125,
      "learning_rate": 0.00015717531680402934,
      "loss": 2.4164,
      "step": 967
    },
    {
      "epoch": 0.37609577232218744,
      "grad_norm": 0.78515625,
      "learning_rate": 0.00015706392754619974,
      "loss": 2.4216,
      "step": 968
    },
    {
      "epoch": 0.3764843010126029,
      "grad_norm": 0.91015625,
      "learning_rate": 0.00015695243319886483,
      "loss": 2.3632,
      "step": 969
    },
    {
      "epoch": 0.3768728297030184,
      "grad_norm": 0.828125,
      "learning_rate": 0.00015684083396735372,
      "loss": 2.3983,
      "step": 970
    },
    {
      "epoch": 0.3772613583934339,
      "grad_norm": 0.796875,
      "learning_rate": 0.0001567291300571887,
      "loss": 2.4777,
      "step": 971
    },
    {
      "epoch": 0.37764988708384933,
      "grad_norm": 0.8203125,
      "learning_rate": 0.00015661732167408484,
      "loss": 2.431,
      "step": 972
    },
    {
      "epoch": 0.37803841577426484,
      "grad_norm": 0.8984375,
      "learning_rate": 0.00015650540902394954,
      "loss": 2.486,
      "step": 973
    },
    {
      "epoch": 0.37842694446468034,
      "grad_norm": 0.78125,
      "learning_rate": 0.00015639339231288233,
      "loss": 2.4245,
      "step": 974
    },
    {
      "epoch": 0.3788154731550958,
      "grad_norm": 0.7578125,
      "learning_rate": 0.00015628127174717427,
      "loss": 2.3979,
      "step": 975
    },
    {
      "epoch": 0.3792040018455113,
      "grad_norm": 0.75,
      "learning_rate": 0.00015616904753330775,
      "loss": 2.3735,
      "step": 976
    },
    {
      "epoch": 0.3795925305359268,
      "grad_norm": 0.77734375,
      "learning_rate": 0.00015605671987795595,
      "loss": 2.4248,
      "step": 977
    },
    {
      "epoch": 0.37998105922634223,
      "grad_norm": 0.7578125,
      "learning_rate": 0.00015594428898798272,
      "loss": 2.4653,
      "step": 978
    },
    {
      "epoch": 0.38036958791675773,
      "grad_norm": 0.77734375,
      "learning_rate": 0.00015583175507044185,
      "loss": 2.4554,
      "step": 979
    },
    {
      "epoch": 0.38075811660717324,
      "grad_norm": 0.8671875,
      "learning_rate": 0.00015571911833257696,
      "loss": 2.4347,
      "step": 980
    },
    {
      "epoch": 0.3811466452975887,
      "grad_norm": 0.78515625,
      "learning_rate": 0.000155606378981821,
      "loss": 2.4075,
      "step": 981
    },
    {
      "epoch": 0.3815351739880042,
      "grad_norm": 0.6953125,
      "learning_rate": 0.00015549353722579592,
      "loss": 2.3812,
      "step": 982
    },
    {
      "epoch": 0.3819237026784197,
      "grad_norm": 0.6953125,
      "learning_rate": 0.0001553805932723122,
      "loss": 2.2825,
      "step": 983
    },
    {
      "epoch": 0.38231223136883513,
      "grad_norm": 0.78515625,
      "learning_rate": 0.00015526754732936862,
      "loss": 2.4434,
      "step": 984
    },
    {
      "epoch": 0.38270076005925063,
      "grad_norm": 0.7578125,
      "learning_rate": 0.00015515439960515166,
      "loss": 2.3885,
      "step": 985
    },
    {
      "epoch": 0.38308928874966613,
      "grad_norm": 0.7578125,
      "learning_rate": 0.00015504115030803535,
      "loss": 2.3675,
      "step": 986
    },
    {
      "epoch": 0.3834778174400816,
      "grad_norm": 0.75,
      "learning_rate": 0.00015492779964658074,
      "loss": 2.3827,
      "step": 987
    },
    {
      "epoch": 0.3838663461304971,
      "grad_norm": 0.859375,
      "learning_rate": 0.00015481434782953555,
      "loss": 2.4087,
      "step": 988
    },
    {
      "epoch": 0.3842548748209126,
      "grad_norm": 0.86328125,
      "learning_rate": 0.00015470079506583377,
      "loss": 2.4245,
      "step": 989
    },
    {
      "epoch": 0.38464340351132803,
      "grad_norm": 0.7890625,
      "learning_rate": 0.00015458714156459536,
      "loss": 2.376,
      "step": 990
    },
    {
      "epoch": 0.38503193220174353,
      "grad_norm": 0.90625,
      "learning_rate": 0.00015447338753512573,
      "loss": 2.3831,
      "step": 991
    },
    {
      "epoch": 0.385420460892159,
      "grad_norm": 0.92578125,
      "learning_rate": 0.00015435953318691543,
      "loss": 2.4737,
      "step": 992
    },
    {
      "epoch": 0.3858089895825745,
      "grad_norm": 0.796875,
      "learning_rate": 0.00015424557872963982,
      "loss": 2.4105,
      "step": 993
    },
    {
      "epoch": 0.38619751827299,
      "grad_norm": 0.89453125,
      "learning_rate": 0.00015413152437315856,
      "loss": 2.4475,
      "step": 994
    },
    {
      "epoch": 0.3865860469634054,
      "grad_norm": 0.78125,
      "learning_rate": 0.00015401737032751532,
      "loss": 2.455,
      "step": 995
    },
    {
      "epoch": 0.38697457565382093,
      "grad_norm": 0.734375,
      "learning_rate": 0.00015390311680293727,
      "loss": 2.3793,
      "step": 996
    },
    {
      "epoch": 0.38736310434423643,
      "grad_norm": 0.78515625,
      "learning_rate": 0.00015378876400983494,
      "loss": 2.406,
      "step": 997
    },
    {
      "epoch": 0.3877516330346519,
      "grad_norm": 0.8203125,
      "learning_rate": 0.00015367431215880157,
      "loss": 2.4224,
      "step": 998
    },
    {
      "epoch": 0.3881401617250674,
      "grad_norm": 1.015625,
      "learning_rate": 0.00015355976146061285,
      "loss": 2.4179,
      "step": 999
    },
    {
      "epoch": 0.3885286904154829,
      "grad_norm": 0.80859375,
      "learning_rate": 0.00015344511212622648,
      "loss": 2.4217,
      "step": 1000
    },
    {
      "epoch": 0.3889172191058983,
      "grad_norm": 0.80859375,
      "learning_rate": 0.00015333036436678183,
      "loss": 2.4036,
      "step": 1001
    },
    {
      "epoch": 0.38930574779631383,
      "grad_norm": 0.78125,
      "learning_rate": 0.00015321551839359953,
      "loss": 2.472,
      "step": 1002
    },
    {
      "epoch": 0.38969427648672933,
      "grad_norm": 0.87890625,
      "learning_rate": 0.00015310057441818113,
      "loss": 2.4167,
      "step": 1003
    },
    {
      "epoch": 0.3900828051771448,
      "grad_norm": 0.7890625,
      "learning_rate": 0.00015298553265220854,
      "loss": 2.4069,
      "step": 1004
    },
    {
      "epoch": 0.3904713338675603,
      "grad_norm": 0.76953125,
      "learning_rate": 0.0001528703933075439,
      "loss": 2.3354,
      "step": 1005
    },
    {
      "epoch": 0.3908598625579758,
      "grad_norm": 0.74609375,
      "learning_rate": 0.00015275515659622898,
      "loss": 2.3972,
      "step": 1006
    },
    {
      "epoch": 0.3912483912483912,
      "grad_norm": 0.79296875,
      "learning_rate": 0.00015263982273048488,
      "loss": 2.4468,
      "step": 1007
    },
    {
      "epoch": 0.3916369199388067,
      "grad_norm": 0.7890625,
      "learning_rate": 0.00015252439192271156,
      "loss": 2.4281,
      "step": 1008
    },
    {
      "epoch": 0.39202544862922223,
      "grad_norm": 0.78125,
      "learning_rate": 0.0001524088643854876,
      "loss": 2.3852,
      "step": 1009
    },
    {
      "epoch": 0.3924139773196377,
      "grad_norm": 0.97265625,
      "learning_rate": 0.00015229324033156968,
      "loss": 2.4179,
      "step": 1010
    },
    {
      "epoch": 0.3928025060100532,
      "grad_norm": 0.83984375,
      "learning_rate": 0.00015217751997389223,
      "loss": 2.4206,
      "step": 1011
    },
    {
      "epoch": 0.3931910347004687,
      "grad_norm": 0.76953125,
      "learning_rate": 0.00015206170352556703,
      "loss": 2.3465,
      "step": 1012
    },
    {
      "epoch": 0.3935795633908841,
      "grad_norm": 0.76171875,
      "learning_rate": 0.0001519457911998828,
      "loss": 2.3244,
      "step": 1013
    },
    {
      "epoch": 0.3939680920812996,
      "grad_norm": 0.796875,
      "learning_rate": 0.00015182978321030482,
      "loss": 2.3715,
      "step": 1014
    },
    {
      "epoch": 0.3943566207717151,
      "grad_norm": 0.77734375,
      "learning_rate": 0.00015171367977047465,
      "loss": 2.396,
      "step": 1015
    },
    {
      "epoch": 0.3947451494621306,
      "grad_norm": 0.8828125,
      "learning_rate": 0.00015159748109420956,
      "loss": 2.4217,
      "step": 1016
    },
    {
      "epoch": 0.3951336781525461,
      "grad_norm": 0.7265625,
      "learning_rate": 0.00015148118739550213,
      "loss": 2.4046,
      "step": 1017
    },
    {
      "epoch": 0.3955222068429616,
      "grad_norm": 0.76171875,
      "learning_rate": 0.00015136479888852006,
      "loss": 2.3725,
      "step": 1018
    },
    {
      "epoch": 0.395910735533377,
      "grad_norm": 0.85546875,
      "learning_rate": 0.00015124831578760558,
      "loss": 2.3309,
      "step": 1019
    },
    {
      "epoch": 0.3962992642237925,
      "grad_norm": 0.8125,
      "learning_rate": 0.00015113173830727514,
      "loss": 2.4652,
      "step": 1020
    },
    {
      "epoch": 0.396687792914208,
      "grad_norm": 0.74609375,
      "learning_rate": 0.00015101506666221904,
      "loss": 2.3596,
      "step": 1021
    },
    {
      "epoch": 0.3970763216046235,
      "grad_norm": 0.73046875,
      "learning_rate": 0.00015089830106730095,
      "loss": 2.311,
      "step": 1022
    },
    {
      "epoch": 0.397464850295039,
      "grad_norm": 0.7578125,
      "learning_rate": 0.00015078144173755754,
      "loss": 2.3798,
      "step": 1023
    },
    {
      "epoch": 0.3978533789854545,
      "grad_norm": 0.734375,
      "learning_rate": 0.00015066448888819816,
      "loss": 2.3415,
      "step": 1024
    },
    {
      "epoch": 0.3982419076758699,
      "grad_norm": 0.78125,
      "learning_rate": 0.00015054744273460436,
      "loss": 2.4462,
      "step": 1025
    },
    {
      "epoch": 0.3986304363662854,
      "grad_norm": 0.75390625,
      "learning_rate": 0.00015043030349232947,
      "loss": 2.3352,
      "step": 1026
    },
    {
      "epoch": 0.3990189650567009,
      "grad_norm": 0.75390625,
      "learning_rate": 0.00015031307137709833,
      "loss": 2.3445,
      "step": 1027
    },
    {
      "epoch": 0.39940749374711637,
      "grad_norm": 0.73828125,
      "learning_rate": 0.00015019574660480684,
      "loss": 2.342,
      "step": 1028
    },
    {
      "epoch": 0.3997960224375319,
      "grad_norm": 0.8203125,
      "learning_rate": 0.00015007832939152143,
      "loss": 2.4521,
      "step": 1029
    },
    {
      "epoch": 0.4001845511279474,
      "grad_norm": 0.78125,
      "learning_rate": 0.00014996081995347885,
      "loss": 2.3904,
      "step": 1030
    },
    {
      "epoch": 0.4005730798183628,
      "grad_norm": 0.7890625,
      "learning_rate": 0.00014984321850708562,
      "loss": 2.4335,
      "step": 1031
    },
    {
      "epoch": 0.4009616085087783,
      "grad_norm": 0.75390625,
      "learning_rate": 0.00014972552526891782,
      "loss": 2.4618,
      "step": 1032
    },
    {
      "epoch": 0.4013501371991938,
      "grad_norm": 0.76171875,
      "learning_rate": 0.00014960774045572045,
      "loss": 2.4655,
      "step": 1033
    },
    {
      "epoch": 0.40173866588960927,
      "grad_norm": 0.703125,
      "learning_rate": 0.00014948986428440727,
      "loss": 2.2844,
      "step": 1034
    },
    {
      "epoch": 0.40212719458002477,
      "grad_norm": 0.734375,
      "learning_rate": 0.0001493718969720602,
      "loss": 2.3762,
      "step": 1035
    },
    {
      "epoch": 0.4025157232704403,
      "grad_norm": 0.78125,
      "learning_rate": 0.00014925383873592895,
      "loss": 2.4462,
      "step": 1036
    },
    {
      "epoch": 0.4029042519608557,
      "grad_norm": 0.73828125,
      "learning_rate": 0.0001491356897934309,
      "loss": 2.4108,
      "step": 1037
    },
    {
      "epoch": 0.4032927806512712,
      "grad_norm": 0.79296875,
      "learning_rate": 0.00014901745036215023,
      "loss": 2.4457,
      "step": 1038
    },
    {
      "epoch": 0.4036813093416867,
      "grad_norm": 0.74609375,
      "learning_rate": 0.00014889912065983794,
      "loss": 2.2958,
      "step": 1039
    },
    {
      "epoch": 0.40406983803210217,
      "grad_norm": 0.76171875,
      "learning_rate": 0.0001487807009044112,
      "loss": 2.3535,
      "step": 1040
    },
    {
      "epoch": 0.40445836672251767,
      "grad_norm": 0.75390625,
      "learning_rate": 0.00014866219131395297,
      "loss": 2.403,
      "step": 1041
    },
    {
      "epoch": 0.40484689541293317,
      "grad_norm": 0.8515625,
      "learning_rate": 0.00014854359210671173,
      "loss": 2.397,
      "step": 1042
    },
    {
      "epoch": 0.4052354241033486,
      "grad_norm": 0.76953125,
      "learning_rate": 0.00014842490350110103,
      "loss": 2.4236,
      "step": 1043
    },
    {
      "epoch": 0.4056239527937641,
      "grad_norm": 0.84375,
      "learning_rate": 0.00014830612571569896,
      "loss": 2.3644,
      "step": 1044
    },
    {
      "epoch": 0.4060124814841796,
      "grad_norm": 0.74609375,
      "learning_rate": 0.00014818725896924786,
      "loss": 2.4009,
      "step": 1045
    },
    {
      "epoch": 0.40640101017459507,
      "grad_norm": 0.73046875,
      "learning_rate": 0.000148068303480654,
      "loss": 2.3757,
      "step": 1046
    },
    {
      "epoch": 0.40678953886501057,
      "grad_norm": 0.796875,
      "learning_rate": 0.000147949259468987,
      "loss": 2.3917,
      "step": 1047
    },
    {
      "epoch": 0.40717806755542607,
      "grad_norm": 0.8046875,
      "learning_rate": 0.00014783012715347945,
      "loss": 2.4178,
      "step": 1048
    },
    {
      "epoch": 0.4075665962458415,
      "grad_norm": 0.84765625,
      "learning_rate": 0.00014771090675352665,
      "loss": 2.412,
      "step": 1049
    },
    {
      "epoch": 0.407955124936257,
      "grad_norm": 0.7578125,
      "learning_rate": 0.00014759159848868613,
      "loss": 2.4158,
      "step": 1050
    },
    {
      "epoch": 0.4083436536266725,
      "grad_norm": 0.7578125,
      "learning_rate": 0.00014747220257867717,
      "loss": 2.3423,
      "step": 1051
    },
    {
      "epoch": 0.40873218231708797,
      "grad_norm": 0.80859375,
      "learning_rate": 0.0001473527192433805,
      "loss": 2.3611,
      "step": 1052
    },
    {
      "epoch": 0.40912071100750347,
      "grad_norm": 0.79296875,
      "learning_rate": 0.0001472331487028378,
      "loss": 2.369,
      "step": 1053
    },
    {
      "epoch": 0.40950923969791897,
      "grad_norm": 0.7578125,
      "learning_rate": 0.00014711349117725144,
      "loss": 2.3826,
      "step": 1054
    },
    {
      "epoch": 0.4098977683883344,
      "grad_norm": 0.76171875,
      "learning_rate": 0.0001469937468869839,
      "loss": 2.4114,
      "step": 1055
    },
    {
      "epoch": 0.4102862970787499,
      "grad_norm": 0.7734375,
      "learning_rate": 0.00014687391605255746,
      "loss": 2.408,
      "step": 1056
    },
    {
      "epoch": 0.4106748257691654,
      "grad_norm": 0.75390625,
      "learning_rate": 0.0001467539988946538,
      "loss": 2.3509,
      "step": 1057
    },
    {
      "epoch": 0.41106335445958087,
      "grad_norm": 0.8125,
      "learning_rate": 0.00014663399563411358,
      "loss": 2.4604,
      "step": 1058
    },
    {
      "epoch": 0.41145188314999637,
      "grad_norm": 0.75,
      "learning_rate": 0.00014651390649193598,
      "loss": 2.409,
      "step": 1059
    },
    {
      "epoch": 0.4118404118404118,
      "grad_norm": 0.7890625,
      "learning_rate": 0.00014639373168927845,
      "loss": 2.4373,
      "step": 1060
    },
    {
      "epoch": 0.4122289405308273,
      "grad_norm": 0.82421875,
      "learning_rate": 0.0001462734714474561,
      "loss": 2.4317,
      "step": 1061
    },
    {
      "epoch": 0.4126174692212428,
      "grad_norm": 0.78125,
      "learning_rate": 0.00014615312598794135,
      "loss": 2.4266,
      "step": 1062
    },
    {
      "epoch": 0.41300599791165826,
      "grad_norm": 0.74609375,
      "learning_rate": 0.00014603269553236365,
      "loss": 2.3566,
      "step": 1063
    },
    {
      "epoch": 0.41339452660207376,
      "grad_norm": 0.84375,
      "learning_rate": 0.00014591218030250892,
      "loss": 2.4233,
      "step": 1064
    },
    {
      "epoch": 0.41378305529248927,
      "grad_norm": 0.73828125,
      "learning_rate": 0.0001457915805203193,
      "loss": 2.4206,
      "step": 1065
    },
    {
      "epoch": 0.4141715839829047,
      "grad_norm": 0.71875,
      "learning_rate": 0.00014567089640789247,
      "loss": 2.3427,
      "step": 1066
    },
    {
      "epoch": 0.4145601126733202,
      "grad_norm": 0.7421875,
      "learning_rate": 0.00014555012818748156,
      "loss": 2.3342,
      "step": 1067
    },
    {
      "epoch": 0.4149486413637357,
      "grad_norm": 0.734375,
      "learning_rate": 0.00014542927608149456,
      "loss": 2.3007,
      "step": 1068
    },
    {
      "epoch": 0.41533717005415116,
      "grad_norm": 0.78515625,
      "learning_rate": 0.0001453083403124939,
      "loss": 2.3456,
      "step": 1069
    },
    {
      "epoch": 0.41572569874456666,
      "grad_norm": 0.921875,
      "learning_rate": 0.00014518732110319613,
      "loss": 2.3655,
      "step": 1070
    },
    {
      "epoch": 0.41611422743498216,
      "grad_norm": 0.73828125,
      "learning_rate": 0.00014506621867647142,
      "loss": 2.3755,
      "step": 1071
    },
    {
      "epoch": 0.4165027561253976,
      "grad_norm": 0.74609375,
      "learning_rate": 0.00014494503325534322,
      "loss": 2.3675,
      "step": 1072
    },
    {
      "epoch": 0.4168912848158131,
      "grad_norm": 0.84375,
      "learning_rate": 0.0001448237650629879,
      "loss": 2.4362,
      "step": 1073
    },
    {
      "epoch": 0.4172798135062286,
      "grad_norm": 0.78515625,
      "learning_rate": 0.0001447024143227341,
      "loss": 2.3739,
      "step": 1074
    },
    {
      "epoch": 0.41766834219664406,
      "grad_norm": 0.8203125,
      "learning_rate": 0.0001445809812580626,
      "loss": 2.3791,
      "step": 1075
    },
    {
      "epoch": 0.41805687088705956,
      "grad_norm": 0.74609375,
      "learning_rate": 0.00014445946609260578,
      "loss": 2.4234,
      "step": 1076
    },
    {
      "epoch": 0.41844539957747506,
      "grad_norm": 0.8671875,
      "learning_rate": 0.00014433786905014716,
      "loss": 2.4474,
      "step": 1077
    },
    {
      "epoch": 0.4188339282678905,
      "grad_norm": 0.87109375,
      "learning_rate": 0.00014421619035462116,
      "loss": 2.4105,
      "step": 1078
    },
    {
      "epoch": 0.419222456958306,
      "grad_norm": 0.765625,
      "learning_rate": 0.00014409443023011238,
      "loss": 2.435,
      "step": 1079
    },
    {
      "epoch": 0.4196109856487215,
      "grad_norm": 0.89453125,
      "learning_rate": 0.00014397258890085554,
      "loss": 2.4015,
      "step": 1080
    },
    {
      "epoch": 0.41999951433913696,
      "grad_norm": 0.84765625,
      "learning_rate": 0.00014385066659123487,
      "loss": 2.3862,
      "step": 1081
    },
    {
      "epoch": 0.42038804302955246,
      "grad_norm": 0.84765625,
      "learning_rate": 0.00014372866352578375,
      "loss": 2.432,
      "step": 1082
    },
    {
      "epoch": 0.42077657171996796,
      "grad_norm": 0.765625,
      "learning_rate": 0.00014360657992918422,
      "loss": 2.3696,
      "step": 1083
    },
    {
      "epoch": 0.4211651004103834,
      "grad_norm": 0.828125,
      "learning_rate": 0.0001434844160262667,
      "loss": 2.3393,
      "step": 1084
    },
    {
      "epoch": 0.4215536291007989,
      "grad_norm": 0.7734375,
      "learning_rate": 0.00014336217204200942,
      "loss": 2.4385,
      "step": 1085
    },
    {
      "epoch": 0.4219421577912144,
      "grad_norm": 0.78515625,
      "learning_rate": 0.0001432398482015382,
      "loss": 2.3572,
      "step": 1086
    },
    {
      "epoch": 0.42233068648162986,
      "grad_norm": 0.828125,
      "learning_rate": 0.0001431174447301258,
      "loss": 2.4231,
      "step": 1087
    },
    {
      "epoch": 0.42271921517204536,
      "grad_norm": 0.7578125,
      "learning_rate": 0.0001429949618531917,
      "loss": 2.3236,
      "step": 1088
    },
    {
      "epoch": 0.42310774386246086,
      "grad_norm": 0.8046875,
      "learning_rate": 0.00014287239979630164,
      "loss": 2.3515,
      "step": 1089
    },
    {
      "epoch": 0.4234962725528763,
      "grad_norm": 0.7734375,
      "learning_rate": 0.0001427497587851671,
      "loss": 2.3919,
      "step": 1090
    },
    {
      "epoch": 0.4238848012432918,
      "grad_norm": 0.8671875,
      "learning_rate": 0.00014262703904564504,
      "loss": 2.3854,
      "step": 1091
    },
    {
      "epoch": 0.4242733299337073,
      "grad_norm": 0.79296875,
      "learning_rate": 0.00014250424080373736,
      "loss": 2.4189,
      "step": 1092
    },
    {
      "epoch": 0.42466185862412276,
      "grad_norm": 0.8125,
      "learning_rate": 0.0001423813642855905,
      "loss": 2.4222,
      "step": 1093
    },
    {
      "epoch": 0.42505038731453826,
      "grad_norm": 0.76171875,
      "learning_rate": 0.00014225840971749518,
      "loss": 2.3939,
      "step": 1094
    },
    {
      "epoch": 0.42543891600495376,
      "grad_norm": 0.75390625,
      "learning_rate": 0.0001421353773258857,
      "loss": 2.4524,
      "step": 1095
    },
    {
      "epoch": 0.4258274446953692,
      "grad_norm": 0.8125,
      "learning_rate": 0.0001420122673373398,
      "loss": 2.3831,
      "step": 1096
    },
    {
      "epoch": 0.4262159733857847,
      "grad_norm": 0.88671875,
      "learning_rate": 0.00014188907997857804,
      "loss": 2.2944,
      "step": 1097
    },
    {
      "epoch": 0.4266045020762002,
      "grad_norm": 0.7734375,
      "learning_rate": 0.00014176581547646353,
      "loss": 2.3749,
      "step": 1098
    },
    {
      "epoch": 0.42699303076661566,
      "grad_norm": 0.7578125,
      "learning_rate": 0.00014164247405800144,
      "loss": 2.3957,
      "step": 1099
    },
    {
      "epoch": 0.42738155945703116,
      "grad_norm": 0.73828125,
      "learning_rate": 0.00014151905595033852,
      "loss": 2.3632,
      "step": 1100
    },
    {
      "epoch": 0.42777008814744666,
      "grad_norm": 0.8984375,
      "learning_rate": 0.00014139556138076286,
      "loss": 2.384,
      "step": 1101
    },
    {
      "epoch": 0.4281586168378621,
      "grad_norm": 0.76953125,
      "learning_rate": 0.00014127199057670326,
      "loss": 2.4353,
      "step": 1102
    },
    {
      "epoch": 0.4285471455282776,
      "grad_norm": 0.75390625,
      "learning_rate": 0.00014114834376572897,
      "loss": 2.3899,
      "step": 1103
    },
    {
      "epoch": 0.4289356742186931,
      "grad_norm": 0.77734375,
      "learning_rate": 0.00014102462117554924,
      "loss": 2.4308,
      "step": 1104
    },
    {
      "epoch": 0.42932420290910855,
      "grad_norm": 0.7890625,
      "learning_rate": 0.0001409008230340128,
      "loss": 2.4151,
      "step": 1105
    },
    {
      "epoch": 0.42971273159952406,
      "grad_norm": 0.734375,
      "learning_rate": 0.0001407769495691076,
      "loss": 2.3521,
      "step": 1106
    },
    {
      "epoch": 0.43010126028993956,
      "grad_norm": 0.7734375,
      "learning_rate": 0.00014065300100896022,
      "loss": 2.3728,
      "step": 1107
    },
    {
      "epoch": 0.430489788980355,
      "grad_norm": 0.76953125,
      "learning_rate": 0.0001405289775818356,
      "loss": 2.4092,
      "step": 1108
    },
    {
      "epoch": 0.4308783176707705,
      "grad_norm": 0.76953125,
      "learning_rate": 0.00014040487951613658,
      "loss": 2.3365,
      "step": 1109
    },
    {
      "epoch": 0.431266846361186,
      "grad_norm": 0.796875,
      "learning_rate": 0.0001402807070404033,
      "loss": 2.3233,
      "step": 1110
    },
    {
      "epoch": 0.43165537505160145,
      "grad_norm": 0.7890625,
      "learning_rate": 0.00014015646038331313,
      "loss": 2.3724,
      "step": 1111
    },
    {
      "epoch": 0.43204390374201695,
      "grad_norm": 0.80859375,
      "learning_rate": 0.00014003213977367994,
      "loss": 2.4238,
      "step": 1112
    },
    {
      "epoch": 0.43243243243243246,
      "grad_norm": 0.8359375,
      "learning_rate": 0.0001399077454404539,
      "loss": 2.4004,
      "step": 1113
    },
    {
      "epoch": 0.4328209611228479,
      "grad_norm": 0.78125,
      "learning_rate": 0.00013978327761272072,
      "loss": 2.4084,
      "step": 1114
    },
    {
      "epoch": 0.4332094898132634,
      "grad_norm": 0.75390625,
      "learning_rate": 0.00013965873651970175,
      "loss": 2.3828,
      "step": 1115
    },
    {
      "epoch": 0.4335980185036789,
      "grad_norm": 0.703125,
      "learning_rate": 0.00013953412239075302,
      "loss": 2.3548,
      "step": 1116
    },
    {
      "epoch": 0.43398654719409435,
      "grad_norm": 0.80859375,
      "learning_rate": 0.00013940943545536524,
      "loss": 2.4108,
      "step": 1117
    },
    {
      "epoch": 0.43437507588450985,
      "grad_norm": 0.7265625,
      "learning_rate": 0.0001392846759431631,
      "loss": 2.3684,
      "step": 1118
    },
    {
      "epoch": 0.43476360457492536,
      "grad_norm": 0.828125,
      "learning_rate": 0.00013915984408390496,
      "loss": 2.3744,
      "step": 1119
    },
    {
      "epoch": 0.4351521332653408,
      "grad_norm": 0.7890625,
      "learning_rate": 0.00013903494010748246,
      "loss": 2.3971,
      "step": 1120
    },
    {
      "epoch": 0.4355406619557563,
      "grad_norm": 0.79296875,
      "learning_rate": 0.00013890996424392006,
      "loss": 2.4105,
      "step": 1121
    },
    {
      "epoch": 0.4359291906461718,
      "grad_norm": 0.7265625,
      "learning_rate": 0.0001387849167233745,
      "loss": 2.2925,
      "step": 1122
    },
    {
      "epoch": 0.43631771933658725,
      "grad_norm": 0.75,
      "learning_rate": 0.00013865979777613458,
      "loss": 2.3574,
      "step": 1123
    },
    {
      "epoch": 0.43670624802700275,
      "grad_norm": 0.78125,
      "learning_rate": 0.00013853460763262062,
      "loss": 2.3376,
      "step": 1124
    },
    {
      "epoch": 0.43709477671741825,
      "grad_norm": 0.76953125,
      "learning_rate": 0.00013840934652338405,
      "loss": 2.4241,
      "step": 1125
    },
    {
      "epoch": 0.4374833054078337,
      "grad_norm": 0.7890625,
      "learning_rate": 0.00013828401467910704,
      "loss": 2.3839,
      "step": 1126
    },
    {
      "epoch": 0.4378718340982492,
      "grad_norm": 0.7734375,
      "learning_rate": 0.00013815861233060193,
      "loss": 2.3961,
      "step": 1127
    },
    {
      "epoch": 0.43826036278866465,
      "grad_norm": 0.73828125,
      "learning_rate": 0.00013803313970881092,
      "loss": 2.3902,
      "step": 1128
    },
    {
      "epoch": 0.43864889147908015,
      "grad_norm": 0.85546875,
      "learning_rate": 0.00013790759704480573,
      "loss": 2.4001,
      "step": 1129
    },
    {
      "epoch": 0.43903742016949565,
      "grad_norm": 0.8046875,
      "learning_rate": 0.00013778198456978696,
      "loss": 2.3353,
      "step": 1130
    },
    {
      "epoch": 0.4394259488599111,
      "grad_norm": 0.78125,
      "learning_rate": 0.00013765630251508386,
      "loss": 2.3685,
      "step": 1131
    },
    {
      "epoch": 0.4398144775503266,
      "grad_norm": 0.73828125,
      "learning_rate": 0.00013753055111215368,
      "loss": 2.3767,
      "step": 1132
    },
    {
      "epoch": 0.4402030062407421,
      "grad_norm": 0.8359375,
      "learning_rate": 0.00013740473059258154,
      "loss": 2.417,
      "step": 1133
    },
    {
      "epoch": 0.44059153493115755,
      "grad_norm": 0.8359375,
      "learning_rate": 0.00013727884118807976,
      "loss": 2.4301,
      "step": 1134
    },
    {
      "epoch": 0.44098006362157305,
      "grad_norm": 0.75,
      "learning_rate": 0.00013715288313048758,
      "loss": 2.4261,
      "step": 1135
    },
    {
      "epoch": 0.44136859231198855,
      "grad_norm": 0.78125,
      "learning_rate": 0.00013702685665177054,
      "loss": 2.4326,
      "step": 1136
    },
    {
      "epoch": 0.441757121002404,
      "grad_norm": 0.91796875,
      "learning_rate": 0.00013690076198402036,
      "loss": 2.3514,
      "step": 1137
    },
    {
      "epoch": 0.4421456496928195,
      "grad_norm": 0.94921875,
      "learning_rate": 0.00013677459935945425,
      "loss": 2.4182,
      "step": 1138
    },
    {
      "epoch": 0.442534178383235,
      "grad_norm": 0.7734375,
      "learning_rate": 0.00013664836901041452,
      "loss": 2.4883,
      "step": 1139
    },
    {
      "epoch": 0.44292270707365045,
      "grad_norm": 0.75390625,
      "learning_rate": 0.00013652207116936828,
      "loss": 2.3386,
      "step": 1140
    },
    {
      "epoch": 0.44331123576406595,
      "grad_norm": 0.74609375,
      "learning_rate": 0.00013639570606890694,
      "loss": 2.3897,
      "step": 1141
    },
    {
      "epoch": 0.44369976445448145,
      "grad_norm": 0.8046875,
      "learning_rate": 0.00013626927394174568,
      "loss": 2.3805,
      "step": 1142
    },
    {
      "epoch": 0.4440882931448969,
      "grad_norm": 0.80078125,
      "learning_rate": 0.00013614277502072326,
      "loss": 2.358,
      "step": 1143
    },
    {
      "epoch": 0.4444768218353124,
      "grad_norm": 0.84765625,
      "learning_rate": 0.0001360162095388013,
      "loss": 2.3745,
      "step": 1144
    },
    {
      "epoch": 0.4448653505257279,
      "grad_norm": 0.72265625,
      "learning_rate": 0.00013588957772906412,
      "loss": 2.3668,
      "step": 1145
    },
    {
      "epoch": 0.44525387921614334,
      "grad_norm": 0.8046875,
      "learning_rate": 0.00013576287982471812,
      "loss": 2.3509,
      "step": 1146
    },
    {
      "epoch": 0.44564240790655885,
      "grad_norm": 0.74609375,
      "learning_rate": 0.00013563611605909141,
      "loss": 2.3841,
      "step": 1147
    },
    {
      "epoch": 0.44603093659697435,
      "grad_norm": 0.8125,
      "learning_rate": 0.00013550928666563347,
      "loss": 2.3929,
      "step": 1148
    },
    {
      "epoch": 0.4464194652873898,
      "grad_norm": 0.734375,
      "learning_rate": 0.0001353823918779146,
      "loss": 2.377,
      "step": 1149
    },
    {
      "epoch": 0.4468079939778053,
      "grad_norm": 0.7265625,
      "learning_rate": 0.00013525543192962548,
      "loss": 2.4199,
      "step": 1150
    },
    {
      "epoch": 0.4471965226682208,
      "grad_norm": 0.7734375,
      "learning_rate": 0.00013512840705457685,
      "loss": 2.4202,
      "step": 1151
    },
    {
      "epoch": 0.44758505135863624,
      "grad_norm": 0.7421875,
      "learning_rate": 0.000135001317486699,
      "loss": 2.3966,
      "step": 1152
    },
    {
      "epoch": 0.44797358004905175,
      "grad_norm": 0.70703125,
      "learning_rate": 0.00013487416346004137,
      "loss": 2.4289,
      "step": 1153
    },
    {
      "epoch": 0.44836210873946725,
      "grad_norm": 0.75,
      "learning_rate": 0.00013474694520877208,
      "loss": 2.3513,
      "step": 1154
    },
    {
      "epoch": 0.4487506374298827,
      "grad_norm": 0.79296875,
      "learning_rate": 0.0001346196629671776,
      "loss": 2.3925,
      "step": 1155
    },
    {
      "epoch": 0.4491391661202982,
      "grad_norm": 0.88671875,
      "learning_rate": 0.00013449231696966208,
      "loss": 2.4184,
      "step": 1156
    },
    {
      "epoch": 0.4495276948107137,
      "grad_norm": 0.75,
      "learning_rate": 0.00013436490745074735,
      "loss": 2.364,
      "step": 1157
    },
    {
      "epoch": 0.44991622350112914,
      "grad_norm": 0.734375,
      "learning_rate": 0.00013423743464507194,
      "loss": 2.3693,
      "step": 1158
    },
    {
      "epoch": 0.45030475219154464,
      "grad_norm": 0.84765625,
      "learning_rate": 0.0001341098987873911,
      "loss": 2.3543,
      "step": 1159
    },
    {
      "epoch": 0.45069328088196015,
      "grad_norm": 0.7578125,
      "learning_rate": 0.00013398230011257614,
      "loss": 2.426,
      "step": 1160
    },
    {
      "epoch": 0.4510818095723756,
      "grad_norm": 0.77734375,
      "learning_rate": 0.00013385463885561412,
      "loss": 2.3451,
      "step": 1161
    },
    {
      "epoch": 0.4514703382627911,
      "grad_norm": 0.7265625,
      "learning_rate": 0.00013372691525160725,
      "loss": 2.3361,
      "step": 1162
    },
    {
      "epoch": 0.4518588669532066,
      "grad_norm": 0.89453125,
      "learning_rate": 0.0001335991295357726,
      "loss": 2.4234,
      "step": 1163
    },
    {
      "epoch": 0.45224739564362204,
      "grad_norm": 0.78125,
      "learning_rate": 0.00013347128194344168,
      "loss": 2.3376,
      "step": 1164
    },
    {
      "epoch": 0.45263592433403754,
      "grad_norm": 0.7265625,
      "learning_rate": 0.0001333433727100599,
      "loss": 2.3383,
      "step": 1165
    },
    {
      "epoch": 0.45302445302445304,
      "grad_norm": 1.3203125,
      "learning_rate": 0.00013321540207118614,
      "loss": 2.4261,
      "step": 1166
    },
    {
      "epoch": 0.4534129817148685,
      "grad_norm": 0.81640625,
      "learning_rate": 0.00013308737026249248,
      "loss": 2.4405,
      "step": 1167
    },
    {
      "epoch": 0.453801510405284,
      "grad_norm": 0.76953125,
      "learning_rate": 0.00013295927751976358,
      "loss": 2.356,
      "step": 1168
    },
    {
      "epoch": 0.4541900390956995,
      "grad_norm": 0.8515625,
      "learning_rate": 0.00013283112407889633,
      "loss": 2.4666,
      "step": 1169
    },
    {
      "epoch": 0.45457856778611494,
      "grad_norm": 0.76953125,
      "learning_rate": 0.00013270291017589936,
      "loss": 2.4644,
      "step": 1170
    },
    {
      "epoch": 0.45496709647653044,
      "grad_norm": 0.796875,
      "learning_rate": 0.00013257463604689275,
      "loss": 2.3546,
      "step": 1171
    },
    {
      "epoch": 0.45535562516694594,
      "grad_norm": 0.86328125,
      "learning_rate": 0.0001324463019281074,
      "loss": 2.4329,
      "step": 1172
    },
    {
      "epoch": 0.4557441538573614,
      "grad_norm": 0.75390625,
      "learning_rate": 0.00013231790805588468,
      "loss": 2.3792,
      "step": 1173
    },
    {
      "epoch": 0.4561326825477769,
      "grad_norm": 0.7890625,
      "learning_rate": 0.0001321894546666761,
      "loss": 2.352,
      "step": 1174
    },
    {
      "epoch": 0.4565212112381924,
      "grad_norm": 0.76953125,
      "learning_rate": 0.00013206094199704274,
      "loss": 2.3812,
      "step": 1175
    },
    {
      "epoch": 0.45690973992860784,
      "grad_norm": 0.72265625,
      "learning_rate": 0.00013193237028365478,
      "loss": 2.3032,
      "step": 1176
    },
    {
      "epoch": 0.45729826861902334,
      "grad_norm": 0.6953125,
      "learning_rate": 0.00013180373976329118,
      "loss": 2.3755,
      "step": 1177
    },
    {
      "epoch": 0.45768679730943884,
      "grad_norm": 0.69921875,
      "learning_rate": 0.00013167505067283926,
      "loss": 2.4432,
      "step": 1178
    },
    {
      "epoch": 0.4580753259998543,
      "grad_norm": 0.66015625,
      "learning_rate": 0.0001315463032492941,
      "loss": 2.2993,
      "step": 1179
    },
    {
      "epoch": 0.4584638546902698,
      "grad_norm": 0.71484375,
      "learning_rate": 0.00013141749772975825,
      "loss": 2.3378,
      "step": 1180
    },
    {
      "epoch": 0.4588523833806853,
      "grad_norm": 14.75,
      "learning_rate": 0.00013128863435144127,
      "loss": 2.37,
      "step": 1181
    },
    {
      "epoch": 0.45924091207110074,
      "grad_norm": 0.79296875,
      "learning_rate": 0.00013115971335165926,
      "loss": 2.3849,
      "step": 1182
    },
    {
      "epoch": 0.45962944076151624,
      "grad_norm": 0.8046875,
      "learning_rate": 0.00013103073496783447,
      "loss": 2.4525,
      "step": 1183
    },
    {
      "epoch": 0.46001796945193174,
      "grad_norm": 0.78515625,
      "learning_rate": 0.00013090169943749476,
      "loss": 2.3965,
      "step": 1184
    },
    {
      "epoch": 0.4604064981423472,
      "grad_norm": 0.74609375,
      "learning_rate": 0.00013077260699827326,
      "loss": 2.4148,
      "step": 1185
    },
    {
      "epoch": 0.4607950268327627,
      "grad_norm": 0.68359375,
      "learning_rate": 0.00013064345788790788,
      "loss": 2.3294,
      "step": 1186
    },
    {
      "epoch": 0.4611835555231782,
      "grad_norm": 0.70703125,
      "learning_rate": 0.00013051425234424105,
      "loss": 2.4082,
      "step": 1187
    },
    {
      "epoch": 0.46157208421359364,
      "grad_norm": 0.74609375,
      "learning_rate": 0.00013038499060521886,
      "loss": 2.4042,
      "step": 1188
    },
    {
      "epoch": 0.46196061290400914,
      "grad_norm": 0.83203125,
      "learning_rate": 0.00013025567290889112,
      "loss": 2.4304,
      "step": 1189
    },
    {
      "epoch": 0.46234914159442464,
      "grad_norm": 0.8046875,
      "learning_rate": 0.00013012629949341053,
      "loss": 2.3919,
      "step": 1190
    },
    {
      "epoch": 0.4627376702848401,
      "grad_norm": 0.7109375,
      "learning_rate": 0.00012999687059703257,
      "loss": 2.3465,
      "step": 1191
    },
    {
      "epoch": 0.4631261989752556,
      "grad_norm": 0.75,
      "learning_rate": 0.0001298673864581147,
      "loss": 2.3778,
      "step": 1192
    },
    {
      "epoch": 0.4635147276656711,
      "grad_norm": 0.7890625,
      "learning_rate": 0.00012973784731511637,
      "loss": 2.4215,
      "step": 1193
    },
    {
      "epoch": 0.46390325635608654,
      "grad_norm": 0.7734375,
      "learning_rate": 0.00012960825340659803,
      "loss": 2.3797,
      "step": 1194
    },
    {
      "epoch": 0.46429178504650204,
      "grad_norm": 0.78125,
      "learning_rate": 0.00012947860497122119,
      "loss": 2.4168,
      "step": 1195
    },
    {
      "epoch": 0.4646803137369175,
      "grad_norm": 0.7421875,
      "learning_rate": 0.0001293489022477477,
      "loss": 2.3733,
      "step": 1196
    },
    {
      "epoch": 0.465068842427333,
      "grad_norm": 0.78515625,
      "learning_rate": 0.00012921914547503943,
      "loss": 2.3437,
      "step": 1197
    },
    {
      "epoch": 0.4654573711177485,
      "grad_norm": 0.75390625,
      "learning_rate": 0.00012908933489205775,
      "loss": 2.3236,
      "step": 1198
    },
    {
      "epoch": 0.46584589980816393,
      "grad_norm": 0.76171875,
      "learning_rate": 0.00012895947073786313,
      "loss": 2.4033,
      "step": 1199
    },
    {
      "epoch": 0.46623442849857943,
      "grad_norm": 0.74609375,
      "learning_rate": 0.00012882955325161472,
      "loss": 2.3573,
      "step": 1200
    },
    {
      "epoch": 0.46662295718899494,
      "grad_norm": 0.703125,
      "learning_rate": 0.00012869958267256988,
      "loss": 2.3784,
      "step": 1201
    },
    {
      "epoch": 0.4670114858794104,
      "grad_norm": 0.734375,
      "learning_rate": 0.00012856955924008375,
      "loss": 2.3903,
      "step": 1202
    },
    {
      "epoch": 0.4674000145698259,
      "grad_norm": 0.7265625,
      "learning_rate": 0.00012843948319360874,
      "loss": 2.3779,
      "step": 1203
    },
    {
      "epoch": 0.4677885432602414,
      "grad_norm": 0.7890625,
      "learning_rate": 0.00012830935477269425,
      "loss": 2.4628,
      "step": 1204
    },
    {
      "epoch": 0.46817707195065683,
      "grad_norm": 0.765625,
      "learning_rate": 0.00012817917421698613,
      "loss": 2.3718,
      "step": 1205
    },
    {
      "epoch": 0.46856560064107233,
      "grad_norm": 0.73828125,
      "learning_rate": 0.00012804894176622617,
      "loss": 2.368,
      "step": 1206
    },
    {
      "epoch": 0.46895412933148783,
      "grad_norm": 0.72265625,
      "learning_rate": 0.00012791865766025176,
      "loss": 2.4149,
      "step": 1207
    },
    {
      "epoch": 0.4693426580219033,
      "grad_norm": 0.76171875,
      "learning_rate": 0.0001277883221389954,
      "loss": 2.3687,
      "step": 1208
    },
    {
      "epoch": 0.4697311867123188,
      "grad_norm": 0.7734375,
      "learning_rate": 0.00012765793544248442,
      "loss": 2.4661,
      "step": 1209
    },
    {
      "epoch": 0.4701197154027343,
      "grad_norm": 0.73046875,
      "learning_rate": 0.0001275274978108401,
      "loss": 2.3438,
      "step": 1210
    },
    {
      "epoch": 0.47050824409314973,
      "grad_norm": 0.73046875,
      "learning_rate": 0.00012739700948427785,
      "loss": 2.4013,
      "step": 1211
    },
    {
      "epoch": 0.47089677278356523,
      "grad_norm": 0.73046875,
      "learning_rate": 0.00012726647070310622,
      "loss": 2.4636,
      "step": 1212
    },
    {
      "epoch": 0.47128530147398073,
      "grad_norm": 0.7734375,
      "learning_rate": 0.00012713588170772675,
      "loss": 2.4221,
      "step": 1213
    },
    {
      "epoch": 0.4716738301643962,
      "grad_norm": 0.70703125,
      "learning_rate": 0.00012700524273863347,
      "loss": 2.4232,
      "step": 1214
    },
    {
      "epoch": 0.4720623588548117,
      "grad_norm": 0.8046875,
      "learning_rate": 0.00012687455403641242,
      "loss": 2.3794,
      "step": 1215
    },
    {
      "epoch": 0.4724508875452272,
      "grad_norm": 0.74609375,
      "learning_rate": 0.00012674381584174124,
      "loss": 2.4058,
      "step": 1216
    },
    {
      "epoch": 0.47283941623564263,
      "grad_norm": 0.78515625,
      "learning_rate": 0.00012661302839538866,
      "loss": 2.3517,
      "step": 1217
    },
    {
      "epoch": 0.47322794492605813,
      "grad_norm": 0.75,
      "learning_rate": 0.00012648219193821424,
      "loss": 2.4763,
      "step": 1218
    },
    {
      "epoch": 0.47361647361647363,
      "grad_norm": 0.75390625,
      "learning_rate": 0.00012635130671116772,
      "loss": 2.3852,
      "step": 1219
    },
    {
      "epoch": 0.4740050023068891,
      "grad_norm": 0.87890625,
      "learning_rate": 0.00012622037295528858,
      "loss": 2.3484,
      "step": 1220
    },
    {
      "epoch": 0.4743935309973046,
      "grad_norm": 0.72265625,
      "learning_rate": 0.00012608939091170577,
      "loss": 2.3919,
      "step": 1221
    },
    {
      "epoch": 0.4747820596877201,
      "grad_norm": 0.71484375,
      "learning_rate": 0.00012595836082163718,
      "loss": 2.4426,
      "step": 1222
    },
    {
      "epoch": 0.4751705883781355,
      "grad_norm": 0.69921875,
      "learning_rate": 0.00012582728292638912,
      "loss": 2.3485,
      "step": 1223
    },
    {
      "epoch": 0.47555911706855103,
      "grad_norm": 0.72265625,
      "learning_rate": 0.000125696157467356,
      "loss": 2.4109,
      "step": 1224
    },
    {
      "epoch": 0.47594764575896653,
      "grad_norm": 0.76953125,
      "learning_rate": 0.00012556498468601975,
      "loss": 2.4374,
      "step": 1225
    },
    {
      "epoch": 0.476336174449382,
      "grad_norm": 0.76171875,
      "learning_rate": 0.0001254337648239495,
      "loss": 2.3384,
      "step": 1226
    },
    {
      "epoch": 0.4767247031397975,
      "grad_norm": 0.734375,
      "learning_rate": 0.00012530249812280108,
      "loss": 2.3056,
      "step": 1227
    },
    {
      "epoch": 0.477113231830213,
      "grad_norm": 0.765625,
      "learning_rate": 0.0001251711848243166,
      "loss": 2.416,
      "step": 1228
    },
    {
      "epoch": 0.4775017605206284,
      "grad_norm": 0.73046875,
      "learning_rate": 0.00012503982517032388,
      "loss": 2.3606,
      "step": 1229
    },
    {
      "epoch": 0.47789028921104393,
      "grad_norm": 0.734375,
      "learning_rate": 0.00012490841940273627,
      "loss": 2.4288,
      "step": 1230
    },
    {
      "epoch": 0.47827881790145943,
      "grad_norm": 0.76171875,
      "learning_rate": 0.0001247769677635519,
      "loss": 2.4206,
      "step": 1231
    },
    {
      "epoch": 0.4786673465918749,
      "grad_norm": 0.71484375,
      "learning_rate": 0.00012464547049485347,
      "loss": 2.3527,
      "step": 1232
    },
    {
      "epoch": 0.4790558752822904,
      "grad_norm": 0.75390625,
      "learning_rate": 0.00012451392783880766,
      "loss": 2.4884,
      "step": 1233
    },
    {
      "epoch": 0.4794444039727059,
      "grad_norm": 0.765625,
      "learning_rate": 0.00012438234003766478,
      "loss": 2.4246,
      "step": 1234
    },
    {
      "epoch": 0.4798329326631213,
      "grad_norm": 0.7578125,
      "learning_rate": 0.0001242507073337582,
      "loss": 2.4027,
      "step": 1235
    },
    {
      "epoch": 0.4802214613535368,
      "grad_norm": 0.73828125,
      "learning_rate": 0.00012411902996950407,
      "loss": 2.3954,
      "step": 1236
    },
    {
      "epoch": 0.48060999004395233,
      "grad_norm": 0.76171875,
      "learning_rate": 0.00012398730818740077,
      "loss": 2.4057,
      "step": 1237
    },
    {
      "epoch": 0.4809985187343678,
      "grad_norm": 0.75,
      "learning_rate": 0.00012385554223002845,
      "loss": 2.3871,
      "step": 1238
    },
    {
      "epoch": 0.4813870474247833,
      "grad_norm": 0.71484375,
      "learning_rate": 0.0001237237323400486,
      "loss": 2.4078,
      "step": 1239
    },
    {
      "epoch": 0.4817755761151988,
      "grad_norm": 0.75,
      "learning_rate": 0.00012359187876020367,
      "loss": 2.4764,
      "step": 1240
    },
    {
      "epoch": 0.4821641048056142,
      "grad_norm": 0.703125,
      "learning_rate": 0.0001234599817333166,
      "loss": 2.3753,
      "step": 1241
    },
    {
      "epoch": 0.4825526334960297,
      "grad_norm": 0.73828125,
      "learning_rate": 0.00012332804150229018,
      "loss": 2.3435,
      "step": 1242
    },
    {
      "epoch": 0.4829411621864452,
      "grad_norm": 0.703125,
      "learning_rate": 0.00012319605831010694,
      "loss": 2.3939,
      "step": 1243
    },
    {
      "epoch": 0.4833296908768607,
      "grad_norm": 0.78515625,
      "learning_rate": 0.00012306403239982844,
      "loss": 2.4263,
      "step": 1244
    },
    {
      "epoch": 0.4837182195672762,
      "grad_norm": 0.8203125,
      "learning_rate": 0.00012293196401459494,
      "loss": 2.4282,
      "step": 1245
    },
    {
      "epoch": 0.4841067482576917,
      "grad_norm": 0.78515625,
      "learning_rate": 0.0001227998533976249,
      "loss": 2.3939,
      "step": 1246
    },
    {
      "epoch": 0.4844952769481071,
      "grad_norm": 0.84765625,
      "learning_rate": 0.00012266770079221457,
      "loss": 2.4431,
      "step": 1247
    },
    {
      "epoch": 0.4848838056385226,
      "grad_norm": 0.765625,
      "learning_rate": 0.00012253550644173754,
      "loss": 2.3905,
      "step": 1248
    },
    {
      "epoch": 0.4852723343289381,
      "grad_norm": 0.78125,
      "learning_rate": 0.00012240327058964424,
      "loss": 2.372,
      "step": 1249
    },
    {
      "epoch": 0.4856608630193536,
      "grad_norm": 0.76953125,
      "learning_rate": 0.00012227099347946155,
      "loss": 2.4248,
      "step": 1250
    },
    {
      "epoch": 0.4860493917097691,
      "grad_norm": 0.734375,
      "learning_rate": 0.00012213867535479234,
      "loss": 2.3693,
      "step": 1251
    },
    {
      "epoch": 0.4864379204001846,
      "grad_norm": 0.7109375,
      "learning_rate": 0.000122006316459315,
      "loss": 2.3079,
      "step": 1252
    },
    {
      "epoch": 0.4868264490906,
      "grad_norm": 0.80078125,
      "learning_rate": 0.00012187391703678301,
      "loss": 2.3852,
      "step": 1253
    },
    {
      "epoch": 0.4872149777810155,
      "grad_norm": 0.75390625,
      "learning_rate": 0.00012174147733102448,
      "loss": 2.409,
      "step": 1254
    },
    {
      "epoch": 0.487603506471431,
      "grad_norm": 0.84765625,
      "learning_rate": 0.00012160899758594176,
      "loss": 2.3169,
      "step": 1255
    },
    {
      "epoch": 0.48799203516184647,
      "grad_norm": 0.78125,
      "learning_rate": 0.00012147647804551078,
      "loss": 2.4159,
      "step": 1256
    },
    {
      "epoch": 0.488380563852262,
      "grad_norm": 0.765625,
      "learning_rate": 0.00012134391895378097,
      "loss": 2.3722,
      "step": 1257
    },
    {
      "epoch": 0.4887690925426775,
      "grad_norm": 0.8203125,
      "learning_rate": 0.00012121132055487441,
      "loss": 2.3752,
      "step": 1258
    },
    {
      "epoch": 0.4891576212330929,
      "grad_norm": 0.796875,
      "learning_rate": 0.00012107868309298574,
      "loss": 2.3253,
      "step": 1259
    },
    {
      "epoch": 0.4895461499235084,
      "grad_norm": 0.8046875,
      "learning_rate": 0.00012094600681238135,
      "loss": 2.3405,
      "step": 1260
    },
    {
      "epoch": 0.4899346786139239,
      "grad_norm": 0.765625,
      "learning_rate": 0.00012081329195739928,
      "loss": 2.3236,
      "step": 1261
    },
    {
      "epoch": 0.49032320730433937,
      "grad_norm": 0.6953125,
      "learning_rate": 0.00012068053877244853,
      "loss": 2.408,
      "step": 1262
    },
    {
      "epoch": 0.49071173599475487,
      "grad_norm": 0.69921875,
      "learning_rate": 0.0001205477475020087,
      "loss": 2.3682,
      "step": 1263
    },
    {
      "epoch": 0.4911002646851703,
      "grad_norm": 0.90625,
      "learning_rate": 0.0001204149183906296,
      "loss": 2.4261,
      "step": 1264
    },
    {
      "epoch": 0.4914887933755858,
      "grad_norm": 0.85546875,
      "learning_rate": 0.00012028205168293056,
      "loss": 2.3513,
      "step": 1265
    },
    {
      "epoch": 0.4918773220660013,
      "grad_norm": 0.70703125,
      "learning_rate": 0.0001201491476236003,
      "loss": 2.4025,
      "step": 1266
    },
    {
      "epoch": 0.49226585075641677,
      "grad_norm": 0.8046875,
      "learning_rate": 0.00012001620645739629,
      "loss": 2.3808,
      "step": 1267
    },
    {
      "epoch": 0.49265437944683227,
      "grad_norm": 0.76953125,
      "learning_rate": 0.0001198832284291443,
      "loss": 2.4421,
      "step": 1268
    },
    {
      "epoch": 0.49304290813724777,
      "grad_norm": 0.81640625,
      "learning_rate": 0.00011975021378373802,
      "loss": 2.4174,
      "step": 1269
    },
    {
      "epoch": 0.4934314368276632,
      "grad_norm": 0.71875,
      "learning_rate": 0.00011961716276613857,
      "loss": 2.3233,
      "step": 1270
    },
    {
      "epoch": 0.4938199655180787,
      "grad_norm": 0.73046875,
      "learning_rate": 0.00011948407562137404,
      "loss": 2.4112,
      "step": 1271
    },
    {
      "epoch": 0.4942084942084942,
      "grad_norm": 0.7734375,
      "learning_rate": 0.0001193509525945391,
      "loss": 2.4136,
      "step": 1272
    },
    {
      "epoch": 0.49459702289890967,
      "grad_norm": 0.72265625,
      "learning_rate": 0.00011921779393079438,
      "loss": 2.3525,
      "step": 1273
    },
    {
      "epoch": 0.49498555158932517,
      "grad_norm": 0.734375,
      "learning_rate": 0.00011908459987536629,
      "loss": 2.3849,
      "step": 1274
    },
    {
      "epoch": 0.49537408027974067,
      "grad_norm": 0.74609375,
      "learning_rate": 0.0001189513706735463,
      "loss": 2.4242,
      "step": 1275
    },
    {
      "epoch": 0.4957626089701561,
      "grad_norm": 0.73046875,
      "learning_rate": 0.00011881810657069068,
      "loss": 2.3559,
      "step": 1276
    },
    {
      "epoch": 0.4961511376605716,
      "grad_norm": 0.7734375,
      "learning_rate": 0.00011868480781221995,
      "loss": 2.3649,
      "step": 1277
    },
    {
      "epoch": 0.4965396663509871,
      "grad_norm": 0.73828125,
      "learning_rate": 0.00011855147464361845,
      "loss": 2.2891,
      "step": 1278
    },
    {
      "epoch": 0.49692819504140257,
      "grad_norm": 0.73828125,
      "learning_rate": 0.00011841810731043386,
      "loss": 2.4437,
      "step": 1279
    },
    {
      "epoch": 0.49731672373181807,
      "grad_norm": 0.78125,
      "learning_rate": 0.00011828470605827682,
      "loss": 2.3526,
      "step": 1280
    },
    {
      "epoch": 0.49770525242223357,
      "grad_norm": 0.78515625,
      "learning_rate": 0.00011815127113282047,
      "loss": 2.3752,
      "step": 1281
    },
    {
      "epoch": 0.498093781112649,
      "grad_norm": 0.73046875,
      "learning_rate": 0.00011801780277979986,
      "loss": 2.3949,
      "step": 1282
    },
    {
      "epoch": 0.4984823098030645,
      "grad_norm": 0.75390625,
      "learning_rate": 0.00011788430124501167,
      "loss": 2.4448,
      "step": 1283
    },
    {
      "epoch": 0.49887083849348,
      "grad_norm": 0.7578125,
      "learning_rate": 0.0001177507667743137,
      "loss": 2.3877,
      "step": 1284
    },
    {
      "epoch": 0.49925936718389546,
      "grad_norm": 0.8359375,
      "learning_rate": 0.00011761719961362434,
      "loss": 2.3495,
      "step": 1285
    },
    {
      "epoch": 0.49964789587431097,
      "grad_norm": 0.85546875,
      "learning_rate": 0.00011748360000892227,
      "loss": 2.4588,
      "step": 1286
    },
    {
      "epoch": 0.5000364245647264,
      "grad_norm": 0.734375,
      "learning_rate": 0.00011734996820624581,
      "loss": 2.3902,
      "step": 1287
    },
    {
      "epoch": 0.5004249532551419,
      "grad_norm": 0.76171875,
      "learning_rate": 0.0001172163044516927,
      "loss": 2.3359,
      "step": 1288
    },
    {
      "epoch": 0.5008134819455574,
      "grad_norm": 0.70703125,
      "learning_rate": 0.00011708260899141943,
      "loss": 2.3574,
      "step": 1289
    },
    {
      "epoch": 0.5012020106359729,
      "grad_norm": 0.703125,
      "learning_rate": 0.00011694888207164091,
      "loss": 2.3206,
      "step": 1290
    },
    {
      "epoch": 0.5015905393263884,
      "grad_norm": 0.7578125,
      "learning_rate": 0.00011681512393862999,
      "loss": 2.4011,
      "step": 1291
    },
    {
      "epoch": 0.5019790680168039,
      "grad_norm": 0.69140625,
      "learning_rate": 0.00011668133483871699,
      "loss": 2.3868,
      "step": 1292
    },
    {
      "epoch": 0.5023675967072193,
      "grad_norm": 0.73828125,
      "learning_rate": 0.00011654751501828927,
      "loss": 2.373,
      "step": 1293
    },
    {
      "epoch": 0.5027561253976348,
      "grad_norm": 0.71875,
      "learning_rate": 0.00011641366472379078,
      "loss": 2.3772,
      "step": 1294
    },
    {
      "epoch": 0.5031446540880503,
      "grad_norm": 0.77734375,
      "learning_rate": 0.00011627978420172156,
      "loss": 2.4444,
      "step": 1295
    },
    {
      "epoch": 0.5035331827784658,
      "grad_norm": 0.71875,
      "learning_rate": 0.00011614587369863737,
      "loss": 2.3138,
      "step": 1296
    },
    {
      "epoch": 0.5039217114688813,
      "grad_norm": 0.6953125,
      "learning_rate": 0.00011601193346114905,
      "loss": 2.2997,
      "step": 1297
    },
    {
      "epoch": 0.5043102401592967,
      "grad_norm": 0.671875,
      "learning_rate": 0.00011587796373592237,
      "loss": 2.3125,
      "step": 1298
    },
    {
      "epoch": 0.5046987688497122,
      "grad_norm": 0.73046875,
      "learning_rate": 0.00011574396476967732,
      "loss": 2.423,
      "step": 1299
    },
    {
      "epoch": 0.5050872975401277,
      "grad_norm": 0.703125,
      "learning_rate": 0.00011560993680918774,
      "loss": 2.3215,
      "step": 1300
    },
    {
      "epoch": 0.5054758262305432,
      "grad_norm": 0.72265625,
      "learning_rate": 0.00011547588010128088,
      "loss": 2.3576,
      "step": 1301
    },
    {
      "epoch": 0.5058643549209587,
      "grad_norm": 0.76953125,
      "learning_rate": 0.0001153417948928369,
      "loss": 2.3201,
      "step": 1302
    },
    {
      "epoch": 0.5062528836113742,
      "grad_norm": 0.765625,
      "learning_rate": 0.00011520768143078853,
      "loss": 2.4282,
      "step": 1303
    },
    {
      "epoch": 0.5066414123017896,
      "grad_norm": 0.77734375,
      "learning_rate": 0.00011507353996212043,
      "loss": 2.3862,
      "step": 1304
    },
    {
      "epoch": 0.5070299409922051,
      "grad_norm": 0.76953125,
      "learning_rate": 0.00011493937073386889,
      "loss": 2.3773,
      "step": 1305
    },
    {
      "epoch": 0.5074184696826206,
      "grad_norm": 0.7421875,
      "learning_rate": 0.00011480517399312134,
      "loss": 2.4023,
      "step": 1306
    },
    {
      "epoch": 0.5078069983730361,
      "grad_norm": 0.73828125,
      "learning_rate": 0.00011467094998701582,
      "loss": 2.3434,
      "step": 1307
    },
    {
      "epoch": 0.5081955270634516,
      "grad_norm": 0.75,
      "learning_rate": 0.00011453669896274066,
      "loss": 2.3864,
      "step": 1308
    },
    {
      "epoch": 0.5085840557538671,
      "grad_norm": 0.765625,
      "learning_rate": 0.00011440242116753384,
      "loss": 2.3932,
      "step": 1309
    },
    {
      "epoch": 0.5089725844442825,
      "grad_norm": 0.71875,
      "learning_rate": 0.00011426811684868275,
      "loss": 2.3176,
      "step": 1310
    },
    {
      "epoch": 0.509361113134698,
      "grad_norm": 0.76171875,
      "learning_rate": 0.00011413378625352358,
      "loss": 2.4461,
      "step": 1311
    },
    {
      "epoch": 0.5097496418251135,
      "grad_norm": 0.7265625,
      "learning_rate": 0.0001139994296294409,
      "loss": 2.3435,
      "step": 1312
    },
    {
      "epoch": 0.510138170515529,
      "grad_norm": 0.7265625,
      "learning_rate": 0.0001138650472238672,
      "loss": 2.329,
      "step": 1313
    },
    {
      "epoch": 0.5105266992059445,
      "grad_norm": 0.69140625,
      "learning_rate": 0.00011373063928428252,
      "loss": 2.3996,
      "step": 1314
    },
    {
      "epoch": 0.51091522789636,
      "grad_norm": 0.74609375,
      "learning_rate": 0.00011359620605821384,
      "loss": 2.4047,
      "step": 1315
    },
    {
      "epoch": 0.5113037565867754,
      "grad_norm": 0.71875,
      "learning_rate": 0.0001134617477932348,
      "loss": 2.4164,
      "step": 1316
    },
    {
      "epoch": 0.5116922852771909,
      "grad_norm": 0.73828125,
      "learning_rate": 0.00011332726473696505,
      "loss": 2.326,
      "step": 1317
    },
    {
      "epoch": 0.5120808139676064,
      "grad_norm": 0.70703125,
      "learning_rate": 0.00011319275713706996,
      "loss": 2.3532,
      "step": 1318
    },
    {
      "epoch": 0.5124693426580219,
      "grad_norm": 0.73828125,
      "learning_rate": 0.00011305822524126007,
      "loss": 2.3886,
      "step": 1319
    },
    {
      "epoch": 0.5128578713484374,
      "grad_norm": 0.7421875,
      "learning_rate": 0.0001129236692972907,
      "loss": 2.4516,
      "step": 1320
    },
    {
      "epoch": 0.5132464000388529,
      "grad_norm": 0.7265625,
      "learning_rate": 0.00011278908955296143,
      "loss": 2.371,
      "step": 1321
    },
    {
      "epoch": 0.5136349287292683,
      "grad_norm": 0.71875,
      "learning_rate": 0.00011265448625611568,
      "loss": 2.3791,
      "step": 1322
    },
    {
      "epoch": 0.5140234574196838,
      "grad_norm": 0.69140625,
      "learning_rate": 0.00011251985965464022,
      "loss": 2.4764,
      "step": 1323
    },
    {
      "epoch": 0.5144119861100993,
      "grad_norm": 0.6953125,
      "learning_rate": 0.00011238520999646479,
      "loss": 2.4049,
      "step": 1324
    },
    {
      "epoch": 0.5148005148005148,
      "grad_norm": 0.76171875,
      "learning_rate": 0.00011225053752956152,
      "loss": 2.3209,
      "step": 1325
    },
    {
      "epoch": 0.5151890434909303,
      "grad_norm": 0.6796875,
      "learning_rate": 0.00011211584250194463,
      "loss": 2.3245,
      "step": 1326
    },
    {
      "epoch": 0.5155775721813458,
      "grad_norm": 0.77734375,
      "learning_rate": 0.0001119811251616698,
      "loss": 2.3636,
      "step": 1327
    },
    {
      "epoch": 0.5159661008717612,
      "grad_norm": 0.69140625,
      "learning_rate": 0.00011184638575683388,
      "loss": 2.3771,
      "step": 1328
    },
    {
      "epoch": 0.5163546295621767,
      "grad_norm": 0.75390625,
      "learning_rate": 0.00011171162453557431,
      "loss": 2.4129,
      "step": 1329
    },
    {
      "epoch": 0.5167431582525922,
      "grad_norm": 0.76171875,
      "learning_rate": 0.00011157684174606872,
      "loss": 2.3208,
      "step": 1330
    },
    {
      "epoch": 0.5171316869430077,
      "grad_norm": 0.703125,
      "learning_rate": 0.00011144203763653443,
      "loss": 2.4374,
      "step": 1331
    },
    {
      "epoch": 0.5175202156334232,
      "grad_norm": 0.74609375,
      "learning_rate": 0.00011130721245522808,
      "loss": 2.4326,
      "step": 1332
    },
    {
      "epoch": 0.5179087443238387,
      "grad_norm": 0.69921875,
      "learning_rate": 0.00011117236645044506,
      "loss": 2.3765,
      "step": 1333
    },
    {
      "epoch": 0.5182972730142541,
      "grad_norm": 0.6953125,
      "learning_rate": 0.00011103749987051916,
      "loss": 2.3262,
      "step": 1334
    },
    {
      "epoch": 0.5186858017046696,
      "grad_norm": 0.703125,
      "learning_rate": 0.00011090261296382202,
      "loss": 2.3944,
      "step": 1335
    },
    {
      "epoch": 0.5190743303950851,
      "grad_norm": 0.6796875,
      "learning_rate": 0.00011076770597876272,
      "loss": 2.3092,
      "step": 1336
    },
    {
      "epoch": 0.5194628590855006,
      "grad_norm": 0.70703125,
      "learning_rate": 0.00011063277916378736,
      "loss": 2.3485,
      "step": 1337
    },
    {
      "epoch": 0.5198513877759161,
      "grad_norm": 0.765625,
      "learning_rate": 0.0001104978327673785,
      "loss": 2.4069,
      "step": 1338
    },
    {
      "epoch": 0.5202399164663316,
      "grad_norm": 0.7421875,
      "learning_rate": 0.00011036286703805478,
      "loss": 2.4546,
      "step": 1339
    },
    {
      "epoch": 0.520628445156747,
      "grad_norm": 0.71875,
      "learning_rate": 0.00011022788222437048,
      "loss": 2.3471,
      "step": 1340
    },
    {
      "epoch": 0.5210169738471625,
      "grad_norm": 0.71875,
      "learning_rate": 0.00011009287857491497,
      "loss": 2.4365,
      "step": 1341
    },
    {
      "epoch": 0.521405502537578,
      "grad_norm": 0.6796875,
      "learning_rate": 0.00010995785633831233,
      "loss": 2.358,
      "step": 1342
    },
    {
      "epoch": 0.5217940312279935,
      "grad_norm": 0.73828125,
      "learning_rate": 0.00010982281576322091,
      "loss": 2.3183,
      "step": 1343
    },
    {
      "epoch": 0.522182559918409,
      "grad_norm": 0.72265625,
      "learning_rate": 0.00010968775709833274,
      "loss": 2.4138,
      "step": 1344
    },
    {
      "epoch": 0.5225710886088245,
      "grad_norm": 0.73046875,
      "learning_rate": 0.00010955268059237327,
      "loss": 2.3831,
      "step": 1345
    },
    {
      "epoch": 0.5229596172992399,
      "grad_norm": 0.765625,
      "learning_rate": 0.00010941758649410075,
      "loss": 2.3526,
      "step": 1346
    },
    {
      "epoch": 0.5233481459896554,
      "grad_norm": 0.7265625,
      "learning_rate": 0.0001092824750523058,
      "loss": 2.3985,
      "step": 1347
    },
    {
      "epoch": 0.5237366746800709,
      "grad_norm": 0.73046875,
      "learning_rate": 0.00010914734651581108,
      "loss": 2.4037,
      "step": 1348
    },
    {
      "epoch": 0.5241252033704864,
      "grad_norm": 0.7578125,
      "learning_rate": 0.00010901220113347062,
      "loss": 2.4123,
      "step": 1349
    },
    {
      "epoch": 0.5245137320609019,
      "grad_norm": 0.7578125,
      "learning_rate": 0.00010887703915416951,
      "loss": 2.3917,
      "step": 1350
    },
    {
      "epoch": 0.5249022607513174,
      "grad_norm": 0.765625,
      "learning_rate": 0.00010874186082682345,
      "loss": 2.4087,
      "step": 1351
    },
    {
      "epoch": 0.5252907894417328,
      "grad_norm": 0.69140625,
      "learning_rate": 0.00010860666640037825,
      "loss": 2.3286,
      "step": 1352
    },
    {
      "epoch": 0.5256793181321483,
      "grad_norm": 0.74609375,
      "learning_rate": 0.00010847145612380923,
      "loss": 2.4452,
      "step": 1353
    },
    {
      "epoch": 0.5260678468225638,
      "grad_norm": 0.82421875,
      "learning_rate": 0.00010833623024612105,
      "loss": 2.3288,
      "step": 1354
    },
    {
      "epoch": 0.5264563755129793,
      "grad_norm": 0.76953125,
      "learning_rate": 0.00010820098901634705,
      "loss": 2.3248,
      "step": 1355
    },
    {
      "epoch": 0.5268449042033948,
      "grad_norm": 0.6953125,
      "learning_rate": 0.0001080657326835489,
      "loss": 2.3524,
      "step": 1356
    },
    {
      "epoch": 0.5272334328938103,
      "grad_norm": 0.73046875,
      "learning_rate": 0.00010793046149681593,
      "loss": 2.3483,
      "step": 1357
    },
    {
      "epoch": 0.5276219615842257,
      "grad_norm": 0.71875,
      "learning_rate": 0.00010779517570526499,
      "loss": 2.4338,
      "step": 1358
    },
    {
      "epoch": 0.5280104902746412,
      "grad_norm": 0.7109375,
      "learning_rate": 0.00010765987555803973,
      "loss": 2.4,
      "step": 1359
    },
    {
      "epoch": 0.5283990189650567,
      "grad_norm": 0.7578125,
      "learning_rate": 0.00010752456130431032,
      "loss": 2.3773,
      "step": 1360
    },
    {
      "epoch": 0.5287875476554722,
      "grad_norm": 0.734375,
      "learning_rate": 0.00010738923319327281,
      "loss": 2.333,
      "step": 1361
    },
    {
      "epoch": 0.5291760763458877,
      "grad_norm": 0.72265625,
      "learning_rate": 0.0001072538914741488,
      "loss": 2.4401,
      "step": 1362
    },
    {
      "epoch": 0.5295646050363032,
      "grad_norm": 0.73046875,
      "learning_rate": 0.00010711853639618497,
      "loss": 2.3724,
      "step": 1363
    },
    {
      "epoch": 0.5299531337267186,
      "grad_norm": 0.7421875,
      "learning_rate": 0.00010698316820865264,
      "loss": 2.43,
      "step": 1364
    },
    {
      "epoch": 0.5303416624171341,
      "grad_norm": 0.71484375,
      "learning_rate": 0.00010684778716084722,
      "loss": 2.3521,
      "step": 1365
    },
    {
      "epoch": 0.5307301911075496,
      "grad_norm": 0.71875,
      "learning_rate": 0.0001067123935020878,
      "loss": 2.4063,
      "step": 1366
    },
    {
      "epoch": 0.5311187197979651,
      "grad_norm": 0.6875,
      "learning_rate": 0.0001065769874817167,
      "loss": 2.3809,
      "step": 1367
    },
    {
      "epoch": 0.5315072484883806,
      "grad_norm": 0.7421875,
      "learning_rate": 0.00010644156934909901,
      "loss": 2.3949,
      "step": 1368
    },
    {
      "epoch": 0.531895777178796,
      "grad_norm": 0.80859375,
      "learning_rate": 0.00010630613935362217,
      "loss": 2.3989,
      "step": 1369
    },
    {
      "epoch": 0.5322843058692115,
      "grad_norm": 0.7109375,
      "learning_rate": 0.00010617069774469543,
      "loss": 2.4125,
      "step": 1370
    },
    {
      "epoch": 0.532672834559627,
      "grad_norm": 0.703125,
      "learning_rate": 0.00010603524477174937,
      "loss": 2.3935,
      "step": 1371
    },
    {
      "epoch": 0.5330613632500425,
      "grad_norm": 0.74609375,
      "learning_rate": 0.00010589978068423561,
      "loss": 2.4038,
      "step": 1372
    },
    {
      "epoch": 0.533449891940458,
      "grad_norm": 0.6953125,
      "learning_rate": 0.00010576430573162612,
      "loss": 2.3936,
      "step": 1373
    },
    {
      "epoch": 0.5338384206308735,
      "grad_norm": 0.6796875,
      "learning_rate": 0.00010562882016341303,
      "loss": 2.336,
      "step": 1374
    },
    {
      "epoch": 0.5342269493212889,
      "grad_norm": 0.68359375,
      "learning_rate": 0.00010549332422910782,
      "loss": 2.3207,
      "step": 1375
    },
    {
      "epoch": 0.5346154780117044,
      "grad_norm": 0.72265625,
      "learning_rate": 0.00010535781817824126,
      "loss": 2.3884,
      "step": 1376
    },
    {
      "epoch": 0.5350040067021199,
      "grad_norm": 0.69140625,
      "learning_rate": 0.0001052223022603626,
      "loss": 2.3599,
      "step": 1377
    },
    {
      "epoch": 0.5353925353925354,
      "grad_norm": 0.6875,
      "learning_rate": 0.00010508677672503942,
      "loss": 2.4099,
      "step": 1378
    },
    {
      "epoch": 0.5357810640829509,
      "grad_norm": 0.69921875,
      "learning_rate": 0.00010495124182185679,
      "loss": 2.4312,
      "step": 1379
    },
    {
      "epoch": 0.5361695927733664,
      "grad_norm": 0.6875,
      "learning_rate": 0.00010481569780041723,
      "loss": 2.3607,
      "step": 1380
    },
    {
      "epoch": 0.5365581214637818,
      "grad_norm": 0.71484375,
      "learning_rate": 0.00010468014491033995,
      "loss": 2.4015,
      "step": 1381
    },
    {
      "epoch": 0.5369466501541973,
      "grad_norm": 0.6796875,
      "learning_rate": 0.00010454458340126059,
      "loss": 2.3508,
      "step": 1382
    },
    {
      "epoch": 0.5373351788446128,
      "grad_norm": 0.71484375,
      "learning_rate": 0.0001044090135228305,
      "loss": 2.368,
      "step": 1383
    },
    {
      "epoch": 0.5377237075350283,
      "grad_norm": 0.71875,
      "learning_rate": 0.00010427343552471657,
      "loss": 2.4,
      "step": 1384
    },
    {
      "epoch": 0.5381122362254438,
      "grad_norm": 0.73828125,
      "learning_rate": 0.00010413784965660063,
      "loss": 2.4134,
      "step": 1385
    },
    {
      "epoch": 0.5385007649158593,
      "grad_norm": 0.6953125,
      "learning_rate": 0.00010400225616817895,
      "loss": 2.3331,
      "step": 1386
    },
    {
      "epoch": 0.5388892936062747,
      "grad_norm": 0.75,
      "learning_rate": 0.00010386665530916191,
      "loss": 2.4752,
      "step": 1387
    },
    {
      "epoch": 0.5392778222966902,
      "grad_norm": 0.69140625,
      "learning_rate": 0.00010373104732927341,
      "loss": 2.396,
      "step": 1388
    },
    {
      "epoch": 0.5396663509871057,
      "grad_norm": 0.71484375,
      "learning_rate": 0.00010359543247825045,
      "loss": 2.3771,
      "step": 1389
    },
    {
      "epoch": 0.5400548796775212,
      "grad_norm": 0.69921875,
      "learning_rate": 0.00010345981100584272,
      "loss": 2.3218,
      "step": 1390
    },
    {
      "epoch": 0.5404434083679367,
      "grad_norm": 0.67578125,
      "learning_rate": 0.0001033241831618121,
      "loss": 2.2933,
      "step": 1391
    },
    {
      "epoch": 0.5408319370583522,
      "grad_norm": 0.71875,
      "learning_rate": 0.00010318854919593221,
      "loss": 2.4244,
      "step": 1392
    },
    {
      "epoch": 0.5412204657487676,
      "grad_norm": 0.67578125,
      "learning_rate": 0.00010305290935798792,
      "loss": 2.3525,
      "step": 1393
    },
    {
      "epoch": 0.5416089944391831,
      "grad_norm": 0.68359375,
      "learning_rate": 0.00010291726389777489,
      "loss": 2.3758,
      "step": 1394
    },
    {
      "epoch": 0.5419975231295986,
      "grad_norm": 0.73828125,
      "learning_rate": 0.00010278161306509923,
      "loss": 2.3396,
      "step": 1395
    },
    {
      "epoch": 0.5423860518200141,
      "grad_norm": 0.7265625,
      "learning_rate": 0.00010264595710977687,
      "loss": 2.3817,
      "step": 1396
    },
    {
      "epoch": 0.5427745805104296,
      "grad_norm": 0.6796875,
      "learning_rate": 0.00010251029628163316,
      "loss": 2.4236,
      "step": 1397
    },
    {
      "epoch": 0.5431631092008451,
      "grad_norm": 0.6953125,
      "learning_rate": 0.00010237463083050245,
      "loss": 2.3765,
      "step": 1398
    },
    {
      "epoch": 0.5435516378912605,
      "grad_norm": 0.71875,
      "learning_rate": 0.0001022389610062276,
      "loss": 2.3717,
      "step": 1399
    },
    {
      "epoch": 0.543940166581676,
      "grad_norm": 0.73046875,
      "learning_rate": 0.00010210328705865962,
      "loss": 2.3726,
      "step": 1400
    },
    {
      "epoch": 0.5443286952720915,
      "grad_norm": 0.6640625,
      "learning_rate": 0.00010196760923765689,
      "loss": 2.3417,
      "step": 1401
    },
    {
      "epoch": 0.544717223962507,
      "grad_norm": 0.7421875,
      "learning_rate": 0.00010183192779308512,
      "loss": 2.3906,
      "step": 1402
    },
    {
      "epoch": 0.5451057526529225,
      "grad_norm": 0.71875,
      "learning_rate": 0.00010169624297481664,
      "loss": 2.3572,
      "step": 1403
    },
    {
      "epoch": 0.545494281343338,
      "grad_norm": 0.6953125,
      "learning_rate": 0.00010156055503272996,
      "loss": 2.3263,
      "step": 1404
    },
    {
      "epoch": 0.5458828100337534,
      "grad_norm": 0.76171875,
      "learning_rate": 0.00010142486421670941,
      "loss": 2.3674,
      "step": 1405
    },
    {
      "epoch": 0.5462713387241689,
      "grad_norm": 0.6875,
      "learning_rate": 0.00010128917077664448,
      "loss": 2.3493,
      "step": 1406
    },
    {
      "epoch": 0.5466598674145844,
      "grad_norm": 0.76171875,
      "learning_rate": 0.00010115347496242964,
      "loss": 2.4162,
      "step": 1407
    },
    {
      "epoch": 0.5470483961049999,
      "grad_norm": 0.73828125,
      "learning_rate": 0.00010101777702396367,
      "loss": 2.4186,
      "step": 1408
    },
    {
      "epoch": 0.5474369247954154,
      "grad_norm": 0.66796875,
      "learning_rate": 0.00010088207721114922,
      "loss": 2.365,
      "step": 1409
    },
    {
      "epoch": 0.5478254534858309,
      "grad_norm": 0.69921875,
      "learning_rate": 0.00010074637577389253,
      "loss": 2.3712,
      "step": 1410
    },
    {
      "epoch": 0.5482139821762463,
      "grad_norm": 0.67578125,
      "learning_rate": 0.0001006106729621026,
      "loss": 2.3686,
      "step": 1411
    },
    {
      "epoch": 0.5486025108666618,
      "grad_norm": 0.6953125,
      "learning_rate": 0.00010047496902569122,
      "loss": 2.3628,
      "step": 1412
    },
    {
      "epoch": 0.5489910395570773,
      "grad_norm": 0.66796875,
      "learning_rate": 0.00010033926421457207,
      "loss": 2.34,
      "step": 1413
    },
    {
      "epoch": 0.5493795682474928,
      "grad_norm": 0.76171875,
      "learning_rate": 0.00010020355877866054,
      "loss": 2.359,
      "step": 1414
    },
    {
      "epoch": 0.5497680969379083,
      "grad_norm": 0.69140625,
      "learning_rate": 0.0001000678529678731,
      "loss": 2.3546,
      "step": 1415
    },
    {
      "epoch": 0.5501566256283238,
      "grad_norm": 0.69140625,
      "learning_rate": 9.993214703212696e-05,
      "loss": 2.3523,
      "step": 1416
    },
    {
      "epoch": 0.5505451543187392,
      "grad_norm": 0.72265625,
      "learning_rate": 9.97964412213395e-05,
      "loss": 2.3794,
      "step": 1417
    },
    {
      "epoch": 0.5509336830091547,
      "grad_norm": 0.83984375,
      "learning_rate": 9.966073578542795e-05,
      "loss": 2.3968,
      "step": 1418
    },
    {
      "epoch": 0.5513222116995702,
      "grad_norm": 0.7421875,
      "learning_rate": 9.952503097430882e-05,
      "loss": 2.4061,
      "step": 1419
    },
    {
      "epoch": 0.5517107403899857,
      "grad_norm": 0.73046875,
      "learning_rate": 9.938932703789743e-05,
      "loss": 2.3675,
      "step": 1420
    },
    {
      "epoch": 0.5520992690804012,
      "grad_norm": 0.6953125,
      "learning_rate": 9.925362422610754e-05,
      "loss": 2.3192,
      "step": 1421
    },
    {
      "epoch": 0.5524877977708167,
      "grad_norm": 0.71484375,
      "learning_rate": 9.91179227888508e-05,
      "loss": 2.354,
      "step": 1422
    },
    {
      "epoch": 0.5528763264612321,
      "grad_norm": 0.67578125,
      "learning_rate": 9.898222297603639e-05,
      "loss": 2.3639,
      "step": 1423
    },
    {
      "epoch": 0.5532648551516476,
      "grad_norm": 0.7109375,
      "learning_rate": 9.884652503757041e-05,
      "loss": 2.4172,
      "step": 1424
    },
    {
      "epoch": 0.5536533838420631,
      "grad_norm": 0.69140625,
      "learning_rate": 9.871082922335554e-05,
      "loss": 2.3958,
      "step": 1425
    },
    {
      "epoch": 0.5540419125324786,
      "grad_norm": 0.6953125,
      "learning_rate": 9.857513578329063e-05,
      "loss": 2.3441,
      "step": 1426
    },
    {
      "epoch": 0.5544304412228941,
      "grad_norm": 0.65625,
      "learning_rate": 9.843944496727007e-05,
      "loss": 2.3684,
      "step": 1427
    },
    {
      "epoch": 0.5548189699133096,
      "grad_norm": 0.76171875,
      "learning_rate": 9.830375702518338e-05,
      "loss": 2.3754,
      "step": 1428
    },
    {
      "epoch": 0.555207498603725,
      "grad_norm": 0.67578125,
      "learning_rate": 9.81680722069149e-05,
      "loss": 2.3649,
      "step": 1429
    },
    {
      "epoch": 0.5555960272941405,
      "grad_norm": 0.69140625,
      "learning_rate": 9.803239076234312e-05,
      "loss": 2.4059,
      "step": 1430
    },
    {
      "epoch": 0.555984555984556,
      "grad_norm": 0.7109375,
      "learning_rate": 9.78967129413404e-05,
      "loss": 2.3236,
      "step": 1431
    },
    {
      "epoch": 0.5563730846749715,
      "grad_norm": 0.6953125,
      "learning_rate": 9.77610389937724e-05,
      "loss": 2.3588,
      "step": 1432
    },
    {
      "epoch": 0.556761613365387,
      "grad_norm": 0.74609375,
      "learning_rate": 9.762536916949759e-05,
      "loss": 2.4225,
      "step": 1433
    },
    {
      "epoch": 0.5571501420558025,
      "grad_norm": 0.73046875,
      "learning_rate": 9.748970371836688e-05,
      "loss": 2.4599,
      "step": 1434
    },
    {
      "epoch": 0.5575386707462179,
      "grad_norm": 0.66796875,
      "learning_rate": 9.735404289022316e-05,
      "loss": 2.2971,
      "step": 1435
    },
    {
      "epoch": 0.5579271994366334,
      "grad_norm": 0.6796875,
      "learning_rate": 9.721838693490079e-05,
      "loss": 2.2642,
      "step": 1436
    },
    {
      "epoch": 0.5583157281270489,
      "grad_norm": 0.671875,
      "learning_rate": 9.708273610222512e-05,
      "loss": 2.3131,
      "step": 1437
    },
    {
      "epoch": 0.5587042568174644,
      "grad_norm": 0.74609375,
      "learning_rate": 9.69470906420121e-05,
      "loss": 2.3757,
      "step": 1438
    },
    {
      "epoch": 0.5590927855078799,
      "grad_norm": 0.69921875,
      "learning_rate": 9.68114508040678e-05,
      "loss": 2.3271,
      "step": 1439
    },
    {
      "epoch": 0.5594813141982953,
      "grad_norm": 0.6953125,
      "learning_rate": 9.667581683818791e-05,
      "loss": 2.3897,
      "step": 1440
    },
    {
      "epoch": 0.5598698428887108,
      "grad_norm": 0.703125,
      "learning_rate": 9.65401889941573e-05,
      "loss": 2.4096,
      "step": 1441
    },
    {
      "epoch": 0.5602583715791263,
      "grad_norm": 0.67578125,
      "learning_rate": 9.640456752174957e-05,
      "loss": 2.3779,
      "step": 1442
    },
    {
      "epoch": 0.5606469002695418,
      "grad_norm": 0.69140625,
      "learning_rate": 9.626895267072662e-05,
      "loss": 2.3463,
      "step": 1443
    },
    {
      "epoch": 0.5610354289599573,
      "grad_norm": 0.67578125,
      "learning_rate": 9.613334469083811e-05,
      "loss": 2.2979,
      "step": 1444
    },
    {
      "epoch": 0.5614239576503728,
      "grad_norm": 0.65625,
      "learning_rate": 9.599774383182106e-05,
      "loss": 2.3747,
      "step": 1445
    },
    {
      "epoch": 0.5618124863407882,
      "grad_norm": 0.7578125,
      "learning_rate": 9.586215034339939e-05,
      "loss": 2.3214,
      "step": 1446
    },
    {
      "epoch": 0.5622010150312037,
      "grad_norm": 0.7578125,
      "learning_rate": 9.572656447528345e-05,
      "loss": 2.398,
      "step": 1447
    },
    {
      "epoch": 0.5625895437216192,
      "grad_norm": 0.765625,
      "learning_rate": 9.559098647716952e-05,
      "loss": 2.4058,
      "step": 1448
    },
    {
      "epoch": 0.5629780724120347,
      "grad_norm": 0.72265625,
      "learning_rate": 9.545541659873945e-05,
      "loss": 2.425,
      "step": 1449
    },
    {
      "epoch": 0.5633666011024502,
      "grad_norm": 0.71875,
      "learning_rate": 9.531985508966004e-05,
      "loss": 2.3954,
      "step": 1450
    },
    {
      "epoch": 0.5637551297928657,
      "grad_norm": 0.78515625,
      "learning_rate": 9.518430219958277e-05,
      "loss": 2.3367,
      "step": 1451
    },
    {
      "epoch": 0.5641436584832811,
      "grad_norm": 0.71484375,
      "learning_rate": 9.504875817814321e-05,
      "loss": 2.3757,
      "step": 1452
    },
    {
      "epoch": 0.5645321871736966,
      "grad_norm": 0.66796875,
      "learning_rate": 9.491322327496062e-05,
      "loss": 2.3418,
      "step": 1453
    },
    {
      "epoch": 0.5649207158641121,
      "grad_norm": 0.6875,
      "learning_rate": 9.477769773963738e-05,
      "loss": 2.3674,
      "step": 1454
    },
    {
      "epoch": 0.5653092445545276,
      "grad_norm": 0.73828125,
      "learning_rate": 9.464218182175875e-05,
      "loss": 2.3464,
      "step": 1455
    },
    {
      "epoch": 0.5656977732449431,
      "grad_norm": 0.72265625,
      "learning_rate": 9.450667577089217e-05,
      "loss": 2.3947,
      "step": 1456
    },
    {
      "epoch": 0.5660863019353586,
      "grad_norm": 0.70703125,
      "learning_rate": 9.437117983658701e-05,
      "loss": 2.3727,
      "step": 1457
    },
    {
      "epoch": 0.566474830625774,
      "grad_norm": 0.6875,
      "learning_rate": 9.423569426837388e-05,
      "loss": 2.4071,
      "step": 1458
    },
    {
      "epoch": 0.5668633593161895,
      "grad_norm": 0.71484375,
      "learning_rate": 9.41002193157644e-05,
      "loss": 2.3527,
      "step": 1459
    },
    {
      "epoch": 0.567251888006605,
      "grad_norm": 0.7578125,
      "learning_rate": 9.396475522825066e-05,
      "loss": 2.3901,
      "step": 1460
    },
    {
      "epoch": 0.5676404166970205,
      "grad_norm": 0.734375,
      "learning_rate": 9.382930225530462e-05,
      "loss": 2.4118,
      "step": 1461
    },
    {
      "epoch": 0.568028945387436,
      "grad_norm": 0.86328125,
      "learning_rate": 9.369386064637785e-05,
      "loss": 2.4239,
      "step": 1462
    },
    {
      "epoch": 0.5684174740778515,
      "grad_norm": 0.6796875,
      "learning_rate": 9.355843065090104e-05,
      "loss": 2.4176,
      "step": 1463
    },
    {
      "epoch": 0.5688060027682669,
      "grad_norm": 0.78515625,
      "learning_rate": 9.342301251828336e-05,
      "loss": 2.4052,
      "step": 1464
    },
    {
      "epoch": 0.5691945314586824,
      "grad_norm": 0.7109375,
      "learning_rate": 9.328760649791227e-05,
      "loss": 2.4543,
      "step": 1465
    },
    {
      "epoch": 0.5695830601490979,
      "grad_norm": 0.81640625,
      "learning_rate": 9.315221283915282e-05,
      "loss": 2.3569,
      "step": 1466
    },
    {
      "epoch": 0.5699715888395134,
      "grad_norm": 0.7109375,
      "learning_rate": 9.301683179134741e-05,
      "loss": 2.4172,
      "step": 1467
    },
    {
      "epoch": 0.5703601175299289,
      "grad_norm": 0.76953125,
      "learning_rate": 9.288146360381507e-05,
      "loss": 2.4128,
      "step": 1468
    },
    {
      "epoch": 0.5707486462203444,
      "grad_norm": 0.765625,
      "learning_rate": 9.274610852585127e-05,
      "loss": 2.347,
      "step": 1469
    },
    {
      "epoch": 0.5711371749107598,
      "grad_norm": 0.74609375,
      "learning_rate": 9.261076680672724e-05,
      "loss": 2.3291,
      "step": 1470
    },
    {
      "epoch": 0.5715257036011753,
      "grad_norm": 0.80859375,
      "learning_rate": 9.24754386956897e-05,
      "loss": 2.3808,
      "step": 1471
    },
    {
      "epoch": 0.5719142322915908,
      "grad_norm": 0.69921875,
      "learning_rate": 9.234012444196029e-05,
      "loss": 2.3653,
      "step": 1472
    },
    {
      "epoch": 0.5723027609820063,
      "grad_norm": 0.80078125,
      "learning_rate": 9.220482429473502e-05,
      "loss": 2.3972,
      "step": 1473
    },
    {
      "epoch": 0.5726912896724218,
      "grad_norm": 0.6640625,
      "learning_rate": 9.206953850318408e-05,
      "loss": 2.3442,
      "step": 1474
    },
    {
      "epoch": 0.5730798183628373,
      "grad_norm": 0.74609375,
      "learning_rate": 9.193426731645112e-05,
      "loss": 2.4275,
      "step": 1475
    },
    {
      "epoch": 0.5734683470532527,
      "grad_norm": 0.71875,
      "learning_rate": 9.179901098365297e-05,
      "loss": 2.3666,
      "step": 1476
    },
    {
      "epoch": 0.5738568757436682,
      "grad_norm": 0.71484375,
      "learning_rate": 9.166376975387898e-05,
      "loss": 2.3713,
      "step": 1477
    },
    {
      "epoch": 0.5742454044340837,
      "grad_norm": 0.74609375,
      "learning_rate": 9.152854387619081e-05,
      "loss": 2.399,
      "step": 1478
    },
    {
      "epoch": 0.5746339331244992,
      "grad_norm": 0.734375,
      "learning_rate": 9.139333359962179e-05,
      "loss": 2.35,
      "step": 1479
    },
    {
      "epoch": 0.5750224618149147,
      "grad_norm": 0.6484375,
      "learning_rate": 9.125813917317656e-05,
      "loss": 2.3904,
      "step": 1480
    },
    {
      "epoch": 0.5754109905053302,
      "grad_norm": 0.7421875,
      "learning_rate": 9.112296084583051e-05,
      "loss": 2.4128,
      "step": 1481
    },
    {
      "epoch": 0.5757995191957456,
      "grad_norm": 0.68359375,
      "learning_rate": 9.098779886652941e-05,
      "loss": 2.3246,
      "step": 1482
    },
    {
      "epoch": 0.5761880478861611,
      "grad_norm": 0.6875,
      "learning_rate": 9.085265348418894e-05,
      "loss": 2.3642,
      "step": 1483
    },
    {
      "epoch": 0.5765765765765766,
      "grad_norm": 0.67578125,
      "learning_rate": 9.071752494769422e-05,
      "loss": 2.3825,
      "step": 1484
    },
    {
      "epoch": 0.5769651052669921,
      "grad_norm": 0.6953125,
      "learning_rate": 9.058241350589929e-05,
      "loss": 2.3952,
      "step": 1485
    },
    {
      "epoch": 0.5773536339574076,
      "grad_norm": 0.69140625,
      "learning_rate": 9.044731940762675e-05,
      "loss": 2.3592,
      "step": 1486
    },
    {
      "epoch": 0.5777421626478231,
      "grad_norm": 0.69921875,
      "learning_rate": 9.031224290166727e-05,
      "loss": 2.4437,
      "step": 1487
    },
    {
      "epoch": 0.5781306913382385,
      "grad_norm": 0.7109375,
      "learning_rate": 9.017718423677911e-05,
      "loss": 2.4009,
      "step": 1488
    },
    {
      "epoch": 0.578519220028654,
      "grad_norm": 0.6796875,
      "learning_rate": 9.004214366168769e-05,
      "loss": 2.3155,
      "step": 1489
    },
    {
      "epoch": 0.5789077487190695,
      "grad_norm": 0.70703125,
      "learning_rate": 8.990712142508505e-05,
      "loss": 2.4467,
      "step": 1490
    },
    {
      "epoch": 0.579296277409485,
      "grad_norm": 0.734375,
      "learning_rate": 8.977211777562953e-05,
      "loss": 2.3164,
      "step": 1491
    },
    {
      "epoch": 0.5796848060999005,
      "grad_norm": 0.703125,
      "learning_rate": 8.963713296194521e-05,
      "loss": 2.3961,
      "step": 1492
    },
    {
      "epoch": 0.580073334790316,
      "grad_norm": 0.68359375,
      "learning_rate": 8.950216723262152e-05,
      "loss": 2.3545,
      "step": 1493
    },
    {
      "epoch": 0.5804618634807314,
      "grad_norm": 0.67578125,
      "learning_rate": 8.936722083621265e-05,
      "loss": 2.4041,
      "step": 1494
    },
    {
      "epoch": 0.5808503921711469,
      "grad_norm": 0.6875,
      "learning_rate": 8.923229402123728e-05,
      "loss": 2.3851,
      "step": 1495
    },
    {
      "epoch": 0.5812389208615624,
      "grad_norm": 0.74609375,
      "learning_rate": 8.909738703617799e-05,
      "loss": 2.3893,
      "step": 1496
    },
    {
      "epoch": 0.5816274495519779,
      "grad_norm": 0.765625,
      "learning_rate": 8.896250012948086e-05,
      "loss": 2.2755,
      "step": 1497
    },
    {
      "epoch": 0.5820159782423934,
      "grad_norm": 0.70703125,
      "learning_rate": 8.882763354955495e-05,
      "loss": 2.4155,
      "step": 1498
    },
    {
      "epoch": 0.5824045069328089,
      "grad_norm": 0.66015625,
      "learning_rate": 8.869278754477193e-05,
      "loss": 2.382,
      "step": 1499
    },
    {
      "epoch": 0.5827930356232243,
      "grad_norm": 0.65625,
      "learning_rate": 8.855796236346557e-05,
      "loss": 2.3534,
      "step": 1500
    },
    {
      "epoch": 0.5831815643136398,
      "grad_norm": 0.65234375,
      "learning_rate": 8.84231582539313e-05,
      "loss": 2.3147,
      "step": 1501
    },
    {
      "epoch": 0.5835700930040553,
      "grad_norm": 0.72265625,
      "learning_rate": 8.82883754644257e-05,
      "loss": 2.3962,
      "step": 1502
    },
    {
      "epoch": 0.5839586216944708,
      "grad_norm": 0.6640625,
      "learning_rate": 8.815361424316616e-05,
      "loss": 2.3999,
      "step": 1503
    },
    {
      "epoch": 0.5843471503848863,
      "grad_norm": 0.70703125,
      "learning_rate": 8.801887483833023e-05,
      "loss": 2.3827,
      "step": 1504
    },
    {
      "epoch": 0.5847356790753017,
      "grad_norm": 0.6640625,
      "learning_rate": 8.788415749805541e-05,
      "loss": 2.3867,
      "step": 1505
    },
    {
      "epoch": 0.5851242077657172,
      "grad_norm": 0.6796875,
      "learning_rate": 8.774946247043852e-05,
      "loss": 2.3742,
      "step": 1506
    },
    {
      "epoch": 0.5855127364561327,
      "grad_norm": 0.6953125,
      "learning_rate": 8.761479000353528e-05,
      "loss": 2.3596,
      "step": 1507
    },
    {
      "epoch": 0.5859012651465482,
      "grad_norm": 0.64453125,
      "learning_rate": 8.748014034535983e-05,
      "loss": 2.3165,
      "step": 1508
    },
    {
      "epoch": 0.5862897938369637,
      "grad_norm": 0.65234375,
      "learning_rate": 8.734551374388436e-05,
      "loss": 2.3892,
      "step": 1509
    },
    {
      "epoch": 0.5866783225273792,
      "grad_norm": 0.67578125,
      "learning_rate": 8.72109104470386e-05,
      "loss": 2.3335,
      "step": 1510
    },
    {
      "epoch": 0.5870668512177946,
      "grad_norm": 0.72265625,
      "learning_rate": 8.707633070270932e-05,
      "loss": 2.4179,
      "step": 1511
    },
    {
      "epoch": 0.5874553799082101,
      "grad_norm": 0.6796875,
      "learning_rate": 8.694177475873997e-05,
      "loss": 2.3839,
      "step": 1512
    },
    {
      "epoch": 0.5878439085986256,
      "grad_norm": 0.69140625,
      "learning_rate": 8.680724286293008e-05,
      "loss": 2.3786,
      "step": 1513
    },
    {
      "epoch": 0.5882324372890411,
      "grad_norm": 0.6796875,
      "learning_rate": 8.667273526303497e-05,
      "loss": 2.3887,
      "step": 1514
    },
    {
      "epoch": 0.5886209659794566,
      "grad_norm": 0.71484375,
      "learning_rate": 8.653825220676521e-05,
      "loss": 2.461,
      "step": 1515
    },
    {
      "epoch": 0.5890094946698721,
      "grad_norm": 0.69140625,
      "learning_rate": 8.640379394178617e-05,
      "loss": 2.3804,
      "step": 1516
    },
    {
      "epoch": 0.5893980233602875,
      "grad_norm": 0.66015625,
      "learning_rate": 8.62693607157175e-05,
      "loss": 2.3552,
      "step": 1517
    },
    {
      "epoch": 0.589786552050703,
      "grad_norm": 0.6875,
      "learning_rate": 8.613495277613281e-05,
      "loss": 2.3252,
      "step": 1518
    },
    {
      "epoch": 0.5901750807411185,
      "grad_norm": 0.6484375,
      "learning_rate": 8.600057037055914e-05,
      "loss": 2.3619,
      "step": 1519
    },
    {
      "epoch": 0.590563609431534,
      "grad_norm": 0.6953125,
      "learning_rate": 8.586621374647646e-05,
      "loss": 2.3739,
      "step": 1520
    },
    {
      "epoch": 0.5909521381219495,
      "grad_norm": 0.80078125,
      "learning_rate": 8.573188315131728e-05,
      "loss": 2.4229,
      "step": 1521
    },
    {
      "epoch": 0.591340666812365,
      "grad_norm": 0.6484375,
      "learning_rate": 8.559757883246617e-05,
      "loss": 2.3038,
      "step": 1522
    },
    {
      "epoch": 0.5917291955027804,
      "grad_norm": 0.68359375,
      "learning_rate": 8.546330103725937e-05,
      "loss": 2.3077,
      "step": 1523
    },
    {
      "epoch": 0.5921177241931959,
      "grad_norm": 0.64453125,
      "learning_rate": 8.53290500129842e-05,
      "loss": 2.3405,
      "step": 1524
    },
    {
      "epoch": 0.5925062528836114,
      "grad_norm": 0.68359375,
      "learning_rate": 8.51948260068787e-05,
      "loss": 2.3246,
      "step": 1525
    },
    {
      "epoch": 0.5928947815740269,
      "grad_norm": 0.6796875,
      "learning_rate": 8.506062926613112e-05,
      "loss": 2.4295,
      "step": 1526
    },
    {
      "epoch": 0.5932833102644424,
      "grad_norm": 0.63671875,
      "learning_rate": 8.492646003787958e-05,
      "loss": 2.3574,
      "step": 1527
    },
    {
      "epoch": 0.5936718389548579,
      "grad_norm": 0.69140625,
      "learning_rate": 8.479231856921148e-05,
      "loss": 2.4245,
      "step": 1528
    },
    {
      "epoch": 0.5940603676452733,
      "grad_norm": 0.671875,
      "learning_rate": 8.465820510716311e-05,
      "loss": 2.3591,
      "step": 1529
    },
    {
      "epoch": 0.5944488963356888,
      "grad_norm": 0.7109375,
      "learning_rate": 8.452411989871916e-05,
      "loss": 2.4629,
      "step": 1530
    },
    {
      "epoch": 0.5948374250261043,
      "grad_norm": 0.66015625,
      "learning_rate": 8.439006319081228e-05,
      "loss": 2.3193,
      "step": 1531
    },
    {
      "epoch": 0.5952259537165198,
      "grad_norm": 0.69921875,
      "learning_rate": 8.425603523032269e-05,
      "loss": 2.3394,
      "step": 1532
    },
    {
      "epoch": 0.5956144824069353,
      "grad_norm": 0.65234375,
      "learning_rate": 8.412203626407766e-05,
      "loss": 2.322,
      "step": 1533
    },
    {
      "epoch": 0.5960030110973508,
      "grad_norm": 0.6796875,
      "learning_rate": 8.398806653885098e-05,
      "loss": 2.3262,
      "step": 1534
    },
    {
      "epoch": 0.5963915397877662,
      "grad_norm": 0.69921875,
      "learning_rate": 8.385412630136267e-05,
      "loss": 2.3941,
      "step": 1535
    },
    {
      "epoch": 0.5967800684781817,
      "grad_norm": 0.73046875,
      "learning_rate": 8.372021579827844e-05,
      "loss": 2.3708,
      "step": 1536
    },
    {
      "epoch": 0.5971685971685972,
      "grad_norm": 0.7421875,
      "learning_rate": 8.358633527620923e-05,
      "loss": 2.3973,
      "step": 1537
    },
    {
      "epoch": 0.5975571258590127,
      "grad_norm": 15.1875,
      "learning_rate": 8.345248498171073e-05,
      "loss": 2.3555,
      "step": 1538
    },
    {
      "epoch": 0.5979456545494282,
      "grad_norm": 0.765625,
      "learning_rate": 8.331866516128302e-05,
      "loss": 2.3586,
      "step": 1539
    },
    {
      "epoch": 0.5983341832398437,
      "grad_norm": 0.74609375,
      "learning_rate": 8.318487606137001e-05,
      "loss": 2.3895,
      "step": 1540
    },
    {
      "epoch": 0.598722711930259,
      "grad_norm": 0.70703125,
      "learning_rate": 8.305111792835911e-05,
      "loss": 2.341,
      "step": 1541
    },
    {
      "epoch": 0.5991112406206746,
      "grad_norm": 0.63671875,
      "learning_rate": 8.291739100858058e-05,
      "loss": 2.3371,
      "step": 1542
    },
    {
      "epoch": 0.5994997693110901,
      "grad_norm": 0.65234375,
      "learning_rate": 8.278369554830729e-05,
      "loss": 2.3742,
      "step": 1543
    },
    {
      "epoch": 0.5998882980015056,
      "grad_norm": 0.6640625,
      "learning_rate": 8.265003179375418e-05,
      "loss": 2.394,
      "step": 1544
    },
    {
      "epoch": 0.6002768266919211,
      "grad_norm": 0.70703125,
      "learning_rate": 8.251639999107776e-05,
      "loss": 2.3719,
      "step": 1545
    },
    {
      "epoch": 0.6006653553823366,
      "grad_norm": 0.71875,
      "learning_rate": 8.238280038637566e-05,
      "loss": 2.3728,
      "step": 1546
    },
    {
      "epoch": 0.601053884072752,
      "grad_norm": 0.68359375,
      "learning_rate": 8.224923322568637e-05,
      "loss": 2.3766,
      "step": 1547
    },
    {
      "epoch": 0.6014424127631675,
      "grad_norm": 0.66015625,
      "learning_rate": 8.211569875498837e-05,
      "loss": 2.3564,
      "step": 1548
    },
    {
      "epoch": 0.601830941453583,
      "grad_norm": 0.6640625,
      "learning_rate": 8.198219722020019e-05,
      "loss": 2.3739,
      "step": 1549
    },
    {
      "epoch": 0.6022194701439985,
      "grad_norm": 0.71484375,
      "learning_rate": 8.184872886717956e-05,
      "loss": 2.4159,
      "step": 1550
    },
    {
      "epoch": 0.602607998834414,
      "grad_norm": 0.734375,
      "learning_rate": 8.17152939417232e-05,
      "loss": 2.3322,
      "step": 1551
    },
    {
      "epoch": 0.6029965275248295,
      "grad_norm": 0.69921875,
      "learning_rate": 8.158189268956619e-05,
      "loss": 2.3703,
      "step": 1552
    },
    {
      "epoch": 0.6033850562152449,
      "grad_norm": 0.65625,
      "learning_rate": 8.144852535638162e-05,
      "loss": 2.3311,
      "step": 1553
    },
    {
      "epoch": 0.6037735849056604,
      "grad_norm": 0.6875,
      "learning_rate": 8.131519218778008e-05,
      "loss": 2.3832,
      "step": 1554
    },
    {
      "epoch": 0.6041621135960759,
      "grad_norm": 0.640625,
      "learning_rate": 8.118189342930936e-05,
      "loss": 2.3851,
      "step": 1555
    },
    {
      "epoch": 0.6045506422864914,
      "grad_norm": 0.703125,
      "learning_rate": 8.104862932645374e-05,
      "loss": 2.3566,
      "step": 1556
    },
    {
      "epoch": 0.6049391709769069,
      "grad_norm": 0.671875,
      "learning_rate": 8.091540012463375e-05,
      "loss": 2.3132,
      "step": 1557
    },
    {
      "epoch": 0.6053276996673224,
      "grad_norm": 0.66015625,
      "learning_rate": 8.078220606920564e-05,
      "loss": 2.3965,
      "step": 1558
    },
    {
      "epoch": 0.6057162283577378,
      "grad_norm": 0.72265625,
      "learning_rate": 8.064904740546092e-05,
      "loss": 2.4171,
      "step": 1559
    },
    {
      "epoch": 0.6061047570481533,
      "grad_norm": 0.6875,
      "learning_rate": 8.051592437862597e-05,
      "loss": 2.3941,
      "step": 1560
    },
    {
      "epoch": 0.6064932857385688,
      "grad_norm": 0.69921875,
      "learning_rate": 8.038283723386144e-05,
      "loss": 2.4175,
      "step": 1561
    },
    {
      "epoch": 0.6068818144289843,
      "grad_norm": 0.6640625,
      "learning_rate": 8.024978621626199e-05,
      "loss": 2.3083,
      "step": 1562
    },
    {
      "epoch": 0.6072703431193998,
      "grad_norm": 0.70703125,
      "learning_rate": 8.011677157085571e-05,
      "loss": 2.3282,
      "step": 1563
    },
    {
      "epoch": 0.6076588718098153,
      "grad_norm": 0.68359375,
      "learning_rate": 7.998379354260375e-05,
      "loss": 2.3535,
      "step": 1564
    },
    {
      "epoch": 0.6080474005002307,
      "grad_norm": 0.71875,
      "learning_rate": 7.985085237639973e-05,
      "loss": 2.424,
      "step": 1565
    },
    {
      "epoch": 0.6084359291906462,
      "grad_norm": 0.734375,
      "learning_rate": 7.971794831706947e-05,
      "loss": 2.2892,
      "step": 1566
    },
    {
      "epoch": 0.6088244578810617,
      "grad_norm": 0.7578125,
      "learning_rate": 7.958508160937044e-05,
      "loss": 2.315,
      "step": 1567
    },
    {
      "epoch": 0.6092129865714772,
      "grad_norm": 0.7265625,
      "learning_rate": 7.94522524979913e-05,
      "loss": 2.3734,
      "step": 1568
    },
    {
      "epoch": 0.6096015152618927,
      "grad_norm": 0.6640625,
      "learning_rate": 7.931946122755149e-05,
      "loss": 2.333,
      "step": 1569
    },
    {
      "epoch": 0.6099900439523082,
      "grad_norm": 0.6640625,
      "learning_rate": 7.918670804260074e-05,
      "loss": 2.3811,
      "step": 1570
    },
    {
      "epoch": 0.6103785726427235,
      "grad_norm": 0.66796875,
      "learning_rate": 7.905399318761869e-05,
      "loss": 2.362,
      "step": 1571
    },
    {
      "epoch": 0.610767101333139,
      "grad_norm": 0.66015625,
      "learning_rate": 7.89213169070143e-05,
      "loss": 2.3697,
      "step": 1572
    },
    {
      "epoch": 0.6111556300235546,
      "grad_norm": 0.65234375,
      "learning_rate": 7.878867944512561e-05,
      "loss": 2.3693,
      "step": 1573
    },
    {
      "epoch": 0.61154415871397,
      "grad_norm": 0.671875,
      "learning_rate": 7.865608104621907e-05,
      "loss": 2.3939,
      "step": 1574
    },
    {
      "epoch": 0.6119326874043856,
      "grad_norm": 0.65234375,
      "learning_rate": 7.852352195448923e-05,
      "loss": 2.4086,
      "step": 1575
    },
    {
      "epoch": 0.612321216094801,
      "grad_norm": 0.69921875,
      "learning_rate": 7.839100241405828e-05,
      "loss": 2.3811,
      "step": 1576
    },
    {
      "epoch": 0.6127097447852164,
      "grad_norm": 0.6328125,
      "learning_rate": 7.825852266897553e-05,
      "loss": 2.2625,
      "step": 1577
    },
    {
      "epoch": 0.613098273475632,
      "grad_norm": 0.66796875,
      "learning_rate": 7.8126082963217e-05,
      "loss": 2.4848,
      "step": 1578
    },
    {
      "epoch": 0.6134868021660475,
      "grad_norm": 0.72265625,
      "learning_rate": 7.799368354068502e-05,
      "loss": 2.4191,
      "step": 1579
    },
    {
      "epoch": 0.613875330856463,
      "grad_norm": 0.6484375,
      "learning_rate": 7.786132464520768e-05,
      "loss": 2.3593,
      "step": 1580
    },
    {
      "epoch": 0.6142638595468785,
      "grad_norm": 0.67578125,
      "learning_rate": 7.772900652053848e-05,
      "loss": 2.3714,
      "step": 1581
    },
    {
      "epoch": 0.6146523882372938,
      "grad_norm": 0.6875,
      "learning_rate": 7.759672941035579e-05,
      "loss": 2.3637,
      "step": 1582
    },
    {
      "epoch": 0.6150409169277093,
      "grad_norm": 0.6328125,
      "learning_rate": 7.746449355826246e-05,
      "loss": 2.2919,
      "step": 1583
    },
    {
      "epoch": 0.6154294456181248,
      "grad_norm": 0.6796875,
      "learning_rate": 7.733229920778542e-05,
      "loss": 2.3922,
      "step": 1584
    },
    {
      "epoch": 0.6158179743085404,
      "grad_norm": 0.64453125,
      "learning_rate": 7.720014660237512e-05,
      "loss": 2.4041,
      "step": 1585
    },
    {
      "epoch": 0.6162065029989559,
      "grad_norm": 0.65625,
      "learning_rate": 7.706803598540506e-05,
      "loss": 2.3844,
      "step": 1586
    },
    {
      "epoch": 0.6165950316893714,
      "grad_norm": 0.671875,
      "learning_rate": 7.693596760017156e-05,
      "loss": 2.4035,
      "step": 1587
    },
    {
      "epoch": 0.6169835603797867,
      "grad_norm": 0.67578125,
      "learning_rate": 7.680394168989306e-05,
      "loss": 2.3288,
      "step": 1588
    },
    {
      "epoch": 0.6173720890702022,
      "grad_norm": 0.6796875,
      "learning_rate": 7.66719584977098e-05,
      "loss": 2.3953,
      "step": 1589
    },
    {
      "epoch": 0.6177606177606177,
      "grad_norm": 0.6640625,
      "learning_rate": 7.654001826668342e-05,
      "loss": 2.3341,
      "step": 1590
    },
    {
      "epoch": 0.6181491464510332,
      "grad_norm": 0.66796875,
      "learning_rate": 7.640812123979635e-05,
      "loss": 2.3844,
      "step": 1591
    },
    {
      "epoch": 0.6185376751414488,
      "grad_norm": 0.6328125,
      "learning_rate": 7.627626765995144e-05,
      "loss": 2.3106,
      "step": 1592
    },
    {
      "epoch": 0.6189262038318643,
      "grad_norm": 0.76171875,
      "learning_rate": 7.61444577699716e-05,
      "loss": 2.3895,
      "step": 1593
    },
    {
      "epoch": 0.6193147325222796,
      "grad_norm": 0.6796875,
      "learning_rate": 7.601269181259926e-05,
      "loss": 2.3381,
      "step": 1594
    },
    {
      "epoch": 0.6197032612126951,
      "grad_norm": 0.63671875,
      "learning_rate": 7.588097003049596e-05,
      "loss": 2.3945,
      "step": 1595
    },
    {
      "epoch": 0.6200917899031106,
      "grad_norm": 0.640625,
      "learning_rate": 7.574929266624185e-05,
      "loss": 2.3523,
      "step": 1596
    },
    {
      "epoch": 0.6204803185935261,
      "grad_norm": 0.66015625,
      "learning_rate": 7.561765996233527e-05,
      "loss": 2.3788,
      "step": 1597
    },
    {
      "epoch": 0.6208688472839416,
      "grad_norm": 0.6875,
      "learning_rate": 7.548607216119236e-05,
      "loss": 2.426,
      "step": 1598
    },
    {
      "epoch": 0.6212573759743572,
      "grad_norm": 0.625,
      "learning_rate": 7.535452950514655e-05,
      "loss": 2.2676,
      "step": 1599
    },
    {
      "epoch": 0.6216459046647725,
      "grad_norm": 0.75390625,
      "learning_rate": 7.522303223644814e-05,
      "loss": 2.3779,
      "step": 1600
    },
    {
      "epoch": 0.622034433355188,
      "grad_norm": 0.62890625,
      "learning_rate": 7.509158059726375e-05,
      "loss": 2.299,
      "step": 1601
    },
    {
      "epoch": 0.6224229620456035,
      "grad_norm": 0.68359375,
      "learning_rate": 7.496017482967613e-05,
      "loss": 2.3966,
      "step": 1602
    },
    {
      "epoch": 0.622811490736019,
      "grad_norm": 0.6796875,
      "learning_rate": 7.482881517568343e-05,
      "loss": 2.3278,
      "step": 1603
    },
    {
      "epoch": 0.6232000194264345,
      "grad_norm": 0.703125,
      "learning_rate": 7.469750187719895e-05,
      "loss": 2.3683,
      "step": 1604
    },
    {
      "epoch": 0.62358854811685,
      "grad_norm": 0.66796875,
      "learning_rate": 7.456623517605051e-05,
      "loss": 2.3648,
      "step": 1605
    },
    {
      "epoch": 0.6239770768072654,
      "grad_norm": 0.671875,
      "learning_rate": 7.443501531398027e-05,
      "loss": 2.3241,
      "step": 1606
    },
    {
      "epoch": 0.6243656054976809,
      "grad_norm": 0.640625,
      "learning_rate": 7.430384253264402e-05,
      "loss": 2.3535,
      "step": 1607
    },
    {
      "epoch": 0.6247541341880964,
      "grad_norm": 0.6796875,
      "learning_rate": 7.41727170736109e-05,
      "loss": 2.3948,
      "step": 1608
    },
    {
      "epoch": 0.625142662878512,
      "grad_norm": 0.62890625,
      "learning_rate": 7.404163917836284e-05,
      "loss": 2.3515,
      "step": 1609
    },
    {
      "epoch": 0.6255311915689274,
      "grad_norm": 0.67578125,
      "learning_rate": 7.391060908829426e-05,
      "loss": 2.3847,
      "step": 1610
    },
    {
      "epoch": 0.625919720259343,
      "grad_norm": 0.67578125,
      "learning_rate": 7.377962704471147e-05,
      "loss": 2.4048,
      "step": 1611
    },
    {
      "epoch": 0.6263082489497583,
      "grad_norm": 0.66796875,
      "learning_rate": 7.364869328883232e-05,
      "loss": 2.4245,
      "step": 1612
    },
    {
      "epoch": 0.6266967776401738,
      "grad_norm": 0.765625,
      "learning_rate": 7.351780806178577e-05,
      "loss": 2.3432,
      "step": 1613
    },
    {
      "epoch": 0.6270853063305893,
      "grad_norm": 0.63671875,
      "learning_rate": 7.338697160461135e-05,
      "loss": 2.425,
      "step": 1614
    },
    {
      "epoch": 0.6274738350210048,
      "grad_norm": 0.65234375,
      "learning_rate": 7.325618415825878e-05,
      "loss": 2.2868,
      "step": 1615
    },
    {
      "epoch": 0.6278623637114203,
      "grad_norm": 0.74609375,
      "learning_rate": 7.31254459635876e-05,
      "loss": 2.3369,
      "step": 1616
    },
    {
      "epoch": 0.6282508924018358,
      "grad_norm": 0.6640625,
      "learning_rate": 7.299475726136655e-05,
      "loss": 2.3966,
      "step": 1617
    },
    {
      "epoch": 0.6286394210922512,
      "grad_norm": 0.6328125,
      "learning_rate": 7.286411829227326e-05,
      "loss": 2.3717,
      "step": 1618
    },
    {
      "epoch": 0.6290279497826667,
      "grad_norm": 0.640625,
      "learning_rate": 7.27335292968938e-05,
      "loss": 2.3432,
      "step": 1619
    },
    {
      "epoch": 0.6294164784730822,
      "grad_norm": 0.66015625,
      "learning_rate": 7.260299051572216e-05,
      "loss": 2.3927,
      "step": 1620
    },
    {
      "epoch": 0.6298050071634977,
      "grad_norm": 0.71875,
      "learning_rate": 7.24725021891599e-05,
      "loss": 2.3983,
      "step": 1621
    },
    {
      "epoch": 0.6301935358539132,
      "grad_norm": 0.6484375,
      "learning_rate": 7.234206455751562e-05,
      "loss": 2.2967,
      "step": 1622
    },
    {
      "epoch": 0.6305820645443287,
      "grad_norm": 0.69140625,
      "learning_rate": 7.221167786100458e-05,
      "loss": 2.4191,
      "step": 1623
    },
    {
      "epoch": 0.6309705932347441,
      "grad_norm": 0.67578125,
      "learning_rate": 7.208134233974825e-05,
      "loss": 2.3658,
      "step": 1624
    },
    {
      "epoch": 0.6313591219251596,
      "grad_norm": 0.62890625,
      "learning_rate": 7.195105823377384e-05,
      "loss": 2.3469,
      "step": 1625
    },
    {
      "epoch": 0.6317476506155751,
      "grad_norm": 0.65234375,
      "learning_rate": 7.182082578301388e-05,
      "loss": 2.3892,
      "step": 1626
    },
    {
      "epoch": 0.6321361793059906,
      "grad_norm": 0.64453125,
      "learning_rate": 7.169064522730573e-05,
      "loss": 2.3862,
      "step": 1627
    },
    {
      "epoch": 0.6325247079964061,
      "grad_norm": 0.6171875,
      "learning_rate": 7.156051680639126e-05,
      "loss": 2.3799,
      "step": 1628
    },
    {
      "epoch": 0.6329132366868216,
      "grad_norm": 0.6796875,
      "learning_rate": 7.143044075991626e-05,
      "loss": 2.3772,
      "step": 1629
    },
    {
      "epoch": 0.633301765377237,
      "grad_norm": 0.65625,
      "learning_rate": 7.130041732743014e-05,
      "loss": 2.2959,
      "step": 1630
    },
    {
      "epoch": 0.6336902940676525,
      "grad_norm": 0.64453125,
      "learning_rate": 7.117044674838527e-05,
      "loss": 2.4059,
      "step": 1631
    },
    {
      "epoch": 0.634078822758068,
      "grad_norm": 0.64453125,
      "learning_rate": 7.104052926213687e-05,
      "loss": 2.3168,
      "step": 1632
    },
    {
      "epoch": 0.6344673514484835,
      "grad_norm": 0.62890625,
      "learning_rate": 7.091066510794224e-05,
      "loss": 2.3378,
      "step": 1633
    },
    {
      "epoch": 0.634855880138899,
      "grad_norm": 0.671875,
      "learning_rate": 7.07808545249606e-05,
      "loss": 2.3458,
      "step": 1634
    },
    {
      "epoch": 0.6352444088293145,
      "grad_norm": 0.6328125,
      "learning_rate": 7.065109775225231e-05,
      "loss": 2.3979,
      "step": 1635
    },
    {
      "epoch": 0.6356329375197299,
      "grad_norm": 0.6484375,
      "learning_rate": 7.052139502877886e-05,
      "loss": 2.3766,
      "step": 1636
    },
    {
      "epoch": 0.6360214662101454,
      "grad_norm": 0.65625,
      "learning_rate": 7.039174659340202e-05,
      "loss": 2.3896,
      "step": 1637
    },
    {
      "epoch": 0.6364099949005609,
      "grad_norm": 0.6796875,
      "learning_rate": 7.026215268488367e-05,
      "loss": 2.3584,
      "step": 1638
    },
    {
      "epoch": 0.6367985235909764,
      "grad_norm": 0.640625,
      "learning_rate": 7.013261354188529e-05,
      "loss": 2.3304,
      "step": 1639
    },
    {
      "epoch": 0.6371870522813919,
      "grad_norm": 0.6640625,
      "learning_rate": 7.00031294029675e-05,
      "loss": 2.3706,
      "step": 1640
    },
    {
      "epoch": 0.6375755809718073,
      "grad_norm": 0.65234375,
      "learning_rate": 6.98737005065895e-05,
      "loss": 2.3809,
      "step": 1641
    },
    {
      "epoch": 0.6379641096622228,
      "grad_norm": 0.62890625,
      "learning_rate": 6.974432709110895e-05,
      "loss": 2.337,
      "step": 1642
    },
    {
      "epoch": 0.6383526383526383,
      "grad_norm": 0.6171875,
      "learning_rate": 6.961500939478118e-05,
      "loss": 2.4023,
      "step": 1643
    },
    {
      "epoch": 0.6387411670430538,
      "grad_norm": 0.67578125,
      "learning_rate": 6.948574765575902e-05,
      "loss": 2.3571,
      "step": 1644
    },
    {
      "epoch": 0.6391296957334693,
      "grad_norm": 0.6328125,
      "learning_rate": 6.935654211209213e-05,
      "loss": 2.3675,
      "step": 1645
    },
    {
      "epoch": 0.6395182244238848,
      "grad_norm": 0.6484375,
      "learning_rate": 6.922739300172678e-05,
      "loss": 2.3294,
      "step": 1646
    },
    {
      "epoch": 0.6399067531143002,
      "grad_norm": 0.65625,
      "learning_rate": 6.909830056250527e-05,
      "loss": 2.3399,
      "step": 1647
    },
    {
      "epoch": 0.6402952818047157,
      "grad_norm": 0.66015625,
      "learning_rate": 6.896926503216556e-05,
      "loss": 2.4404,
      "step": 1648
    },
    {
      "epoch": 0.6406838104951312,
      "grad_norm": 0.671875,
      "learning_rate": 6.884028664834075e-05,
      "loss": 2.4031,
      "step": 1649
    },
    {
      "epoch": 0.6410723391855467,
      "grad_norm": 0.66015625,
      "learning_rate": 6.871136564855875e-05,
      "loss": 2.3244,
      "step": 1650
    },
    {
      "epoch": 0.6414608678759622,
      "grad_norm": 0.66796875,
      "learning_rate": 6.858250227024178e-05,
      "loss": 2.2752,
      "step": 1651
    },
    {
      "epoch": 0.6418493965663777,
      "grad_norm": 0.64453125,
      "learning_rate": 6.845369675070594e-05,
      "loss": 2.3314,
      "step": 1652
    },
    {
      "epoch": 0.6422379252567931,
      "grad_norm": 0.62890625,
      "learning_rate": 6.832494932716078e-05,
      "loss": 2.3836,
      "step": 1653
    },
    {
      "epoch": 0.6426264539472086,
      "grad_norm": 0.64453125,
      "learning_rate": 6.819626023670882e-05,
      "loss": 2.2433,
      "step": 1654
    },
    {
      "epoch": 0.6430149826376241,
      "grad_norm": 0.64453125,
      "learning_rate": 6.806762971634524e-05,
      "loss": 2.4247,
      "step": 1655
    },
    {
      "epoch": 0.6434035113280396,
      "grad_norm": 0.66015625,
      "learning_rate": 6.793905800295726e-05,
      "loss": 2.3075,
      "step": 1656
    },
    {
      "epoch": 0.6437920400184551,
      "grad_norm": 0.6171875,
      "learning_rate": 6.78105453333239e-05,
      "loss": 2.3406,
      "step": 1657
    },
    {
      "epoch": 0.6441805687088706,
      "grad_norm": 0.66796875,
      "learning_rate": 6.768209194411532e-05,
      "loss": 2.3682,
      "step": 1658
    },
    {
      "epoch": 0.644569097399286,
      "grad_norm": 0.71875,
      "learning_rate": 6.755369807189263e-05,
      "loss": 2.3562,
      "step": 1659
    },
    {
      "epoch": 0.6449576260897015,
      "grad_norm": 0.70703125,
      "learning_rate": 6.742536395310726e-05,
      "loss": 2.3898,
      "step": 1660
    },
    {
      "epoch": 0.645346154780117,
      "grad_norm": 0.625,
      "learning_rate": 6.729708982410066e-05,
      "loss": 2.4003,
      "step": 1661
    },
    {
      "epoch": 0.6457346834705325,
      "grad_norm": 0.640625,
      "learning_rate": 6.71688759211037e-05,
      "loss": 2.3614,
      "step": 1662
    },
    {
      "epoch": 0.646123212160948,
      "grad_norm": 0.65234375,
      "learning_rate": 6.704072248023643e-05,
      "loss": 2.3815,
      "step": 1663
    },
    {
      "epoch": 0.6465117408513635,
      "grad_norm": 0.66796875,
      "learning_rate": 6.691262973750751e-05,
      "loss": 2.3747,
      "step": 1664
    },
    {
      "epoch": 0.6469002695417789,
      "grad_norm": 0.67578125,
      "learning_rate": 6.678459792881387e-05,
      "loss": 2.3865,
      "step": 1665
    },
    {
      "epoch": 0.6472887982321944,
      "grad_norm": 0.6640625,
      "learning_rate": 6.665662728994013e-05,
      "loss": 2.2916,
      "step": 1666
    },
    {
      "epoch": 0.6476773269226099,
      "grad_norm": 0.65625,
      "learning_rate": 6.652871805655832e-05,
      "loss": 2.285,
      "step": 1667
    },
    {
      "epoch": 0.6480658556130254,
      "grad_norm": 0.62890625,
      "learning_rate": 6.640087046422738e-05,
      "loss": 2.3718,
      "step": 1668
    },
    {
      "epoch": 0.6484543843034409,
      "grad_norm": 0.66796875,
      "learning_rate": 6.627308474839277e-05,
      "loss": 2.3982,
      "step": 1669
    },
    {
      "epoch": 0.6488429129938564,
      "grad_norm": 0.65234375,
      "learning_rate": 6.614536114438589e-05,
      "loss": 2.4418,
      "step": 1670
    },
    {
      "epoch": 0.6492314416842718,
      "grad_norm": 0.66796875,
      "learning_rate": 6.601769988742386e-05,
      "loss": 2.3537,
      "step": 1671
    },
    {
      "epoch": 0.6496199703746873,
      "grad_norm": 0.6796875,
      "learning_rate": 6.58901012126089e-05,
      "loss": 2.3693,
      "step": 1672
    },
    {
      "epoch": 0.6500084990651028,
      "grad_norm": 0.640625,
      "learning_rate": 6.576256535492806e-05,
      "loss": 2.3887,
      "step": 1673
    },
    {
      "epoch": 0.6503970277555183,
      "grad_norm": 0.67578125,
      "learning_rate": 6.563509254925268e-05,
      "loss": 2.3907,
      "step": 1674
    },
    {
      "epoch": 0.6507855564459338,
      "grad_norm": 0.65234375,
      "learning_rate": 6.550768303033789e-05,
      "loss": 2.398,
      "step": 1675
    },
    {
      "epoch": 0.6511740851363493,
      "grad_norm": 0.71875,
      "learning_rate": 6.538033703282243e-05,
      "loss": 2.3144,
      "step": 1676
    },
    {
      "epoch": 0.6515626138267647,
      "grad_norm": 0.6640625,
      "learning_rate": 6.52530547912279e-05,
      "loss": 2.3954,
      "step": 1677
    },
    {
      "epoch": 0.6519511425171802,
      "grad_norm": 0.68359375,
      "learning_rate": 6.512583653995867e-05,
      "loss": 2.4037,
      "step": 1678
    },
    {
      "epoch": 0.6523396712075957,
      "grad_norm": 0.73828125,
      "learning_rate": 6.499868251330102e-05,
      "loss": 2.3779,
      "step": 1679
    },
    {
      "epoch": 0.6527281998980112,
      "grad_norm": 0.65234375,
      "learning_rate": 6.48715929454232e-05,
      "loss": 2.3817,
      "step": 1680
    },
    {
      "epoch": 0.6531167285884267,
      "grad_norm": 0.66796875,
      "learning_rate": 6.474456807037456e-05,
      "loss": 2.3627,
      "step": 1681
    },
    {
      "epoch": 0.6535052572788422,
      "grad_norm": 0.6640625,
      "learning_rate": 6.461760812208544e-05,
      "loss": 2.3574,
      "step": 1682
    },
    {
      "epoch": 0.6538937859692576,
      "grad_norm": 0.6796875,
      "learning_rate": 6.449071333436654e-05,
      "loss": 2.3536,
      "step": 1683
    },
    {
      "epoch": 0.6542823146596731,
      "grad_norm": 0.6640625,
      "learning_rate": 6.436388394090862e-05,
      "loss": 2.3958,
      "step": 1684
    },
    {
      "epoch": 0.6546708433500886,
      "grad_norm": 0.640625,
      "learning_rate": 6.423712017528193e-05,
      "loss": 2.3059,
      "step": 1685
    },
    {
      "epoch": 0.6550593720405041,
      "grad_norm": 0.66015625,
      "learning_rate": 6.411042227093592e-05,
      "loss": 2.4365,
      "step": 1686
    },
    {
      "epoch": 0.6554479007309196,
      "grad_norm": 0.67578125,
      "learning_rate": 6.398379046119873e-05,
      "loss": 2.3754,
      "step": 1687
    },
    {
      "epoch": 0.6558364294213351,
      "grad_norm": 0.65625,
      "learning_rate": 6.38572249792768e-05,
      "loss": 2.3531,
      "step": 1688
    },
    {
      "epoch": 0.6562249581117505,
      "grad_norm": 0.6171875,
      "learning_rate": 6.373072605825435e-05,
      "loss": 2.3918,
      "step": 1689
    },
    {
      "epoch": 0.656613486802166,
      "grad_norm": 0.62109375,
      "learning_rate": 6.360429393109311e-05,
      "loss": 2.2982,
      "step": 1690
    },
    {
      "epoch": 0.6570020154925815,
      "grad_norm": 0.70703125,
      "learning_rate": 6.347792883063173e-05,
      "loss": 2.4119,
      "step": 1691
    },
    {
      "epoch": 0.657390544182997,
      "grad_norm": 0.65625,
      "learning_rate": 6.335163098958552e-05,
      "loss": 2.3786,
      "step": 1692
    },
    {
      "epoch": 0.6577790728734125,
      "grad_norm": 0.703125,
      "learning_rate": 6.322540064054578e-05,
      "loss": 2.3941,
      "step": 1693
    },
    {
      "epoch": 0.658167601563828,
      "grad_norm": 0.68359375,
      "learning_rate": 6.309923801597965e-05,
      "loss": 2.4113,
      "step": 1694
    },
    {
      "epoch": 0.6585561302542434,
      "grad_norm": 0.6328125,
      "learning_rate": 6.297314334822945e-05,
      "loss": 2.3399,
      "step": 1695
    },
    {
      "epoch": 0.6589446589446589,
      "grad_norm": 0.62890625,
      "learning_rate": 6.284711686951243e-05,
      "loss": 2.385,
      "step": 1696
    },
    {
      "epoch": 0.6593331876350744,
      "grad_norm": 0.72265625,
      "learning_rate": 6.272115881192025e-05,
      "loss": 2.3508,
      "step": 1697
    },
    {
      "epoch": 0.6597217163254899,
      "grad_norm": 0.74609375,
      "learning_rate": 6.259526940741847e-05,
      "loss": 2.3601,
      "step": 1698
    },
    {
      "epoch": 0.6601102450159054,
      "grad_norm": 0.734375,
      "learning_rate": 6.246944888784634e-05,
      "loss": 2.3568,
      "step": 1699
    },
    {
      "epoch": 0.6604987737063209,
      "grad_norm": 0.63671875,
      "learning_rate": 6.234369748491617e-05,
      "loss": 2.3455,
      "step": 1700
    },
    {
      "epoch": 0.6608873023967363,
      "grad_norm": 0.60546875,
      "learning_rate": 6.221801543021306e-05,
      "loss": 2.2836,
      "step": 1701
    },
    {
      "epoch": 0.6612758310871518,
      "grad_norm": 0.671875,
      "learning_rate": 6.209240295519428e-05,
      "loss": 2.4369,
      "step": 1702
    },
    {
      "epoch": 0.6616643597775673,
      "grad_norm": 0.734375,
      "learning_rate": 6.196686029118909e-05,
      "loss": 2.3962,
      "step": 1703
    },
    {
      "epoch": 0.6620528884679828,
      "grad_norm": 0.6640625,
      "learning_rate": 6.184138766939811e-05,
      "loss": 2.3827,
      "step": 1704
    },
    {
      "epoch": 0.6624414171583983,
      "grad_norm": 0.66015625,
      "learning_rate": 6.171598532089299e-05,
      "loss": 2.2286,
      "step": 1705
    },
    {
      "epoch": 0.6628299458488138,
      "grad_norm": 0.6875,
      "learning_rate": 6.159065347661596e-05,
      "loss": 2.3438,
      "step": 1706
    },
    {
      "epoch": 0.6632184745392292,
      "grad_norm": 0.6484375,
      "learning_rate": 6.14653923673794e-05,
      "loss": 2.4297,
      "step": 1707
    },
    {
      "epoch": 0.6636070032296447,
      "grad_norm": 0.61328125,
      "learning_rate": 6.134020222386544e-05,
      "loss": 2.3676,
      "step": 1708
    },
    {
      "epoch": 0.6639955319200602,
      "grad_norm": 0.65625,
      "learning_rate": 6.121508327662553e-05,
      "loss": 2.3893,
      "step": 1709
    },
    {
      "epoch": 0.6643840606104757,
      "grad_norm": 0.6640625,
      "learning_rate": 6.109003575607997e-05,
      "loss": 2.4231,
      "step": 1710
    },
    {
      "epoch": 0.6647725893008912,
      "grad_norm": 0.65234375,
      "learning_rate": 6.096505989251753e-05,
      "loss": 2.3627,
      "step": 1711
    },
    {
      "epoch": 0.6651611179913066,
      "grad_norm": 0.6484375,
      "learning_rate": 6.0840155916095044e-05,
      "loss": 2.3278,
      "step": 1712
    },
    {
      "epoch": 0.6655496466817221,
      "grad_norm": 0.61328125,
      "learning_rate": 6.071532405683691e-05,
      "loss": 2.3325,
      "step": 1713
    },
    {
      "epoch": 0.6659381753721376,
      "grad_norm": 0.6640625,
      "learning_rate": 6.059056454463479e-05,
      "loss": 2.3673,
      "step": 1714
    },
    {
      "epoch": 0.6663267040625531,
      "grad_norm": 0.625,
      "learning_rate": 6.046587760924698e-05,
      "loss": 2.308,
      "step": 1715
    },
    {
      "epoch": 0.6667152327529686,
      "grad_norm": 0.6328125,
      "learning_rate": 6.034126348029827e-05,
      "loss": 2.3354,
      "step": 1716
    },
    {
      "epoch": 0.6671037614433841,
      "grad_norm": 0.62890625,
      "learning_rate": 6.021672238727927e-05,
      "loss": 2.3802,
      "step": 1717
    },
    {
      "epoch": 0.6674922901337995,
      "grad_norm": 0.671875,
      "learning_rate": 6.009225455954614e-05,
      "loss": 2.3173,
      "step": 1718
    },
    {
      "epoch": 0.667880818824215,
      "grad_norm": 0.6484375,
      "learning_rate": 5.996786022632004e-05,
      "loss": 2.417,
      "step": 1719
    },
    {
      "epoch": 0.6682693475146305,
      "grad_norm": 0.65234375,
      "learning_rate": 5.9843539616686875e-05,
      "loss": 2.3546,
      "step": 1720
    },
    {
      "epoch": 0.668657876205046,
      "grad_norm": 0.67578125,
      "learning_rate": 5.971929295959671e-05,
      "loss": 2.3215,
      "step": 1721
    },
    {
      "epoch": 0.6690464048954615,
      "grad_norm": 0.6328125,
      "learning_rate": 5.959512048386349e-05,
      "loss": 2.3583,
      "step": 1722
    },
    {
      "epoch": 0.669434933585877,
      "grad_norm": 0.65625,
      "learning_rate": 5.9471022418164425e-05,
      "loss": 2.4115,
      "step": 1723
    },
    {
      "epoch": 0.6698234622762924,
      "grad_norm": 0.65234375,
      "learning_rate": 5.934699899103982e-05,
      "loss": 2.4339,
      "step": 1724
    },
    {
      "epoch": 0.6702119909667079,
      "grad_norm": 0.63671875,
      "learning_rate": 5.9223050430892445e-05,
      "loss": 2.3634,
      "step": 1725
    },
    {
      "epoch": 0.6706005196571234,
      "grad_norm": 0.69921875,
      "learning_rate": 5.909917696598721e-05,
      "loss": 2.373,
      "step": 1726
    },
    {
      "epoch": 0.6709890483475389,
      "grad_norm": 0.671875,
      "learning_rate": 5.897537882445079e-05,
      "loss": 2.4134,
      "step": 1727
    },
    {
      "epoch": 0.6713775770379544,
      "grad_norm": 0.62109375,
      "learning_rate": 5.8851656234271055e-05,
      "loss": 2.3665,
      "step": 1728
    },
    {
      "epoch": 0.6717661057283699,
      "grad_norm": 0.6484375,
      "learning_rate": 5.872800942329677e-05,
      "loss": 2.3632,
      "step": 1729
    },
    {
      "epoch": 0.6721546344187853,
      "grad_norm": 0.67578125,
      "learning_rate": 5.860443861923718e-05,
      "loss": 2.3408,
      "step": 1730
    },
    {
      "epoch": 0.6725431631092008,
      "grad_norm": 0.625,
      "learning_rate": 5.848094404966149e-05,
      "loss": 2.3687,
      "step": 1731
    },
    {
      "epoch": 0.6729316917996163,
      "grad_norm": 0.69140625,
      "learning_rate": 5.83575259419986e-05,
      "loss": 2.4013,
      "step": 1732
    },
    {
      "epoch": 0.6733202204900318,
      "grad_norm": 0.671875,
      "learning_rate": 5.82341845235365e-05,
      "loss": 2.3826,
      "step": 1733
    },
    {
      "epoch": 0.6737087491804473,
      "grad_norm": 0.64453125,
      "learning_rate": 5.811092002142198e-05,
      "loss": 2.4015,
      "step": 1734
    },
    {
      "epoch": 0.6740972778708628,
      "grad_norm": 0.62890625,
      "learning_rate": 5.798773266266022e-05,
      "loss": 2.3255,
      "step": 1735
    },
    {
      "epoch": 0.6744858065612782,
      "grad_norm": 0.625,
      "learning_rate": 5.786462267411431e-05,
      "loss": 2.3693,
      "step": 1736
    },
    {
      "epoch": 0.6748743352516937,
      "grad_norm": 0.6484375,
      "learning_rate": 5.774159028250483e-05,
      "loss": 2.407,
      "step": 1737
    },
    {
      "epoch": 0.6752628639421092,
      "grad_norm": 0.71875,
      "learning_rate": 5.761863571440949e-05,
      "loss": 2.4144,
      "step": 1738
    },
    {
      "epoch": 0.6756513926325247,
      "grad_norm": 0.66015625,
      "learning_rate": 5.749575919626269e-05,
      "loss": 2.3344,
      "step": 1739
    },
    {
      "epoch": 0.6760399213229402,
      "grad_norm": 0.65625,
      "learning_rate": 5.737296095435498e-05,
      "loss": 2.3339,
      "step": 1740
    },
    {
      "epoch": 0.6764284500133557,
      "grad_norm": 0.7109375,
      "learning_rate": 5.7250241214832914e-05,
      "loss": 2.3194,
      "step": 1741
    },
    {
      "epoch": 0.6768169787037711,
      "grad_norm": 0.6796875,
      "learning_rate": 5.7127600203698385e-05,
      "loss": 2.3024,
      "step": 1742
    },
    {
      "epoch": 0.6772055073941866,
      "grad_norm": 0.6796875,
      "learning_rate": 5.700503814680831e-05,
      "loss": 2.3236,
      "step": 1743
    },
    {
      "epoch": 0.6775940360846021,
      "grad_norm": 0.609375,
      "learning_rate": 5.6882555269874224e-05,
      "loss": 2.3919,
      "step": 1744
    },
    {
      "epoch": 0.6779825647750176,
      "grad_norm": 0.6328125,
      "learning_rate": 5.676015179846181e-05,
      "loss": 2.3636,
      "step": 1745
    },
    {
      "epoch": 0.6783710934654331,
      "grad_norm": 0.6484375,
      "learning_rate": 5.663782795799057e-05,
      "loss": 2.397,
      "step": 1746
    },
    {
      "epoch": 0.6787596221558486,
      "grad_norm": 0.6953125,
      "learning_rate": 5.651558397373335e-05,
      "loss": 2.3954,
      "step": 1747
    },
    {
      "epoch": 0.679148150846264,
      "grad_norm": 0.62890625,
      "learning_rate": 5.6393420070815806e-05,
      "loss": 2.3431,
      "step": 1748
    },
    {
      "epoch": 0.6795366795366795,
      "grad_norm": 0.65625,
      "learning_rate": 5.627133647421628e-05,
      "loss": 2.3616,
      "step": 1749
    },
    {
      "epoch": 0.679925208227095,
      "grad_norm": 0.640625,
      "learning_rate": 5.614933340876515e-05,
      "loss": 2.4101,
      "step": 1750
    },
    {
      "epoch": 0.6803137369175105,
      "grad_norm": 0.64453125,
      "learning_rate": 5.602741109914449e-05,
      "loss": 2.3575,
      "step": 1751
    },
    {
      "epoch": 0.680702265607926,
      "grad_norm": 0.6640625,
      "learning_rate": 5.590556976988766e-05,
      "loss": 2.396,
      "step": 1752
    },
    {
      "epoch": 0.6810907942983415,
      "grad_norm": 0.671875,
      "learning_rate": 5.578380964537888e-05,
      "loss": 2.3252,
      "step": 1753
    },
    {
      "epoch": 0.6814793229887569,
      "grad_norm": 0.609375,
      "learning_rate": 5.566213094985282e-05,
      "loss": 2.3266,
      "step": 1754
    },
    {
      "epoch": 0.6818678516791724,
      "grad_norm": 0.6484375,
      "learning_rate": 5.55405339073942e-05,
      "loss": 2.3403,
      "step": 1755
    },
    {
      "epoch": 0.6822563803695879,
      "grad_norm": 0.6484375,
      "learning_rate": 5.5419018741937424e-05,
      "loss": 2.3728,
      "step": 1756
    },
    {
      "epoch": 0.6826449090600034,
      "grad_norm": 0.67578125,
      "learning_rate": 5.529758567726593e-05,
      "loss": 2.3492,
      "step": 1757
    },
    {
      "epoch": 0.6830334377504189,
      "grad_norm": 0.6953125,
      "learning_rate": 5.517623493701213e-05,
      "loss": 2.3451,
      "step": 1758
    },
    {
      "epoch": 0.6834219664408344,
      "grad_norm": 0.6328125,
      "learning_rate": 5.505496674465678e-05,
      "loss": 2.3361,
      "step": 1759
    },
    {
      "epoch": 0.6838104951312498,
      "grad_norm": 0.62890625,
      "learning_rate": 5.49337813235286e-05,
      "loss": 2.3548,
      "step": 1760
    },
    {
      "epoch": 0.6841990238216653,
      "grad_norm": 0.63671875,
      "learning_rate": 5.481267889680388e-05,
      "loss": 2.3969,
      "step": 1761
    },
    {
      "epoch": 0.6845875525120808,
      "grad_norm": 0.64453125,
      "learning_rate": 5.4691659687506094e-05,
      "loss": 2.3452,
      "step": 1762
    },
    {
      "epoch": 0.6849760812024963,
      "grad_norm": 0.69921875,
      "learning_rate": 5.457072391850543e-05,
      "loss": 2.3758,
      "step": 1763
    },
    {
      "epoch": 0.6853646098929118,
      "grad_norm": 0.65234375,
      "learning_rate": 5.444987181251845e-05,
      "loss": 2.4076,
      "step": 1764
    },
    {
      "epoch": 0.6857531385833273,
      "grad_norm": 0.66796875,
      "learning_rate": 5.432910359210754e-05,
      "loss": 2.3528,
      "step": 1765
    },
    {
      "epoch": 0.6861416672737427,
      "grad_norm": 0.62890625,
      "learning_rate": 5.420841947968076e-05,
      "loss": 2.3899,
      "step": 1766
    },
    {
      "epoch": 0.6865301959641582,
      "grad_norm": 0.62109375,
      "learning_rate": 5.4087819697491116e-05,
      "loss": 2.3693,
      "step": 1767
    },
    {
      "epoch": 0.6869187246545737,
      "grad_norm": 0.6484375,
      "learning_rate": 5.3967304467636406e-05,
      "loss": 2.3826,
      "step": 1768
    },
    {
      "epoch": 0.6873072533449892,
      "grad_norm": 0.640625,
      "learning_rate": 5.38468740120587e-05,
      "loss": 2.3899,
      "step": 1769
    },
    {
      "epoch": 0.6876957820354047,
      "grad_norm": 0.609375,
      "learning_rate": 5.372652855254394e-05,
      "loss": 2.3605,
      "step": 1770
    },
    {
      "epoch": 0.6880843107258202,
      "grad_norm": 0.640625,
      "learning_rate": 5.360626831072156e-05,
      "loss": 2.4103,
      "step": 1771
    },
    {
      "epoch": 0.6884728394162356,
      "grad_norm": 0.671875,
      "learning_rate": 5.348609350806402e-05,
      "loss": 2.3197,
      "step": 1772
    },
    {
      "epoch": 0.6888613681066511,
      "grad_norm": 0.62109375,
      "learning_rate": 5.3366004365886433e-05,
      "loss": 2.3234,
      "step": 1773
    },
    {
      "epoch": 0.6892498967970666,
      "grad_norm": 0.66015625,
      "learning_rate": 5.3246001105346254e-05,
      "loss": 2.3023,
      "step": 1774
    },
    {
      "epoch": 0.6896384254874821,
      "grad_norm": 0.68359375,
      "learning_rate": 5.3126083947442584e-05,
      "loss": 2.4167,
      "step": 1775
    },
    {
      "epoch": 0.6900269541778976,
      "grad_norm": 0.61328125,
      "learning_rate": 5.300625311301615e-05,
      "loss": 2.3408,
      "step": 1776
    },
    {
      "epoch": 0.690415482868313,
      "grad_norm": 0.62890625,
      "learning_rate": 5.2886508822748585e-05,
      "loss": 2.347,
      "step": 1777
    },
    {
      "epoch": 0.6908040115587285,
      "grad_norm": 0.60546875,
      "learning_rate": 5.27668512971622e-05,
      "loss": 2.3668,
      "step": 1778
    },
    {
      "epoch": 0.691192540249144,
      "grad_norm": 0.625,
      "learning_rate": 5.264728075661951e-05,
      "loss": 2.3325,
      "step": 1779
    },
    {
      "epoch": 0.6915810689395595,
      "grad_norm": 0.625,
      "learning_rate": 5.2527797421322835e-05,
      "loss": 2.3634,
      "step": 1780
    },
    {
      "epoch": 0.691969597629975,
      "grad_norm": 0.6953125,
      "learning_rate": 5.240840151131388e-05,
      "loss": 2.3602,
      "step": 1781
    },
    {
      "epoch": 0.6923581263203905,
      "grad_norm": 0.64453125,
      "learning_rate": 5.2289093246473355e-05,
      "loss": 2.425,
      "step": 1782
    },
    {
      "epoch": 0.6927466550108059,
      "grad_norm": 0.6328125,
      "learning_rate": 5.216987284652061e-05,
      "loss": 2.3902,
      "step": 1783
    },
    {
      "epoch": 0.6931351837012214,
      "grad_norm": 0.66015625,
      "learning_rate": 5.205074053101306e-05,
      "loss": 2.3617,
      "step": 1784
    },
    {
      "epoch": 0.6935237123916369,
      "grad_norm": 0.65234375,
      "learning_rate": 5.193169651934603e-05,
      "loss": 2.3791,
      "step": 1785
    },
    {
      "epoch": 0.6939122410820524,
      "grad_norm": 0.6328125,
      "learning_rate": 5.181274103075215e-05,
      "loss": 2.3141,
      "step": 1786
    },
    {
      "epoch": 0.6943007697724679,
      "grad_norm": 0.61328125,
      "learning_rate": 5.169387428430108e-05,
      "loss": 2.3242,
      "step": 1787
    },
    {
      "epoch": 0.6946892984628834,
      "grad_norm": 0.6796875,
      "learning_rate": 5.1575096498899e-05,
      "loss": 2.3407,
      "step": 1788
    },
    {
      "epoch": 0.6950778271532988,
      "grad_norm": 0.625,
      "learning_rate": 5.1456407893288275e-05,
      "loss": 2.3777,
      "step": 1789
    },
    {
      "epoch": 0.6954663558437143,
      "grad_norm": 0.64453125,
      "learning_rate": 5.133780868604705e-05,
      "loss": 2.3728,
      "step": 1790
    },
    {
      "epoch": 0.6958548845341298,
      "grad_norm": 0.6484375,
      "learning_rate": 5.121929909558887e-05,
      "loss": 2.3792,
      "step": 1791
    },
    {
      "epoch": 0.6962434132245453,
      "grad_norm": 0.625,
      "learning_rate": 5.110087934016209e-05,
      "loss": 2.3423,
      "step": 1792
    },
    {
      "epoch": 0.6966319419149608,
      "grad_norm": 0.625,
      "learning_rate": 5.098254963784979e-05,
      "loss": 2.3595,
      "step": 1793
    },
    {
      "epoch": 0.6970204706053763,
      "grad_norm": 0.6328125,
      "learning_rate": 5.0864310206569144e-05,
      "loss": 2.3816,
      "step": 1794
    },
    {
      "epoch": 0.6974089992957917,
      "grad_norm": 0.671875,
      "learning_rate": 5.074616126407106e-05,
      "loss": 2.4156,
      "step": 1795
    },
    {
      "epoch": 0.6977975279862072,
      "grad_norm": 0.671875,
      "learning_rate": 5.0628103027939855e-05,
      "loss": 2.3937,
      "step": 1796
    },
    {
      "epoch": 0.6981860566766227,
      "grad_norm": 0.65625,
      "learning_rate": 5.0510135715592745e-05,
      "loss": 2.388,
      "step": 1797
    },
    {
      "epoch": 0.6985745853670382,
      "grad_norm": 0.66796875,
      "learning_rate": 5.039225954427953e-05,
      "loss": 2.3512,
      "step": 1798
    },
    {
      "epoch": 0.6989631140574537,
      "grad_norm": 0.640625,
      "learning_rate": 5.027447473108218e-05,
      "loss": 2.4035,
      "step": 1799
    },
    {
      "epoch": 0.6993516427478692,
      "grad_norm": 0.69921875,
      "learning_rate": 5.015678149291439e-05,
      "loss": 2.3672,
      "step": 1800
    },
    {
      "epoch": 0.6997401714382846,
      "grad_norm": 0.61328125,
      "learning_rate": 5.003918004652117e-05,
      "loss": 2.3271,
      "step": 1801
    },
    {
      "epoch": 0.7001287001287001,
      "grad_norm": 0.60546875,
      "learning_rate": 4.992167060847858e-05,
      "loss": 2.3346,
      "step": 1802
    },
    {
      "epoch": 0.7005172288191156,
      "grad_norm": 0.625,
      "learning_rate": 4.9804253395193154e-05,
      "loss": 2.2578,
      "step": 1803
    },
    {
      "epoch": 0.7009057575095311,
      "grad_norm": 0.640625,
      "learning_rate": 4.968692862290165e-05,
      "loss": 2.3872,
      "step": 1804
    },
    {
      "epoch": 0.7012942861999466,
      "grad_norm": 0.65625,
      "learning_rate": 4.9569696507670535e-05,
      "loss": 2.3211,
      "step": 1805
    },
    {
      "epoch": 0.7016828148903621,
      "grad_norm": 0.63671875,
      "learning_rate": 4.945255726539566e-05,
      "loss": 2.3605,
      "step": 1806
    },
    {
      "epoch": 0.7020713435807775,
      "grad_norm": 0.66015625,
      "learning_rate": 4.9335511111801836e-05,
      "loss": 2.4147,
      "step": 1807
    },
    {
      "epoch": 0.702459872271193,
      "grad_norm": 0.66015625,
      "learning_rate": 4.921855826244248e-05,
      "loss": 2.3586,
      "step": 1808
    },
    {
      "epoch": 0.7028484009616085,
      "grad_norm": 0.60546875,
      "learning_rate": 4.910169893269908e-05,
      "loss": 2.279,
      "step": 1809
    },
    {
      "epoch": 0.703236929652024,
      "grad_norm": 0.609375,
      "learning_rate": 4.8984933337781e-05,
      "loss": 2.3442,
      "step": 1810
    },
    {
      "epoch": 0.7036254583424395,
      "grad_norm": 0.7109375,
      "learning_rate": 4.886826169272489e-05,
      "loss": 2.3817,
      "step": 1811
    },
    {
      "epoch": 0.704013987032855,
      "grad_norm": 0.62109375,
      "learning_rate": 4.8751684212394466e-05,
      "loss": 2.3372,
      "step": 1812
    },
    {
      "epoch": 0.7044025157232704,
      "grad_norm": 0.640625,
      "learning_rate": 4.8635201111479986e-05,
      "loss": 2.403,
      "step": 1813
    },
    {
      "epoch": 0.7047910444136859,
      "grad_norm": 0.62890625,
      "learning_rate": 4.851881260449791e-05,
      "loss": 2.3849,
      "step": 1814
    },
    {
      "epoch": 0.7051795731041014,
      "grad_norm": 0.640625,
      "learning_rate": 4.840251890579047e-05,
      "loss": 2.3893,
      "step": 1815
    },
    {
      "epoch": 0.7055681017945169,
      "grad_norm": 0.671875,
      "learning_rate": 4.828632022952535e-05,
      "loss": 2.3303,
      "step": 1816
    },
    {
      "epoch": 0.7059566304849324,
      "grad_norm": 0.6328125,
      "learning_rate": 4.817021678969518e-05,
      "loss": 2.3204,
      "step": 1817
    },
    {
      "epoch": 0.7063451591753479,
      "grad_norm": 0.61328125,
      "learning_rate": 4.805420880011723e-05,
      "loss": 2.3551,
      "step": 1818
    },
    {
      "epoch": 0.7067336878657633,
      "grad_norm": 0.6484375,
      "learning_rate": 4.793829647443302e-05,
      "loss": 2.3654,
      "step": 1819
    },
    {
      "epoch": 0.7071222165561788,
      "grad_norm": 0.6328125,
      "learning_rate": 4.782248002610781e-05,
      "loss": 2.2714,
      "step": 1820
    },
    {
      "epoch": 0.7075107452465943,
      "grad_norm": 0.640625,
      "learning_rate": 4.770675966843035e-05,
      "loss": 2.3196,
      "step": 1821
    },
    {
      "epoch": 0.7078992739370098,
      "grad_norm": 0.625,
      "learning_rate": 4.7591135614512417e-05,
      "loss": 2.4134,
      "step": 1822
    },
    {
      "epoch": 0.7082878026274253,
      "grad_norm": 0.62890625,
      "learning_rate": 4.747560807728847e-05,
      "loss": 2.3107,
      "step": 1823
    },
    {
      "epoch": 0.7086763313178408,
      "grad_norm": 0.61328125,
      "learning_rate": 4.736017726951515e-05,
      "loss": 2.3464,
      "step": 1824
    },
    {
      "epoch": 0.7090648600082562,
      "grad_norm": 0.64453125,
      "learning_rate": 4.724484340377102e-05,
      "loss": 2.3962,
      "step": 1825
    },
    {
      "epoch": 0.7094533886986717,
      "grad_norm": 0.6640625,
      "learning_rate": 4.7129606692456095e-05,
      "loss": 2.411,
      "step": 1826
    },
    {
      "epoch": 0.7098419173890872,
      "grad_norm": 0.64453125,
      "learning_rate": 4.701446734779149e-05,
      "loss": 2.3093,
      "step": 1827
    },
    {
      "epoch": 0.7102304460795027,
      "grad_norm": 0.6328125,
      "learning_rate": 4.6899425581818924e-05,
      "loss": 2.3815,
      "step": 1828
    },
    {
      "epoch": 0.7106189747699182,
      "grad_norm": 0.6875,
      "learning_rate": 4.6784481606400496e-05,
      "loss": 2.4293,
      "step": 1829
    },
    {
      "epoch": 0.7110075034603337,
      "grad_norm": 0.65234375,
      "learning_rate": 4.66696356332182e-05,
      "loss": 2.3517,
      "step": 1830
    },
    {
      "epoch": 0.7113960321507491,
      "grad_norm": 0.67578125,
      "learning_rate": 4.655488787377354e-05,
      "loss": 2.4058,
      "step": 1831
    },
    {
      "epoch": 0.7117845608411646,
      "grad_norm": 0.6171875,
      "learning_rate": 4.644023853938716e-05,
      "loss": 2.2576,
      "step": 1832
    },
    {
      "epoch": 0.7121730895315801,
      "grad_norm": 0.6015625,
      "learning_rate": 4.6325687841198415e-05,
      "loss": 2.2638,
      "step": 1833
    },
    {
      "epoch": 0.7125616182219956,
      "grad_norm": 0.65234375,
      "learning_rate": 4.621123599016505e-05,
      "loss": 2.2876,
      "step": 1834
    },
    {
      "epoch": 0.7129501469124111,
      "grad_norm": 0.640625,
      "learning_rate": 4.609688319706271e-05,
      "loss": 2.316,
      "step": 1835
    },
    {
      "epoch": 0.7133386756028266,
      "grad_norm": 0.66015625,
      "learning_rate": 4.5982629672484734e-05,
      "loss": 2.3727,
      "step": 1836
    },
    {
      "epoch": 0.713727204293242,
      "grad_norm": 0.66015625,
      "learning_rate": 4.586847562684147e-05,
      "loss": 2.3608,
      "step": 1837
    },
    {
      "epoch": 0.7141157329836575,
      "grad_norm": 0.63671875,
      "learning_rate": 4.575442127036019e-05,
      "loss": 2.2822,
      "step": 1838
    },
    {
      "epoch": 0.714504261674073,
      "grad_norm": 0.6328125,
      "learning_rate": 4.564046681308457e-05,
      "loss": 2.3288,
      "step": 1839
    },
    {
      "epoch": 0.7148927903644885,
      "grad_norm": 0.73046875,
      "learning_rate": 4.5526612464874285e-05,
      "loss": 2.3741,
      "step": 1840
    },
    {
      "epoch": 0.715281319054904,
      "grad_norm": 0.6328125,
      "learning_rate": 4.541285843540465e-05,
      "loss": 2.2713,
      "step": 1841
    },
    {
      "epoch": 0.7156698477453195,
      "grad_norm": 0.61328125,
      "learning_rate": 4.5299204934166216e-05,
      "loss": 2.3915,
      "step": 1842
    },
    {
      "epoch": 0.7160583764357349,
      "grad_norm": 0.64453125,
      "learning_rate": 4.518565217046445e-05,
      "loss": 2.3431,
      "step": 1843
    },
    {
      "epoch": 0.7164469051261504,
      "grad_norm": 0.6328125,
      "learning_rate": 4.507220035341929e-05,
      "loss": 2.3615,
      "step": 1844
    },
    {
      "epoch": 0.7168354338165659,
      "grad_norm": 0.6171875,
      "learning_rate": 4.4958849691964675e-05,
      "loss": 2.2747,
      "step": 1845
    },
    {
      "epoch": 0.7172239625069814,
      "grad_norm": 0.66015625,
      "learning_rate": 4.484560039484836e-05,
      "loss": 2.359,
      "step": 1846
    },
    {
      "epoch": 0.7176124911973969,
      "grad_norm": 0.6875,
      "learning_rate": 4.47324526706314e-05,
      "loss": 2.4031,
      "step": 1847
    },
    {
      "epoch": 0.7180010198878123,
      "grad_norm": 0.66015625,
      "learning_rate": 4.4619406727687785e-05,
      "loss": 2.4118,
      "step": 1848
    },
    {
      "epoch": 0.7183895485782278,
      "grad_norm": 3.046875,
      "learning_rate": 4.450646277420407e-05,
      "loss": 2.4098,
      "step": 1849
    },
    {
      "epoch": 0.7187780772686433,
      "grad_norm": 0.61328125,
      "learning_rate": 4.439362101817898e-05,
      "loss": 2.3236,
      "step": 1850
    },
    {
      "epoch": 0.7191666059590588,
      "grad_norm": 0.62109375,
      "learning_rate": 4.4280881667423026e-05,
      "loss": 2.3833,
      "step": 1851
    },
    {
      "epoch": 0.7195551346494743,
      "grad_norm": 0.65234375,
      "learning_rate": 4.416824492955817e-05,
      "loss": 2.3257,
      "step": 1852
    },
    {
      "epoch": 0.7199436633398898,
      "grad_norm": 0.65625,
      "learning_rate": 4.40557110120173e-05,
      "loss": 2.3589,
      "step": 1853
    },
    {
      "epoch": 0.7203321920303052,
      "grad_norm": 0.65625,
      "learning_rate": 4.394328012204409e-05,
      "loss": 2.357,
      "step": 1854
    },
    {
      "epoch": 0.7207207207207207,
      "grad_norm": 0.6796875,
      "learning_rate": 4.383095246669232e-05,
      "loss": 2.3558,
      "step": 1855
    },
    {
      "epoch": 0.7211092494111362,
      "grad_norm": 0.6328125,
      "learning_rate": 4.371872825282578e-05,
      "loss": 2.3796,
      "step": 1856
    },
    {
      "epoch": 0.7214977781015517,
      "grad_norm": 0.6796875,
      "learning_rate": 4.3606607687117704e-05,
      "loss": 2.4108,
      "step": 1857
    },
    {
      "epoch": 0.7218863067919672,
      "grad_norm": 0.640625,
      "learning_rate": 4.349459097605047e-05,
      "loss": 2.3564,
      "step": 1858
    },
    {
      "epoch": 0.7222748354823827,
      "grad_norm": 0.62109375,
      "learning_rate": 4.338267832591519e-05,
      "loss": 2.3506,
      "step": 1859
    },
    {
      "epoch": 0.7226633641727981,
      "grad_norm": 0.6640625,
      "learning_rate": 4.327086994281132e-05,
      "loss": 2.3364,
      "step": 1860
    },
    {
      "epoch": 0.7230518928632136,
      "grad_norm": 0.625,
      "learning_rate": 4.31591660326463e-05,
      "loss": 2.4074,
      "step": 1861
    },
    {
      "epoch": 0.7234404215536291,
      "grad_norm": 3.0,
      "learning_rate": 4.30475668011352e-05,
      "loss": 2.3424,
      "step": 1862
    },
    {
      "epoch": 0.7238289502440446,
      "grad_norm": 0.640625,
      "learning_rate": 4.29360724538003e-05,
      "loss": 2.3817,
      "step": 1863
    },
    {
      "epoch": 0.7242174789344601,
      "grad_norm": 0.64453125,
      "learning_rate": 4.282468319597067e-05,
      "loss": 2.3692,
      "step": 1864
    },
    {
      "epoch": 0.7246060076248756,
      "grad_norm": 0.65625,
      "learning_rate": 4.2713399232781914e-05,
      "loss": 2.2731,
      "step": 1865
    },
    {
      "epoch": 0.724994536315291,
      "grad_norm": 0.63671875,
      "learning_rate": 4.2602220769175717e-05,
      "loss": 2.3326,
      "step": 1866
    },
    {
      "epoch": 0.7253830650057065,
      "grad_norm": 0.65234375,
      "learning_rate": 4.249114800989944e-05,
      "loss": 2.389,
      "step": 1867
    },
    {
      "epoch": 0.725771593696122,
      "grad_norm": 0.625,
      "learning_rate": 4.238018115950582e-05,
      "loss": 2.2927,
      "step": 1868
    },
    {
      "epoch": 0.7261601223865375,
      "grad_norm": 0.62890625,
      "learning_rate": 4.22693204223525e-05,
      "loss": 2.3935,
      "step": 1869
    },
    {
      "epoch": 0.726548651076953,
      "grad_norm": 0.59765625,
      "learning_rate": 4.215856600260176e-05,
      "loss": 2.3794,
      "step": 1870
    },
    {
      "epoch": 0.7269371797673685,
      "grad_norm": 0.6640625,
      "learning_rate": 4.204791810422008e-05,
      "loss": 2.3499,
      "step": 1871
    },
    {
      "epoch": 0.7273257084577839,
      "grad_norm": 0.62109375,
      "learning_rate": 4.1937376930977704e-05,
      "loss": 2.3803,
      "step": 1872
    },
    {
      "epoch": 0.7277142371481994,
      "grad_norm": 0.63671875,
      "learning_rate": 4.182694268644837e-05,
      "loss": 2.3551,
      "step": 1873
    },
    {
      "epoch": 0.7281027658386149,
      "grad_norm": 0.58984375,
      "learning_rate": 4.17166155740089e-05,
      "loss": 2.3986,
      "step": 1874
    },
    {
      "epoch": 0.7284912945290304,
      "grad_norm": 0.60546875,
      "learning_rate": 4.160639579683885e-05,
      "loss": 2.301,
      "step": 1875
    },
    {
      "epoch": 0.7288798232194459,
      "grad_norm": 0.62109375,
      "learning_rate": 4.149628355792005e-05,
      "loss": 2.3123,
      "step": 1876
    },
    {
      "epoch": 0.7292683519098614,
      "grad_norm": 0.62109375,
      "learning_rate": 4.1386279060036316e-05,
      "loss": 2.3621,
      "step": 1877
    },
    {
      "epoch": 0.7296568806002768,
      "grad_norm": 0.62109375,
      "learning_rate": 4.127638250577305e-05,
      "loss": 2.305,
      "step": 1878
    },
    {
      "epoch": 0.7300454092906923,
      "grad_norm": 0.625,
      "learning_rate": 4.1166594097516854e-05,
      "loss": 2.3822,
      "step": 1879
    },
    {
      "epoch": 0.7304339379811078,
      "grad_norm": 0.62890625,
      "learning_rate": 4.105691403745519e-05,
      "loss": 2.346,
      "step": 1880
    },
    {
      "epoch": 0.7308224666715233,
      "grad_norm": 0.64453125,
      "learning_rate": 4.0947342527575916e-05,
      "loss": 2.3148,
      "step": 1881
    },
    {
      "epoch": 0.7312109953619388,
      "grad_norm": 0.61328125,
      "learning_rate": 4.0837879769667045e-05,
      "loss": 2.3351,
      "step": 1882
    },
    {
      "epoch": 0.7315995240523543,
      "grad_norm": 0.5859375,
      "learning_rate": 4.072852596531631e-05,
      "loss": 2.3522,
      "step": 1883
    },
    {
      "epoch": 0.7319880527427697,
      "grad_norm": 0.67578125,
      "learning_rate": 4.06192813159108e-05,
      "loss": 2.4085,
      "step": 1884
    },
    {
      "epoch": 0.7323765814331852,
      "grad_norm": 0.62890625,
      "learning_rate": 4.051014602263652e-05,
      "loss": 2.3633,
      "step": 1885
    },
    {
      "epoch": 0.7327651101236007,
      "grad_norm": 0.64453125,
      "learning_rate": 4.0401120286478154e-05,
      "loss": 2.422,
      "step": 1886
    },
    {
      "epoch": 0.7331536388140162,
      "grad_norm": 0.625,
      "learning_rate": 4.029220430821862e-05,
      "loss": 2.4292,
      "step": 1887
    },
    {
      "epoch": 0.7335421675044317,
      "grad_norm": 0.6015625,
      "learning_rate": 4.018339828843868e-05,
      "loss": 2.3436,
      "step": 1888
    },
    {
      "epoch": 0.7339306961948472,
      "grad_norm": 0.625,
      "learning_rate": 4.007470242751654e-05,
      "loss": 2.4115,
      "step": 1889
    },
    {
      "epoch": 0.7343192248852626,
      "grad_norm": 0.625,
      "learning_rate": 3.996611692562764e-05,
      "loss": 2.4112,
      "step": 1890
    },
    {
      "epoch": 0.7347077535756781,
      "grad_norm": 0.625,
      "learning_rate": 3.985764198274414e-05,
      "loss": 2.3491,
      "step": 1891
    },
    {
      "epoch": 0.7350962822660936,
      "grad_norm": 0.60546875,
      "learning_rate": 3.9749277798634575e-05,
      "loss": 2.288,
      "step": 1892
    },
    {
      "epoch": 0.7354848109565091,
      "grad_norm": 0.6171875,
      "learning_rate": 3.964102457286353e-05,
      "loss": 2.3784,
      "step": 1893
    },
    {
      "epoch": 0.7358733396469246,
      "grad_norm": 0.6640625,
      "learning_rate": 3.9532882504791236e-05,
      "loss": 2.3957,
      "step": 1894
    },
    {
      "epoch": 0.7362618683373401,
      "grad_norm": 0.625,
      "learning_rate": 3.9424851793573224e-05,
      "loss": 2.358,
      "step": 1895
    },
    {
      "epoch": 0.7366503970277555,
      "grad_norm": 0.62109375,
      "learning_rate": 3.931693263815997e-05,
      "loss": 2.4146,
      "step": 1896
    },
    {
      "epoch": 0.737038925718171,
      "grad_norm": 0.65625,
      "learning_rate": 3.920912523729642e-05,
      "loss": 2.4071,
      "step": 1897
    },
    {
      "epoch": 0.7374274544085865,
      "grad_norm": 0.69921875,
      "learning_rate": 3.9101429789521827e-05,
      "loss": 2.2899,
      "step": 1898
    },
    {
      "epoch": 0.737815983099002,
      "grad_norm": 0.66015625,
      "learning_rate": 3.8993846493169164e-05,
      "loss": 2.3797,
      "step": 1899
    },
    {
      "epoch": 0.7382045117894175,
      "grad_norm": 0.625,
      "learning_rate": 3.888637554636493e-05,
      "loss": 2.4159,
      "step": 1900
    },
    {
      "epoch": 0.738593040479833,
      "grad_norm": 0.67578125,
      "learning_rate": 3.877901714702873e-05,
      "loss": 2.385,
      "step": 1901
    },
    {
      "epoch": 0.7389815691702484,
      "grad_norm": 0.6484375,
      "learning_rate": 3.867177149287286e-05,
      "loss": 2.2841,
      "step": 1902
    },
    {
      "epoch": 0.7393700978606639,
      "grad_norm": 0.609375,
      "learning_rate": 3.8564638781402e-05,
      "loss": 2.3939,
      "step": 1903
    },
    {
      "epoch": 0.7397586265510794,
      "grad_norm": 0.6171875,
      "learning_rate": 3.845761920991283e-05,
      "loss": 2.4283,
      "step": 1904
    },
    {
      "epoch": 0.7401471552414949,
      "grad_norm": 0.625,
      "learning_rate": 3.83507129754937e-05,
      "loss": 2.4112,
      "step": 1905
    },
    {
      "epoch": 0.7405356839319104,
      "grad_norm": 0.67578125,
      "learning_rate": 3.824392027502417e-05,
      "loss": 2.3811,
      "step": 1906
    },
    {
      "epoch": 0.7409242126223259,
      "grad_norm": 0.6328125,
      "learning_rate": 3.8137241305174834e-05,
      "loss": 2.3265,
      "step": 1907
    },
    {
      "epoch": 0.7413127413127413,
      "grad_norm": 0.703125,
      "learning_rate": 3.803067626240665e-05,
      "loss": 2.2776,
      "step": 1908
    },
    {
      "epoch": 0.7417012700031568,
      "grad_norm": 0.6796875,
      "learning_rate": 3.7924225342970945e-05,
      "loss": 2.4382,
      "step": 1909
    },
    {
      "epoch": 0.7420897986935723,
      "grad_norm": 0.62890625,
      "learning_rate": 3.781788874290879e-05,
      "loss": 2.3497,
      "step": 1910
    },
    {
      "epoch": 0.7424783273839878,
      "grad_norm": 0.59765625,
      "learning_rate": 3.771166665805075e-05,
      "loss": 2.3409,
      "step": 1911
    },
    {
      "epoch": 0.7428668560744033,
      "grad_norm": 0.60546875,
      "learning_rate": 3.760555928401647e-05,
      "loss": 2.3056,
      "step": 1912
    },
    {
      "epoch": 0.7432553847648187,
      "grad_norm": 0.640625,
      "learning_rate": 3.749956681621438e-05,
      "loss": 2.3703,
      "step": 1913
    },
    {
      "epoch": 0.7436439134552342,
      "grad_norm": 0.62890625,
      "learning_rate": 3.7393689449841286e-05,
      "loss": 2.3297,
      "step": 1914
    },
    {
      "epoch": 0.7440324421456497,
      "grad_norm": 0.63671875,
      "learning_rate": 3.728792737988204e-05,
      "loss": 2.3389,
      "step": 1915
    },
    {
      "epoch": 0.7444209708360652,
      "grad_norm": 0.6328125,
      "learning_rate": 3.718228080110906e-05,
      "loss": 2.3029,
      "step": 1916
    },
    {
      "epoch": 0.7448094995264807,
      "grad_norm": 0.64453125,
      "learning_rate": 3.707674990808219e-05,
      "loss": 2.3986,
      "step": 1917
    },
    {
      "epoch": 0.7451980282168962,
      "grad_norm": 0.59765625,
      "learning_rate": 3.69713348951482e-05,
      "loss": 2.2478,
      "step": 1918
    },
    {
      "epoch": 0.7455865569073116,
      "grad_norm": 0.625,
      "learning_rate": 3.686603595644044e-05,
      "loss": 2.4261,
      "step": 1919
    },
    {
      "epoch": 0.7459750855977271,
      "grad_norm": 0.6328125,
      "learning_rate": 3.6760853285878486e-05,
      "loss": 2.3709,
      "step": 1920
    },
    {
      "epoch": 0.7463636142881426,
      "grad_norm": 0.60546875,
      "learning_rate": 3.6655787077167825e-05,
      "loss": 2.3114,
      "step": 1921
    },
    {
      "epoch": 0.7467521429785581,
      "grad_norm": 0.60546875,
      "learning_rate": 3.655083752379943e-05,
      "loss": 2.3476,
      "step": 1922
    },
    {
      "epoch": 0.7471406716689736,
      "grad_norm": 0.609375,
      "learning_rate": 3.644600481904947e-05,
      "loss": 2.4215,
      "step": 1923
    },
    {
      "epoch": 0.7475292003593891,
      "grad_norm": 0.62890625,
      "learning_rate": 3.634128915597895e-05,
      "loss": 2.3943,
      "step": 1924
    },
    {
      "epoch": 0.7479177290498045,
      "grad_norm": 0.6328125,
      "learning_rate": 3.623669072743321e-05,
      "loss": 2.4181,
      "step": 1925
    },
    {
      "epoch": 0.74830625774022,
      "grad_norm": 0.66796875,
      "learning_rate": 3.613220972604182e-05,
      "loss": 2.3593,
      "step": 1926
    },
    {
      "epoch": 0.7486947864306355,
      "grad_norm": 0.6953125,
      "learning_rate": 3.602784634421804e-05,
      "loss": 2.3978,
      "step": 1927
    },
    {
      "epoch": 0.749083315121051,
      "grad_norm": 0.60546875,
      "learning_rate": 3.5923600774158525e-05,
      "loss": 2.3903,
      "step": 1928
    },
    {
      "epoch": 0.7494718438114665,
      "grad_norm": 0.609375,
      "learning_rate": 3.581947320784299e-05,
      "loss": 2.3521,
      "step": 1929
    },
    {
      "epoch": 0.749860372501882,
      "grad_norm": 0.671875,
      "learning_rate": 3.571546383703379e-05,
      "loss": 2.3271,
      "step": 1930
    },
    {
      "epoch": 0.7502489011922974,
      "grad_norm": 0.6796875,
      "learning_rate": 3.561157285327564e-05,
      "loss": 2.3692,
      "step": 1931
    },
    {
      "epoch": 0.7506374298827129,
      "grad_norm": 0.65234375,
      "learning_rate": 3.550780044789525e-05,
      "loss": 2.3958,
      "step": 1932
    },
    {
      "epoch": 0.7510259585731284,
      "grad_norm": 0.64453125,
      "learning_rate": 3.540414681200089e-05,
      "loss": 2.3941,
      "step": 1933
    },
    {
      "epoch": 0.7514144872635439,
      "grad_norm": 0.62890625,
      "learning_rate": 3.5300612136482146e-05,
      "loss": 2.3173,
      "step": 1934
    },
    {
      "epoch": 0.7518030159539594,
      "grad_norm": 0.68359375,
      "learning_rate": 3.519719661200954e-05,
      "loss": 2.4301,
      "step": 1935
    },
    {
      "epoch": 0.7521915446443749,
      "grad_norm": 0.58984375,
      "learning_rate": 3.5093900429034134e-05,
      "loss": 2.3899,
      "step": 1936
    },
    {
      "epoch": 0.7525800733347903,
      "grad_norm": 0.6171875,
      "learning_rate": 3.499072377778724e-05,
      "loss": 2.3445,
      "step": 1937
    },
    {
      "epoch": 0.7529686020252058,
      "grad_norm": 0.609375,
      "learning_rate": 3.488766684828e-05,
      "loss": 2.2973,
      "step": 1938
    },
    {
      "epoch": 0.7533571307156213,
      "grad_norm": 0.63671875,
      "learning_rate": 3.4784729830303096e-05,
      "loss": 2.3857,
      "step": 1939
    },
    {
      "epoch": 0.7537456594060368,
      "grad_norm": 0.671875,
      "learning_rate": 3.468191291342644e-05,
      "loss": 2.3077,
      "step": 1940
    },
    {
      "epoch": 0.7541341880964523,
      "grad_norm": 0.625,
      "learning_rate": 3.4579216286998615e-05,
      "loss": 2.3862,
      "step": 1941
    },
    {
      "epoch": 0.7545227167868678,
      "grad_norm": 0.62109375,
      "learning_rate": 3.4476640140146796e-05,
      "loss": 2.3252,
      "step": 1942
    },
    {
      "epoch": 0.7549112454772832,
      "grad_norm": 0.6328125,
      "learning_rate": 3.437418466177631e-05,
      "loss": 2.3137,
      "step": 1943
    },
    {
      "epoch": 0.7552997741676987,
      "grad_norm": 0.6171875,
      "learning_rate": 3.427185004057011e-05,
      "loss": 2.3269,
      "step": 1944
    },
    {
      "epoch": 0.7556883028581142,
      "grad_norm": 0.625,
      "learning_rate": 3.4169636464988695e-05,
      "loss": 2.3205,
      "step": 1945
    },
    {
      "epoch": 0.7560768315485297,
      "grad_norm": 0.6640625,
      "learning_rate": 3.4067544123269646e-05,
      "loss": 2.3266,
      "step": 1946
    },
    {
      "epoch": 0.7564653602389452,
      "grad_norm": 0.62109375,
      "learning_rate": 3.396557320342724e-05,
      "loss": 2.4334,
      "step": 1947
    },
    {
      "epoch": 0.7568538889293607,
      "grad_norm": 0.609375,
      "learning_rate": 3.386372389325213e-05,
      "loss": 2.3688,
      "step": 1948
    },
    {
      "epoch": 0.7572424176197761,
      "grad_norm": 0.62109375,
      "learning_rate": 3.3761996380311067e-05,
      "loss": 2.3101,
      "step": 1949
    },
    {
      "epoch": 0.7576309463101916,
      "grad_norm": 0.65625,
      "learning_rate": 3.3660390851946456e-05,
      "loss": 2.381,
      "step": 1950
    },
    {
      "epoch": 0.7580194750006071,
      "grad_norm": 0.609375,
      "learning_rate": 3.355890749527608e-05,
      "loss": 2.3385,
      "step": 1951
    },
    {
      "epoch": 0.7584080036910226,
      "grad_norm": 0.6328125,
      "learning_rate": 3.345754649719267e-05,
      "loss": 2.4517,
      "step": 1952
    },
    {
      "epoch": 0.7587965323814381,
      "grad_norm": 0.6171875,
      "learning_rate": 3.335630804436368e-05,
      "loss": 2.3311,
      "step": 1953
    },
    {
      "epoch": 0.7591850610718536,
      "grad_norm": 0.62890625,
      "learning_rate": 3.325519232323089e-05,
      "loss": 2.3541,
      "step": 1954
    },
    {
      "epoch": 0.759573589762269,
      "grad_norm": 0.640625,
      "learning_rate": 3.315419952001001e-05,
      "loss": 2.3937,
      "step": 1955
    },
    {
      "epoch": 0.7599621184526845,
      "grad_norm": 0.60546875,
      "learning_rate": 3.30533298206904e-05,
      "loss": 2.3497,
      "step": 1956
    },
    {
      "epoch": 0.7603506471431,
      "grad_norm": 0.62890625,
      "learning_rate": 3.295258341103475e-05,
      "loss": 2.3502,
      "step": 1957
    },
    {
      "epoch": 0.7607391758335155,
      "grad_norm": 0.6484375,
      "learning_rate": 3.285196047657865e-05,
      "loss": 2.4155,
      "step": 1958
    },
    {
      "epoch": 0.761127704523931,
      "grad_norm": 0.62109375,
      "learning_rate": 3.27514612026303e-05,
      "loss": 2.3778,
      "step": 1959
    },
    {
      "epoch": 0.7615162332143465,
      "grad_norm": 0.59765625,
      "learning_rate": 3.265108577427022e-05,
      "loss": 2.2699,
      "step": 1960
    },
    {
      "epoch": 0.7619047619047619,
      "grad_norm": 0.63671875,
      "learning_rate": 3.2550834376350745e-05,
      "loss": 2.3854,
      "step": 1961
    },
    {
      "epoch": 0.7622932905951774,
      "grad_norm": 0.640625,
      "learning_rate": 3.245070719349591e-05,
      "loss": 2.3895,
      "step": 1962
    },
    {
      "epoch": 0.7626818192855929,
      "grad_norm": 0.62109375,
      "learning_rate": 3.235070441010092e-05,
      "loss": 2.3569,
      "step": 1963
    },
    {
      "epoch": 0.7630703479760084,
      "grad_norm": 0.609375,
      "learning_rate": 3.225082621033193e-05,
      "loss": 2.3149,
      "step": 1964
    },
    {
      "epoch": 0.7634588766664239,
      "grad_norm": 0.6171875,
      "learning_rate": 3.215107277812563e-05,
      "loss": 2.3813,
      "step": 1965
    },
    {
      "epoch": 0.7638474053568394,
      "grad_norm": 0.65234375,
      "learning_rate": 3.2051444297188973e-05,
      "loss": 2.4437,
      "step": 1966
    },
    {
      "epoch": 0.7642359340472548,
      "grad_norm": 0.66015625,
      "learning_rate": 3.195194095099874e-05,
      "loss": 2.3745,
      "step": 1967
    },
    {
      "epoch": 0.7646244627376703,
      "grad_norm": 0.62890625,
      "learning_rate": 3.185256292280134e-05,
      "loss": 2.415,
      "step": 1968
    },
    {
      "epoch": 0.7650129914280858,
      "grad_norm": 0.61328125,
      "learning_rate": 3.1753310395612315e-05,
      "loss": 2.327,
      "step": 1969
    },
    {
      "epoch": 0.7654015201185013,
      "grad_norm": 0.671875,
      "learning_rate": 3.165418355221612e-05,
      "loss": 2.2945,
      "step": 1970
    },
    {
      "epoch": 0.7657900488089168,
      "grad_norm": 0.59765625,
      "learning_rate": 3.155518257516577e-05,
      "loss": 2.3694,
      "step": 1971
    },
    {
      "epoch": 0.7661785774993323,
      "grad_norm": 0.625,
      "learning_rate": 3.145630764678247e-05,
      "loss": 2.3799,
      "step": 1972
    },
    {
      "epoch": 0.7665671061897477,
      "grad_norm": 0.65625,
      "learning_rate": 3.135755894915526e-05,
      "loss": 2.3418,
      "step": 1973
    },
    {
      "epoch": 0.7669556348801632,
      "grad_norm": 0.609375,
      "learning_rate": 3.125893666414077e-05,
      "loss": 2.3477,
      "step": 1974
    },
    {
      "epoch": 0.7673441635705787,
      "grad_norm": 0.67578125,
      "learning_rate": 3.116044097336277e-05,
      "loss": 2.3125,
      "step": 1975
    },
    {
      "epoch": 0.7677326922609942,
      "grad_norm": 0.59375,
      "learning_rate": 3.1062072058211965e-05,
      "loss": 2.4135,
      "step": 1976
    },
    {
      "epoch": 0.7681212209514097,
      "grad_norm": 0.6171875,
      "learning_rate": 3.0963830099845484e-05,
      "loss": 2.3376,
      "step": 1977
    },
    {
      "epoch": 0.7685097496418252,
      "grad_norm": 0.61328125,
      "learning_rate": 3.086571527918673e-05,
      "loss": 2.2661,
      "step": 1978
    },
    {
      "epoch": 0.7688982783322406,
      "grad_norm": 0.59765625,
      "learning_rate": 3.0767727776924946e-05,
      "loss": 2.3262,
      "step": 1979
    },
    {
      "epoch": 0.7692868070226561,
      "grad_norm": 0.62109375,
      "learning_rate": 3.066986777351492e-05,
      "loss": 2.3649,
      "step": 1980
    },
    {
      "epoch": 0.7696753357130716,
      "grad_norm": 0.6328125,
      "learning_rate": 3.0572135449176606e-05,
      "loss": 2.3777,
      "step": 1981
    },
    {
      "epoch": 0.7700638644034871,
      "grad_norm": 0.59375,
      "learning_rate": 3.0474530983894857e-05,
      "loss": 2.3639,
      "step": 1982
    },
    {
      "epoch": 0.7704523930939026,
      "grad_norm": 0.66015625,
      "learning_rate": 3.037705455741903e-05,
      "loss": 2.3889,
      "step": 1983
    },
    {
      "epoch": 0.770840921784318,
      "grad_norm": 0.6328125,
      "learning_rate": 3.0279706349262727e-05,
      "loss": 2.4356,
      "step": 1984
    },
    {
      "epoch": 0.7712294504747335,
      "grad_norm": 0.62109375,
      "learning_rate": 3.0182486538703325e-05,
      "loss": 2.3708,
      "step": 1985
    },
    {
      "epoch": 0.771617979165149,
      "grad_norm": 0.609375,
      "learning_rate": 3.008539530478186e-05,
      "loss": 2.3059,
      "step": 1986
    },
    {
      "epoch": 0.7720065078555645,
      "grad_norm": 0.60546875,
      "learning_rate": 2.9988432826302537e-05,
      "loss": 2.365,
      "step": 1987
    },
    {
      "epoch": 0.77239503654598,
      "grad_norm": 0.625,
      "learning_rate": 2.9891599281832396e-05,
      "loss": 2.3592,
      "step": 1988
    },
    {
      "epoch": 0.7727835652363955,
      "grad_norm": 0.59765625,
      "learning_rate": 2.9794894849701083e-05,
      "loss": 2.3562,
      "step": 1989
    },
    {
      "epoch": 0.7731720939268109,
      "grad_norm": 0.6796875,
      "learning_rate": 2.9698319708000467e-05,
      "loss": 2.3798,
      "step": 1990
    },
    {
      "epoch": 0.7735606226172264,
      "grad_norm": 0.640625,
      "learning_rate": 2.96018740345843e-05,
      "loss": 2.3196,
      "step": 1991
    },
    {
      "epoch": 0.7739491513076419,
      "grad_norm": 0.6171875,
      "learning_rate": 2.9505558007067903e-05,
      "loss": 2.2949,
      "step": 1992
    },
    {
      "epoch": 0.7743376799980574,
      "grad_norm": 0.6328125,
      "learning_rate": 2.9409371802827857e-05,
      "loss": 2.3301,
      "step": 1993
    },
    {
      "epoch": 0.7747262086884729,
      "grad_norm": 0.6171875,
      "learning_rate": 2.9313315599001633e-05,
      "loss": 2.3685,
      "step": 1994
    },
    {
      "epoch": 0.7751147373788884,
      "grad_norm": 0.59375,
      "learning_rate": 2.9217389572487352e-05,
      "loss": 2.362,
      "step": 1995
    },
    {
      "epoch": 0.7755032660693038,
      "grad_norm": 0.7265625,
      "learning_rate": 2.9121593899943277e-05,
      "loss": 2.3637,
      "step": 1996
    },
    {
      "epoch": 0.7758917947597193,
      "grad_norm": 0.59375,
      "learning_rate": 2.9025928757787736e-05,
      "loss": 2.325,
      "step": 1997
    },
    {
      "epoch": 0.7762803234501348,
      "grad_norm": 0.59375,
      "learning_rate": 2.8930394322198608e-05,
      "loss": 2.272,
      "step": 1998
    },
    {
      "epoch": 0.7766688521405503,
      "grad_norm": 0.6484375,
      "learning_rate": 2.883499076911307e-05,
      "loss": 2.3975,
      "step": 1999
    },
    {
      "epoch": 0.7770573808309658,
      "grad_norm": 0.65625,
      "learning_rate": 2.8739718274227277e-05,
      "loss": 2.3345,
      "step": 2000
    },
    {
      "epoch": 0.7774459095213813,
      "grad_norm": 0.61328125,
      "learning_rate": 2.864457701299602e-05,
      "loss": 2.3376,
      "step": 2001
    },
    {
      "epoch": 0.7778344382117967,
      "grad_norm": 0.65234375,
      "learning_rate": 2.8549567160632397e-05,
      "loss": 2.3642,
      "step": 2002
    },
    {
      "epoch": 0.7782229669022122,
      "grad_norm": 0.625,
      "learning_rate": 2.845468889210752e-05,
      "loss": 2.3518,
      "step": 2003
    },
    {
      "epoch": 0.7786114955926277,
      "grad_norm": 0.61328125,
      "learning_rate": 2.8359942382150194e-05,
      "loss": 2.3389,
      "step": 2004
    },
    {
      "epoch": 0.7790000242830432,
      "grad_norm": 0.640625,
      "learning_rate": 2.826532780524649e-05,
      "loss": 2.3791,
      "step": 2005
    },
    {
      "epoch": 0.7793885529734587,
      "grad_norm": 0.60546875,
      "learning_rate": 2.8170845335639595e-05,
      "loss": 2.3456,
      "step": 2006
    },
    {
      "epoch": 0.7797770816638742,
      "grad_norm": 0.62109375,
      "learning_rate": 2.8076495147329375e-05,
      "loss": 2.3348,
      "step": 2007
    },
    {
      "epoch": 0.7801656103542896,
      "grad_norm": 0.6171875,
      "learning_rate": 2.79822774140721e-05,
      "loss": 2.3734,
      "step": 2008
    },
    {
      "epoch": 0.780554139044705,
      "grad_norm": 0.5859375,
      "learning_rate": 2.7888192309380102e-05,
      "loss": 2.3042,
      "step": 2009
    },
    {
      "epoch": 0.7809426677351206,
      "grad_norm": 0.62109375,
      "learning_rate": 2.7794240006521444e-05,
      "loss": 2.3699,
      "step": 2010
    },
    {
      "epoch": 0.7813311964255361,
      "grad_norm": 0.640625,
      "learning_rate": 2.7700420678519647e-05,
      "loss": 2.3301,
      "step": 2011
    },
    {
      "epoch": 0.7817197251159516,
      "grad_norm": 0.58984375,
      "learning_rate": 2.7606734498153365e-05,
      "loss": 2.3618,
      "step": 2012
    },
    {
      "epoch": 0.7821082538063671,
      "grad_norm": 0.65234375,
      "learning_rate": 2.7513181637955943e-05,
      "loss": 2.3884,
      "step": 2013
    },
    {
      "epoch": 0.7824967824967825,
      "grad_norm": 0.61328125,
      "learning_rate": 2.7419762270215312e-05,
      "loss": 2.3116,
      "step": 2014
    },
    {
      "epoch": 0.782885311187198,
      "grad_norm": 0.6171875,
      "learning_rate": 2.7326476566973512e-05,
      "loss": 2.3987,
      "step": 2015
    },
    {
      "epoch": 0.7832738398776135,
      "grad_norm": 0.625,
      "learning_rate": 2.7233324700026464e-05,
      "loss": 2.2948,
      "step": 2016
    },
    {
      "epoch": 0.783662368568029,
      "grad_norm": 0.60546875,
      "learning_rate": 2.7140306840923558e-05,
      "loss": 2.2709,
      "step": 2017
    },
    {
      "epoch": 0.7840508972584445,
      "grad_norm": 0.625,
      "learning_rate": 2.704742316096743e-05,
      "loss": 2.4295,
      "step": 2018
    },
    {
      "epoch": 0.78443942594886,
      "grad_norm": 0.62109375,
      "learning_rate": 2.6954673831213605e-05,
      "loss": 2.3513,
      "step": 2019
    },
    {
      "epoch": 0.7848279546392753,
      "grad_norm": 0.6328125,
      "learning_rate": 2.6862059022470198e-05,
      "loss": 2.3083,
      "step": 2020
    },
    {
      "epoch": 0.7852164833296909,
      "grad_norm": 0.6328125,
      "learning_rate": 2.6769578905297588e-05,
      "loss": 2.3593,
      "step": 2021
    },
    {
      "epoch": 0.7856050120201064,
      "grad_norm": 0.62890625,
      "learning_rate": 2.667723365000804e-05,
      "loss": 2.2834,
      "step": 2022
    },
    {
      "epoch": 0.7859935407105219,
      "grad_norm": 0.73046875,
      "learning_rate": 2.6585023426665535e-05,
      "loss": 2.3171,
      "step": 2023
    },
    {
      "epoch": 0.7863820694009374,
      "grad_norm": 0.6015625,
      "learning_rate": 2.6492948405085348e-05,
      "loss": 2.2423,
      "step": 2024
    },
    {
      "epoch": 0.7867705980913529,
      "grad_norm": 0.62890625,
      "learning_rate": 2.640100875483379e-05,
      "loss": 2.3983,
      "step": 2025
    },
    {
      "epoch": 0.7871591267817682,
      "grad_norm": 0.64453125,
      "learning_rate": 2.6309204645227825e-05,
      "loss": 2.3511,
      "step": 2026
    },
    {
      "epoch": 0.7875476554721837,
      "grad_norm": 0.6171875,
      "learning_rate": 2.621753624533484e-05,
      "loss": 2.2993,
      "step": 2027
    },
    {
      "epoch": 0.7879361841625993,
      "grad_norm": 0.58984375,
      "learning_rate": 2.6126003723972326e-05,
      "loss": 2.3746,
      "step": 2028
    },
    {
      "epoch": 0.7883247128530148,
      "grad_norm": 0.6015625,
      "learning_rate": 2.603460724970741e-05,
      "loss": 2.4112,
      "step": 2029
    },
    {
      "epoch": 0.7887132415434303,
      "grad_norm": 0.6796875,
      "learning_rate": 2.594334699085682e-05,
      "loss": 2.4033,
      "step": 2030
    },
    {
      "epoch": 0.7891017702338458,
      "grad_norm": 0.6015625,
      "learning_rate": 2.5852223115486385e-05,
      "loss": 2.2852,
      "step": 2031
    },
    {
      "epoch": 0.7894902989242611,
      "grad_norm": 0.61328125,
      "learning_rate": 2.5761235791410698e-05,
      "loss": 2.3602,
      "step": 2032
    },
    {
      "epoch": 0.7898788276146766,
      "grad_norm": 0.60546875,
      "learning_rate": 2.567038518619297e-05,
      "loss": 2.3884,
      "step": 2033
    },
    {
      "epoch": 0.7902673563050922,
      "grad_norm": 0.64453125,
      "learning_rate": 2.5579671467144585e-05,
      "loss": 2.4177,
      "step": 2034
    },
    {
      "epoch": 0.7906558849955077,
      "grad_norm": 0.625,
      "learning_rate": 2.5489094801324854e-05,
      "loss": 2.3687,
      "step": 2035
    },
    {
      "epoch": 0.7910444136859232,
      "grad_norm": 0.60546875,
      "learning_rate": 2.5398655355540658e-05,
      "loss": 2.3799,
      "step": 2036
    },
    {
      "epoch": 0.7914329423763387,
      "grad_norm": 0.59375,
      "learning_rate": 2.530835329634622e-05,
      "loss": 2.3293,
      "step": 2037
    },
    {
      "epoch": 0.791821471066754,
      "grad_norm": 0.60546875,
      "learning_rate": 2.5218188790042706e-05,
      "loss": 2.3401,
      "step": 2038
    },
    {
      "epoch": 0.7922099997571695,
      "grad_norm": 0.609375,
      "learning_rate": 2.5128162002678024e-05,
      "loss": 2.3879,
      "step": 2039
    },
    {
      "epoch": 0.792598528447585,
      "grad_norm": 0.5859375,
      "learning_rate": 2.503827310004635e-05,
      "loss": 2.3356,
      "step": 2040
    },
    {
      "epoch": 0.7929870571380006,
      "grad_norm": 0.625,
      "learning_rate": 2.4948522247688023e-05,
      "loss": 2.3512,
      "step": 2041
    },
    {
      "epoch": 0.793375585828416,
      "grad_norm": 1.0546875,
      "learning_rate": 2.4858909610889114e-05,
      "loss": 2.3043,
      "step": 2042
    },
    {
      "epoch": 0.7937641145188316,
      "grad_norm": 0.63671875,
      "learning_rate": 2.476943535468117e-05,
      "loss": 2.3064,
      "step": 2043
    },
    {
      "epoch": 0.794152643209247,
      "grad_norm": 0.609375,
      "learning_rate": 2.4680099643840882e-05,
      "loss": 2.3251,
      "step": 2044
    },
    {
      "epoch": 0.7945411718996624,
      "grad_norm": 0.61328125,
      "learning_rate": 2.459090264288979e-05,
      "loss": 2.3401,
      "step": 2045
    },
    {
      "epoch": 0.794929700590078,
      "grad_norm": 0.62890625,
      "learning_rate": 2.4501844516094007e-05,
      "loss": 2.2794,
      "step": 2046
    },
    {
      "epoch": 0.7953182292804934,
      "grad_norm": 0.6171875,
      "learning_rate": 2.4412925427463874e-05,
      "loss": 2.3531,
      "step": 2047
    },
    {
      "epoch": 0.795706757970909,
      "grad_norm": 0.63671875,
      "learning_rate": 2.4324145540753697e-05,
      "loss": 2.3799,
      "step": 2048
    },
    {
      "epoch": 0.7960952866613243,
      "grad_norm": 0.609375,
      "learning_rate": 2.4235505019461367e-05,
      "loss": 2.3758,
      "step": 2049
    },
    {
      "epoch": 0.7964838153517398,
      "grad_norm": 0.62109375,
      "learning_rate": 2.4147004026828192e-05,
      "loss": 2.3625,
      "step": 2050
    },
    {
      "epoch": 0.7968723440421553,
      "grad_norm": 0.61328125,
      "learning_rate": 2.4058642725838486e-05,
      "loss": 2.3455,
      "step": 2051
    },
    {
      "epoch": 0.7972608727325708,
      "grad_norm": 0.6328125,
      "learning_rate": 2.3970421279219323e-05,
      "loss": 2.3983,
      "step": 2052
    },
    {
      "epoch": 0.7976494014229863,
      "grad_norm": 0.5859375,
      "learning_rate": 2.3882339849440205e-05,
      "loss": 2.3582,
      "step": 2053
    },
    {
      "epoch": 0.7980379301134018,
      "grad_norm": 0.61328125,
      "learning_rate": 2.3794398598712786e-05,
      "loss": 2.3543,
      "step": 2054
    },
    {
      "epoch": 0.7984264588038172,
      "grad_norm": 0.6171875,
      "learning_rate": 2.370659768899056e-05,
      "loss": 2.3625,
      "step": 2055
    },
    {
      "epoch": 0.7988149874942327,
      "grad_norm": 0.64453125,
      "learning_rate": 2.36189372819686e-05,
      "loss": 2.3334,
      "step": 2056
    },
    {
      "epoch": 0.7992035161846482,
      "grad_norm": 0.6328125,
      "learning_rate": 2.3531417539083134e-05,
      "loss": 2.429,
      "step": 2057
    },
    {
      "epoch": 0.7995920448750637,
      "grad_norm": 0.6171875,
      "learning_rate": 2.3444038621511433e-05,
      "loss": 2.3277,
      "step": 2058
    },
    {
      "epoch": 0.7999805735654792,
      "grad_norm": 0.609375,
      "learning_rate": 2.33568006901714e-05,
      "loss": 2.3584,
      "step": 2059
    },
    {
      "epoch": 0.8003691022558947,
      "grad_norm": 0.62109375,
      "learning_rate": 2.3269703905721285e-05,
      "loss": 2.4414,
      "step": 2060
    },
    {
      "epoch": 0.8007576309463101,
      "grad_norm": 0.6328125,
      "learning_rate": 2.318274842855941e-05,
      "loss": 2.3893,
      "step": 2061
    },
    {
      "epoch": 0.8011461596367256,
      "grad_norm": 0.625,
      "learning_rate": 2.309593441882385e-05,
      "loss": 2.2949,
      "step": 2062
    },
    {
      "epoch": 0.8015346883271411,
      "grad_norm": 0.58984375,
      "learning_rate": 2.3009262036392177e-05,
      "loss": 2.373,
      "step": 2063
    },
    {
      "epoch": 0.8019232170175566,
      "grad_norm": 0.578125,
      "learning_rate": 2.2922731440881128e-05,
      "loss": 2.392,
      "step": 2064
    },
    {
      "epoch": 0.8023117457079721,
      "grad_norm": 0.609375,
      "learning_rate": 2.2836342791646325e-05,
      "loss": 2.3395,
      "step": 2065
    },
    {
      "epoch": 0.8027002743983876,
      "grad_norm": 0.61328125,
      "learning_rate": 2.275009624778195e-05,
      "loss": 2.3918,
      "step": 2066
    },
    {
      "epoch": 0.803088803088803,
      "grad_norm": 0.6171875,
      "learning_rate": 2.2663991968120512e-05,
      "loss": 2.3602,
      "step": 2067
    },
    {
      "epoch": 0.8034773317792185,
      "grad_norm": 0.6328125,
      "learning_rate": 2.257803011123254e-05,
      "loss": 2.3232,
      "step": 2068
    },
    {
      "epoch": 0.803865860469634,
      "grad_norm": 0.625,
      "learning_rate": 2.2492210835426253e-05,
      "loss": 2.327,
      "step": 2069
    },
    {
      "epoch": 0.8042543891600495,
      "grad_norm": 0.61328125,
      "learning_rate": 2.2406534298747293e-05,
      "loss": 2.3697,
      "step": 2070
    },
    {
      "epoch": 0.804642917850465,
      "grad_norm": 0.59765625,
      "learning_rate": 2.2321000658978465e-05,
      "loss": 2.3967,
      "step": 2071
    },
    {
      "epoch": 0.8050314465408805,
      "grad_norm": 0.609375,
      "learning_rate": 2.2235610073639335e-05,
      "loss": 2.326,
      "step": 2072
    },
    {
      "epoch": 0.8054199752312959,
      "grad_norm": 0.6171875,
      "learning_rate": 2.2150362699986083e-05,
      "loss": 2.3649,
      "step": 2073
    },
    {
      "epoch": 0.8058085039217114,
      "grad_norm": 0.625,
      "learning_rate": 2.206525869501114e-05,
      "loss": 2.4364,
      "step": 2074
    },
    {
      "epoch": 0.8061970326121269,
      "grad_norm": 0.5859375,
      "learning_rate": 2.198029821544292e-05,
      "loss": 2.3134,
      "step": 2075
    },
    {
      "epoch": 0.8065855613025424,
      "grad_norm": 0.60546875,
      "learning_rate": 2.1895481417745457e-05,
      "loss": 2.3823,
      "step": 2076
    },
    {
      "epoch": 0.8069740899929579,
      "grad_norm": 0.62890625,
      "learning_rate": 2.1810808458118247e-05,
      "loss": 2.4092,
      "step": 2077
    },
    {
      "epoch": 0.8073626186833734,
      "grad_norm": 0.64453125,
      "learning_rate": 2.172627949249586e-05,
      "loss": 2.3765,
      "step": 2078
    },
    {
      "epoch": 0.8077511473737888,
      "grad_norm": 0.609375,
      "learning_rate": 2.1641894676547703e-05,
      "loss": 2.3685,
      "step": 2079
    },
    {
      "epoch": 0.8081396760642043,
      "grad_norm": 0.61328125,
      "learning_rate": 2.15576541656777e-05,
      "loss": 2.2613,
      "step": 2080
    },
    {
      "epoch": 0.8085282047546198,
      "grad_norm": 0.63671875,
      "learning_rate": 2.1473558115024027e-05,
      "loss": 2.2979,
      "step": 2081
    },
    {
      "epoch": 0.8089167334450353,
      "grad_norm": 0.609375,
      "learning_rate": 2.1389606679458828e-05,
      "loss": 2.3692,
      "step": 2082
    },
    {
      "epoch": 0.8093052621354508,
      "grad_norm": 0.6015625,
      "learning_rate": 2.1305800013587908e-05,
      "loss": 2.3236,
      "step": 2083
    },
    {
      "epoch": 0.8096937908258663,
      "grad_norm": 0.59375,
      "learning_rate": 2.122213827175048e-05,
      "loss": 2.3073,
      "step": 2084
    },
    {
      "epoch": 0.8100823195162817,
      "grad_norm": 0.63671875,
      "learning_rate": 2.113862160801883e-05,
      "loss": 2.3627,
      "step": 2085
    },
    {
      "epoch": 0.8104708482066972,
      "grad_norm": 0.60546875,
      "learning_rate": 2.1055250176198094e-05,
      "loss": 2.2891,
      "step": 2086
    },
    {
      "epoch": 0.8108593768971127,
      "grad_norm": 0.59765625,
      "learning_rate": 2.097202412982595e-05,
      "loss": 2.3075,
      "step": 2087
    },
    {
      "epoch": 0.8112479055875282,
      "grad_norm": 0.59375,
      "learning_rate": 2.088894362217233e-05,
      "loss": 2.3189,
      "step": 2088
    },
    {
      "epoch": 0.8116364342779437,
      "grad_norm": 0.62890625,
      "learning_rate": 2.080600880623913e-05,
      "loss": 2.2979,
      "step": 2089
    },
    {
      "epoch": 0.8120249629683592,
      "grad_norm": 0.59375,
      "learning_rate": 2.0723219834759945e-05,
      "loss": 2.3131,
      "step": 2090
    },
    {
      "epoch": 0.8124134916587746,
      "grad_norm": 0.5859375,
      "learning_rate": 2.0640576860199778e-05,
      "loss": 2.3436,
      "step": 2091
    },
    {
      "epoch": 0.8128020203491901,
      "grad_norm": 0.60546875,
      "learning_rate": 2.05580800347548e-05,
      "loss": 2.4079,
      "step": 2092
    },
    {
      "epoch": 0.8131905490396056,
      "grad_norm": 0.6015625,
      "learning_rate": 2.0475729510351937e-05,
      "loss": 2.3397,
      "step": 2093
    },
    {
      "epoch": 0.8135790777300211,
      "grad_norm": 0.60546875,
      "learning_rate": 2.0393525438648774e-05,
      "loss": 2.4194,
      "step": 2094
    },
    {
      "epoch": 0.8139676064204366,
      "grad_norm": 0.62109375,
      "learning_rate": 2.031146797103317e-05,
      "loss": 2.3762,
      "step": 2095
    },
    {
      "epoch": 0.8143561351108521,
      "grad_norm": 0.5859375,
      "learning_rate": 2.0229557258622977e-05,
      "loss": 2.402,
      "step": 2096
    },
    {
      "epoch": 0.8147446638012675,
      "grad_norm": 0.61328125,
      "learning_rate": 2.0147793452265796e-05,
      "loss": 2.3029,
      "step": 2097
    },
    {
      "epoch": 0.815133192491683,
      "grad_norm": 0.65625,
      "learning_rate": 2.0066176702538676e-05,
      "loss": 2.3814,
      "step": 2098
    },
    {
      "epoch": 0.8155217211820985,
      "grad_norm": 0.6171875,
      "learning_rate": 1.998470715974784e-05,
      "loss": 2.3413,
      "step": 2099
    },
    {
      "epoch": 0.815910249872514,
      "grad_norm": 0.61328125,
      "learning_rate": 1.990338497392845e-05,
      "loss": 2.3015,
      "step": 2100
    },
    {
      "epoch": 0.8162987785629295,
      "grad_norm": 0.6015625,
      "learning_rate": 1.9822210294844222e-05,
      "loss": 2.355,
      "step": 2101
    },
    {
      "epoch": 0.816687307253345,
      "grad_norm": 0.60546875,
      "learning_rate": 1.9741183271987284e-05,
      "loss": 2.4106,
      "step": 2102
    },
    {
      "epoch": 0.8170758359437604,
      "grad_norm": 0.5859375,
      "learning_rate": 1.9660304054577816e-05,
      "loss": 2.2703,
      "step": 2103
    },
    {
      "epoch": 0.8174643646341759,
      "grad_norm": 0.59375,
      "learning_rate": 1.9579572791563804e-05,
      "loss": 2.3253,
      "step": 2104
    },
    {
      "epoch": 0.8178528933245914,
      "grad_norm": 0.61328125,
      "learning_rate": 1.9498989631620767e-05,
      "loss": 2.3657,
      "step": 2105
    },
    {
      "epoch": 0.8182414220150069,
      "grad_norm": 0.6171875,
      "learning_rate": 1.941855472315145e-05,
      "loss": 2.4544,
      "step": 2106
    },
    {
      "epoch": 0.8186299507054224,
      "grad_norm": 0.6015625,
      "learning_rate": 1.9338268214285604e-05,
      "loss": 2.36,
      "step": 2107
    },
    {
      "epoch": 0.8190184793958379,
      "grad_norm": 0.66015625,
      "learning_rate": 1.925813025287968e-05,
      "loss": 2.3582,
      "step": 2108
    },
    {
      "epoch": 0.8194070080862533,
      "grad_norm": 0.6328125,
      "learning_rate": 1.9178140986516568e-05,
      "loss": 2.303,
      "step": 2109
    },
    {
      "epoch": 0.8197955367766688,
      "grad_norm": 0.63671875,
      "learning_rate": 1.9098300562505266e-05,
      "loss": 2.3696,
      "step": 2110
    },
    {
      "epoch": 0.8201840654670843,
      "grad_norm": 0.61328125,
      "learning_rate": 1.9018609127880727e-05,
      "loss": 2.3677,
      "step": 2111
    },
    {
      "epoch": 0.8205725941574998,
      "grad_norm": 0.62890625,
      "learning_rate": 1.8939066829403508e-05,
      "loss": 2.3268,
      "step": 2112
    },
    {
      "epoch": 0.8209611228479153,
      "grad_norm": 0.6015625,
      "learning_rate": 1.8859673813559497e-05,
      "loss": 2.3276,
      "step": 2113
    },
    {
      "epoch": 0.8213496515383308,
      "grad_norm": 0.6015625,
      "learning_rate": 1.8780430226559686e-05,
      "loss": 2.3263,
      "step": 2114
    },
    {
      "epoch": 0.8217381802287462,
      "grad_norm": 0.61328125,
      "learning_rate": 1.8701336214339883e-05,
      "loss": 2.3716,
      "step": 2115
    },
    {
      "epoch": 0.8221267089191617,
      "grad_norm": 0.6796875,
      "learning_rate": 1.8622391922560377e-05,
      "loss": 2.4096,
      "step": 2116
    },
    {
      "epoch": 0.8225152376095772,
      "grad_norm": 0.6171875,
      "learning_rate": 1.8543597496605793e-05,
      "loss": 2.3107,
      "step": 2117
    },
    {
      "epoch": 0.8229037662999927,
      "grad_norm": 0.6484375,
      "learning_rate": 1.8464953081584735e-05,
      "loss": 2.3928,
      "step": 2118
    },
    {
      "epoch": 0.8232922949904082,
      "grad_norm": 0.62109375,
      "learning_rate": 1.83864588223296e-05,
      "loss": 2.3962,
      "step": 2119
    },
    {
      "epoch": 0.8236808236808236,
      "grad_norm": 0.625,
      "learning_rate": 1.830811486339613e-05,
      "loss": 2.3183,
      "step": 2120
    },
    {
      "epoch": 0.8240693523712391,
      "grad_norm": 0.59375,
      "learning_rate": 1.8229921349063396e-05,
      "loss": 2.3754,
      "step": 2121
    },
    {
      "epoch": 0.8244578810616546,
      "grad_norm": 0.59765625,
      "learning_rate": 1.8151878423333346e-05,
      "loss": 2.3284,
      "step": 2122
    },
    {
      "epoch": 0.8248464097520701,
      "grad_norm": 0.640625,
      "learning_rate": 1.807398622993064e-05,
      "loss": 2.3157,
      "step": 2123
    },
    {
      "epoch": 0.8252349384424856,
      "grad_norm": 0.66796875,
      "learning_rate": 1.7996244912302317e-05,
      "loss": 2.3063,
      "step": 2124
    },
    {
      "epoch": 0.8256234671329011,
      "grad_norm": 0.6015625,
      "learning_rate": 1.791865461361758e-05,
      "loss": 2.3508,
      "step": 2125
    },
    {
      "epoch": 0.8260119958233165,
      "grad_norm": 0.62890625,
      "learning_rate": 1.7841215476767493e-05,
      "loss": 2.3617,
      "step": 2126
    },
    {
      "epoch": 0.826400524513732,
      "grad_norm": 0.58984375,
      "learning_rate": 1.7763927644364765e-05,
      "loss": 2.3354,
      "step": 2127
    },
    {
      "epoch": 0.8267890532041475,
      "grad_norm": 0.58203125,
      "learning_rate": 1.7686791258743473e-05,
      "loss": 2.2842,
      "step": 2128
    },
    {
      "epoch": 0.827177581894563,
      "grad_norm": 0.59765625,
      "learning_rate": 1.76098064619587e-05,
      "loss": 2.2952,
      "step": 2129
    },
    {
      "epoch": 0.8275661105849785,
      "grad_norm": 0.625,
      "learning_rate": 1.7532973395786467e-05,
      "loss": 2.3243,
      "step": 2130
    },
    {
      "epoch": 0.827954639275394,
      "grad_norm": 0.63671875,
      "learning_rate": 1.7456292201723325e-05,
      "loss": 2.3563,
      "step": 2131
    },
    {
      "epoch": 0.8283431679658094,
      "grad_norm": 0.61328125,
      "learning_rate": 1.7379763020986117e-05,
      "loss": 2.3241,
      "step": 2132
    },
    {
      "epoch": 0.8287316966562249,
      "grad_norm": 0.61328125,
      "learning_rate": 1.7303385994511778e-05,
      "loss": 2.3951,
      "step": 2133
    },
    {
      "epoch": 0.8291202253466404,
      "grad_norm": 0.62109375,
      "learning_rate": 1.722716126295699e-05,
      "loss": 2.3436,
      "step": 2134
    },
    {
      "epoch": 0.8295087540370559,
      "grad_norm": 0.609375,
      "learning_rate": 1.7151088966698004e-05,
      "loss": 2.2888,
      "step": 2135
    },
    {
      "epoch": 0.8298972827274714,
      "grad_norm": 0.6171875,
      "learning_rate": 1.7075169245830348e-05,
      "loss": 2.3908,
      "step": 2136
    },
    {
      "epoch": 0.8302858114178869,
      "grad_norm": 0.66796875,
      "learning_rate": 1.6999402240168505e-05,
      "loss": 2.4087,
      "step": 2137
    },
    {
      "epoch": 0.8306743401083023,
      "grad_norm": 0.60546875,
      "learning_rate": 1.6923788089245762e-05,
      "loss": 2.3647,
      "step": 2138
    },
    {
      "epoch": 0.8310628687987178,
      "grad_norm": 0.6171875,
      "learning_rate": 1.684832693231393e-05,
      "loss": 2.3629,
      "step": 2139
    },
    {
      "epoch": 0.8314513974891333,
      "grad_norm": 0.6171875,
      "learning_rate": 1.6773018908343018e-05,
      "loss": 2.3733,
      "step": 2140
    },
    {
      "epoch": 0.8318399261795488,
      "grad_norm": 0.64453125,
      "learning_rate": 1.669786415602105e-05,
      "loss": 2.3067,
      "step": 2141
    },
    {
      "epoch": 0.8322284548699643,
      "grad_norm": 0.6328125,
      "learning_rate": 1.662286281375377e-05,
      "loss": 2.3411,
      "step": 2142
    },
    {
      "epoch": 0.8326169835603798,
      "grad_norm": 0.6171875,
      "learning_rate": 1.654801501966442e-05,
      "loss": 2.3791,
      "step": 2143
    },
    {
      "epoch": 0.8330055122507952,
      "grad_norm": 0.7890625,
      "learning_rate": 1.6473320911593448e-05,
      "loss": 2.3025,
      "step": 2144
    },
    {
      "epoch": 0.8333940409412107,
      "grad_norm": 0.625,
      "learning_rate": 1.63987806270983e-05,
      "loss": 2.3023,
      "step": 2145
    },
    {
      "epoch": 0.8337825696316262,
      "grad_norm": 0.58984375,
      "learning_rate": 1.6324394303453073e-05,
      "loss": 2.3044,
      "step": 2146
    },
    {
      "epoch": 0.8341710983220417,
      "grad_norm": 0.58984375,
      "learning_rate": 1.625016207764839e-05,
      "loss": 2.3697,
      "step": 2147
    },
    {
      "epoch": 0.8345596270124572,
      "grad_norm": 0.59375,
      "learning_rate": 1.6176084086391074e-05,
      "loss": 2.3098,
      "step": 2148
    },
    {
      "epoch": 0.8349481557028727,
      "grad_norm": 0.61328125,
      "learning_rate": 1.6102160466103898e-05,
      "loss": 2.3355,
      "step": 2149
    },
    {
      "epoch": 0.8353366843932881,
      "grad_norm": 0.625,
      "learning_rate": 1.6028391352925354e-05,
      "loss": 2.3012,
      "step": 2150
    },
    {
      "epoch": 0.8357252130837036,
      "grad_norm": 0.6171875,
      "learning_rate": 1.5954776882709365e-05,
      "loss": 2.3258,
      "step": 2151
    },
    {
      "epoch": 0.8361137417741191,
      "grad_norm": 0.609375,
      "learning_rate": 1.5881317191025113e-05,
      "loss": 2.339,
      "step": 2152
    },
    {
      "epoch": 0.8365022704645346,
      "grad_norm": 0.63671875,
      "learning_rate": 1.5808012413156713e-05,
      "loss": 2.3705,
      "step": 2153
    },
    {
      "epoch": 0.8368907991549501,
      "grad_norm": 0.609375,
      "learning_rate": 1.5734862684102934e-05,
      "loss": 2.2996,
      "step": 2154
    },
    {
      "epoch": 0.8372793278453656,
      "grad_norm": 0.58984375,
      "learning_rate": 1.5661868138577096e-05,
      "loss": 2.3147,
      "step": 2155
    },
    {
      "epoch": 0.837667856535781,
      "grad_norm": 0.58984375,
      "learning_rate": 1.5589028911006675e-05,
      "loss": 2.3608,
      "step": 2156
    },
    {
      "epoch": 0.8380563852261965,
      "grad_norm": 0.62109375,
      "learning_rate": 1.5516345135533138e-05,
      "loss": 2.4363,
      "step": 2157
    },
    {
      "epoch": 0.838444913916612,
      "grad_norm": 0.6171875,
      "learning_rate": 1.5443816946011657e-05,
      "loss": 2.35,
      "step": 2158
    },
    {
      "epoch": 0.8388334426070275,
      "grad_norm": 0.609375,
      "learning_rate": 1.53714444760109e-05,
      "loss": 2.4453,
      "step": 2159
    },
    {
      "epoch": 0.839221971297443,
      "grad_norm": 0.59375,
      "learning_rate": 1.5299227858812693e-05,
      "loss": 2.4134,
      "step": 2160
    },
    {
      "epoch": 0.8396104999878585,
      "grad_norm": 0.62109375,
      "learning_rate": 1.5227167227411909e-05,
      "loss": 2.3808,
      "step": 2161
    },
    {
      "epoch": 0.8399990286782739,
      "grad_norm": 0.6171875,
      "learning_rate": 1.5155262714516138e-05,
      "loss": 2.446,
      "step": 2162
    },
    {
      "epoch": 0.8403875573686894,
      "grad_norm": 0.63671875,
      "learning_rate": 1.508351445254549e-05,
      "loss": 2.4715,
      "step": 2163
    },
    {
      "epoch": 0.8407760860591049,
      "grad_norm": 0.640625,
      "learning_rate": 1.501192257363222e-05,
      "loss": 2.3122,
      "step": 2164
    },
    {
      "epoch": 0.8411646147495204,
      "grad_norm": 0.640625,
      "learning_rate": 1.494048720962069e-05,
      "loss": 2.3619,
      "step": 2165
    },
    {
      "epoch": 0.8415531434399359,
      "grad_norm": 0.65234375,
      "learning_rate": 1.4869208492066989e-05,
      "loss": 2.363,
      "step": 2166
    },
    {
      "epoch": 0.8419416721303514,
      "grad_norm": 0.61328125,
      "learning_rate": 1.4798086552238732e-05,
      "loss": 2.3153,
      "step": 2167
    },
    {
      "epoch": 0.8423302008207668,
      "grad_norm": 0.609375,
      "learning_rate": 1.4727121521114784e-05,
      "loss": 2.2836,
      "step": 2168
    },
    {
      "epoch": 0.8427187295111823,
      "grad_norm": 0.625,
      "learning_rate": 1.4656313529385068e-05,
      "loss": 2.345,
      "step": 2169
    },
    {
      "epoch": 0.8431072582015978,
      "grad_norm": 0.62890625,
      "learning_rate": 1.4585662707450299e-05,
      "loss": 2.3396,
      "step": 2170
    },
    {
      "epoch": 0.8434957868920133,
      "grad_norm": 0.61328125,
      "learning_rate": 1.4515169185421751e-05,
      "loss": 2.4191,
      "step": 2171
    },
    {
      "epoch": 0.8438843155824288,
      "grad_norm": 0.609375,
      "learning_rate": 1.4444833093121025e-05,
      "loss": 2.3053,
      "step": 2172
    },
    {
      "epoch": 0.8442728442728443,
      "grad_norm": 0.61328125,
      "learning_rate": 1.4374654560079725e-05,
      "loss": 2.2813,
      "step": 2173
    },
    {
      "epoch": 0.8446613729632597,
      "grad_norm": 0.59765625,
      "learning_rate": 1.4304633715539384e-05,
      "loss": 2.3522,
      "step": 2174
    },
    {
      "epoch": 0.8450499016536752,
      "grad_norm": 0.62109375,
      "learning_rate": 1.4234770688451082e-05,
      "loss": 2.4164,
      "step": 2175
    },
    {
      "epoch": 0.8454384303440907,
      "grad_norm": 0.61328125,
      "learning_rate": 1.416506560747528e-05,
      "loss": 2.3329,
      "step": 2176
    },
    {
      "epoch": 0.8458269590345062,
      "grad_norm": 0.61328125,
      "learning_rate": 1.4095518600981562e-05,
      "loss": 2.3285,
      "step": 2177
    },
    {
      "epoch": 0.8462154877249217,
      "grad_norm": 0.62109375,
      "learning_rate": 1.4026129797048393e-05,
      "loss": 2.2963,
      "step": 2178
    },
    {
      "epoch": 0.8466040164153372,
      "grad_norm": 0.6640625,
      "learning_rate": 1.3956899323462891e-05,
      "loss": 2.3549,
      "step": 2179
    },
    {
      "epoch": 0.8469925451057526,
      "grad_norm": 0.609375,
      "learning_rate": 1.3887827307720636e-05,
      "loss": 2.3483,
      "step": 2180
    },
    {
      "epoch": 0.8473810737961681,
      "grad_norm": 0.62109375,
      "learning_rate": 1.3818913877025286e-05,
      "loss": 2.4284,
      "step": 2181
    },
    {
      "epoch": 0.8477696024865836,
      "grad_norm": 0.60546875,
      "learning_rate": 1.3750159158288546e-05,
      "loss": 2.3375,
      "step": 2182
    },
    {
      "epoch": 0.8481581311769991,
      "grad_norm": 0.58984375,
      "learning_rate": 1.3681563278129794e-05,
      "loss": 2.3789,
      "step": 2183
    },
    {
      "epoch": 0.8485466598674146,
      "grad_norm": 0.609375,
      "learning_rate": 1.3613126362875906e-05,
      "loss": 2.3979,
      "step": 2184
    },
    {
      "epoch": 0.8489351885578301,
      "grad_norm": 0.62890625,
      "learning_rate": 1.3544848538560972e-05,
      "loss": 2.3132,
      "step": 2185
    },
    {
      "epoch": 0.8493237172482455,
      "grad_norm": 0.6484375,
      "learning_rate": 1.3476729930926147e-05,
      "loss": 2.3686,
      "step": 2186
    },
    {
      "epoch": 0.849712245938661,
      "grad_norm": 0.6171875,
      "learning_rate": 1.3408770665419335e-05,
      "loss": 2.4061,
      "step": 2187
    },
    {
      "epoch": 0.8501007746290765,
      "grad_norm": 0.6171875,
      "learning_rate": 1.3340970867195013e-05,
      "loss": 2.3404,
      "step": 2188
    },
    {
      "epoch": 0.850489303319492,
      "grad_norm": 0.58984375,
      "learning_rate": 1.3273330661113992e-05,
      "loss": 2.3503,
      "step": 2189
    },
    {
      "epoch": 0.8508778320099075,
      "grad_norm": 0.625,
      "learning_rate": 1.3205850171743106e-05,
      "loss": 2.3556,
      "step": 2190
    },
    {
      "epoch": 0.8512663607003229,
      "grad_norm": 0.61328125,
      "learning_rate": 1.3138529523355148e-05,
      "loss": 2.3403,
      "step": 2191
    },
    {
      "epoch": 0.8516548893907384,
      "grad_norm": 0.6015625,
      "learning_rate": 1.3071368839928488e-05,
      "loss": 2.3289,
      "step": 2192
    },
    {
      "epoch": 0.8520434180811539,
      "grad_norm": 0.6171875,
      "learning_rate": 1.3004368245146915e-05,
      "loss": 2.3767,
      "step": 2193
    },
    {
      "epoch": 0.8524319467715694,
      "grad_norm": 0.62890625,
      "learning_rate": 1.2937527862399424e-05,
      "loss": 2.3762,
      "step": 2194
    },
    {
      "epoch": 0.8528204754619849,
      "grad_norm": 0.59375,
      "learning_rate": 1.2870847814779907e-05,
      "loss": 2.3307,
      "step": 2195
    },
    {
      "epoch": 0.8532090041524004,
      "grad_norm": 0.609375,
      "learning_rate": 1.2804328225087048e-05,
      "loss": 2.3917,
      "step": 2196
    },
    {
      "epoch": 0.8535975328428158,
      "grad_norm": 0.6328125,
      "learning_rate": 1.2737969215823986e-05,
      "loss": 2.4013,
      "step": 2197
    },
    {
      "epoch": 0.8539860615332313,
      "grad_norm": 0.6015625,
      "learning_rate": 1.2671770909198122e-05,
      "loss": 2.3378,
      "step": 2198
    },
    {
      "epoch": 0.8543745902236468,
      "grad_norm": 0.61328125,
      "learning_rate": 1.2605733427120925e-05,
      "loss": 2.3566,
      "step": 2199
    },
    {
      "epoch": 0.8547631189140623,
      "grad_norm": 0.59375,
      "learning_rate": 1.2539856891207712e-05,
      "loss": 2.4097,
      "step": 2200
    },
    {
      "epoch": 0.8551516476044778,
      "grad_norm": 0.6171875,
      "learning_rate": 1.2474141422777363e-05,
      "loss": 2.3139,
      "step": 2201
    },
    {
      "epoch": 0.8555401762948933,
      "grad_norm": 0.625,
      "learning_rate": 1.2408587142852179e-05,
      "loss": 2.3955,
      "step": 2202
    },
    {
      "epoch": 0.8559287049853087,
      "grad_norm": 0.5859375,
      "learning_rate": 1.2343194172157535e-05,
      "loss": 2.3451,
      "step": 2203
    },
    {
      "epoch": 0.8563172336757242,
      "grad_norm": 0.60546875,
      "learning_rate": 1.2277962631121809e-05,
      "loss": 2.3197,
      "step": 2204
    },
    {
      "epoch": 0.8567057623661397,
      "grad_norm": 0.58203125,
      "learning_rate": 1.221289263987606e-05,
      "loss": 2.319,
      "step": 2205
    },
    {
      "epoch": 0.8570942910565552,
      "grad_norm": 0.59375,
      "learning_rate": 1.2147984318253857e-05,
      "loss": 2.3246,
      "step": 2206
    },
    {
      "epoch": 0.8574828197469707,
      "grad_norm": 0.59375,
      "learning_rate": 1.2083237785791003e-05,
      "loss": 2.3354,
      "step": 2207
    },
    {
      "epoch": 0.8578713484373862,
      "grad_norm": 0.65234375,
      "learning_rate": 1.201865316172539e-05,
      "loss": 2.4292,
      "step": 2208
    },
    {
      "epoch": 0.8582598771278016,
      "grad_norm": 0.60546875,
      "learning_rate": 1.1954230564996682e-05,
      "loss": 2.3665,
      "step": 2209
    },
    {
      "epoch": 0.8586484058182171,
      "grad_norm": 0.60546875,
      "learning_rate": 1.1889970114246196e-05,
      "loss": 2.2845,
      "step": 2210
    },
    {
      "epoch": 0.8590369345086326,
      "grad_norm": 0.6171875,
      "learning_rate": 1.1825871927816635e-05,
      "loss": 2.3748,
      "step": 2211
    },
    {
      "epoch": 0.8594254631990481,
      "grad_norm": 0.6171875,
      "learning_rate": 1.176193612375186e-05,
      "loss": 2.3349,
      "step": 2212
    },
    {
      "epoch": 0.8598139918894636,
      "grad_norm": 0.625,
      "learning_rate": 1.1698162819796698e-05,
      "loss": 2.3285,
      "step": 2213
    },
    {
      "epoch": 0.8602025205798791,
      "grad_norm": 0.59375,
      "learning_rate": 1.1634552133396704e-05,
      "loss": 2.3757,
      "step": 2214
    },
    {
      "epoch": 0.8605910492702945,
      "grad_norm": 0.5859375,
      "learning_rate": 1.1571104181697957e-05,
      "loss": 2.3622,
      "step": 2215
    },
    {
      "epoch": 0.86097957796071,
      "grad_norm": 0.625,
      "learning_rate": 1.1507819081546878e-05,
      "loss": 2.3709,
      "step": 2216
    },
    {
      "epoch": 0.8613681066511255,
      "grad_norm": 0.59375,
      "learning_rate": 1.1444696949489908e-05,
      "loss": 2.3431,
      "step": 2217
    },
    {
      "epoch": 0.861756635341541,
      "grad_norm": 0.59765625,
      "learning_rate": 1.1381737901773405e-05,
      "loss": 2.3009,
      "step": 2218
    },
    {
      "epoch": 0.8621451640319565,
      "grad_norm": 0.61328125,
      "learning_rate": 1.1318942054343395e-05,
      "loss": 2.3848,
      "step": 2219
    },
    {
      "epoch": 0.862533692722372,
      "grad_norm": 0.62109375,
      "learning_rate": 1.1256309522845355e-05,
      "loss": 2.3062,
      "step": 2220
    },
    {
      "epoch": 0.8629222214127874,
      "grad_norm": 0.6484375,
      "learning_rate": 1.1193840422623981e-05,
      "loss": 2.3867,
      "step": 2221
    },
    {
      "epoch": 0.8633107501032029,
      "grad_norm": 0.6328125,
      "learning_rate": 1.1131534868722993e-05,
      "loss": 2.4272,
      "step": 2222
    },
    {
      "epoch": 0.8636992787936184,
      "grad_norm": 0.60546875,
      "learning_rate": 1.106939297588494e-05,
      "loss": 2.3541,
      "step": 2223
    },
    {
      "epoch": 0.8640878074840339,
      "grad_norm": 0.6015625,
      "learning_rate": 1.1007414858550968e-05,
      "loss": 2.3333,
      "step": 2224
    },
    {
      "epoch": 0.8644763361744494,
      "grad_norm": 0.62109375,
      "learning_rate": 1.0945600630860563e-05,
      "loss": 2.3475,
      "step": 2225
    },
    {
      "epoch": 0.8648648648648649,
      "grad_norm": 0.6171875,
      "learning_rate": 1.0883950406651466e-05,
      "loss": 2.354,
      "step": 2226
    },
    {
      "epoch": 0.8652533935552803,
      "grad_norm": 0.65625,
      "learning_rate": 1.0822464299459345e-05,
      "loss": 2.3314,
      "step": 2227
    },
    {
      "epoch": 0.8656419222456958,
      "grad_norm": 0.58984375,
      "learning_rate": 1.0761142422517623e-05,
      "loss": 2.2504,
      "step": 2228
    },
    {
      "epoch": 0.8660304509361113,
      "grad_norm": 0.59765625,
      "learning_rate": 1.0699984888757308e-05,
      "loss": 2.3928,
      "step": 2229
    },
    {
      "epoch": 0.8664189796265268,
      "grad_norm": 0.62109375,
      "learning_rate": 1.063899181080672e-05,
      "loss": 2.345,
      "step": 2230
    },
    {
      "epoch": 0.8668075083169423,
      "grad_norm": 0.6015625,
      "learning_rate": 1.0578163300991328e-05,
      "loss": 2.3404,
      "step": 2231
    },
    {
      "epoch": 0.8671960370073578,
      "grad_norm": 0.609375,
      "learning_rate": 1.0517499471333536e-05,
      "loss": 2.3495,
      "step": 2232
    },
    {
      "epoch": 0.8675845656977732,
      "grad_norm": 0.61328125,
      "learning_rate": 1.0457000433552477e-05,
      "loss": 2.3391,
      "step": 2233
    },
    {
      "epoch": 0.8679730943881887,
      "grad_norm": 0.59375,
      "learning_rate": 1.0396666299063763e-05,
      "loss": 2.362,
      "step": 2234
    },
    {
      "epoch": 0.8683616230786042,
      "grad_norm": 0.60546875,
      "learning_rate": 1.0336497178979343e-05,
      "loss": 2.2885,
      "step": 2235
    },
    {
      "epoch": 0.8687501517690197,
      "grad_norm": 0.65234375,
      "learning_rate": 1.0276493184107305e-05,
      "loss": 2.3794,
      "step": 2236
    },
    {
      "epoch": 0.8691386804594352,
      "grad_norm": 0.6171875,
      "learning_rate": 1.0216654424951589e-05,
      "loss": 2.3564,
      "step": 2237
    },
    {
      "epoch": 0.8695272091498507,
      "grad_norm": 0.61328125,
      "learning_rate": 1.0156981011711875e-05,
      "loss": 2.3339,
      "step": 2238
    },
    {
      "epoch": 0.8699157378402661,
      "grad_norm": 0.7265625,
      "learning_rate": 1.0097473054283301e-05,
      "loss": 2.3127,
      "step": 2239
    },
    {
      "epoch": 0.8703042665306816,
      "grad_norm": 0.58984375,
      "learning_rate": 1.003813066225635e-05,
      "loss": 2.3678,
      "step": 2240
    },
    {
      "epoch": 0.8706927952210971,
      "grad_norm": 0.60546875,
      "learning_rate": 9.97895394491657e-06,
      "loss": 2.3859,
      "step": 2241
    },
    {
      "epoch": 0.8710813239115126,
      "grad_norm": 0.6015625,
      "learning_rate": 9.91994301124437e-06,
      "loss": 2.3713,
      "step": 2242
    },
    {
      "epoch": 0.8714698526019281,
      "grad_norm": 0.6171875,
      "learning_rate": 9.861097969914902e-06,
      "loss": 2.3683,
      "step": 2243
    },
    {
      "epoch": 0.8718583812923436,
      "grad_norm": 0.58203125,
      "learning_rate": 9.802418929297773e-06,
      "loss": 2.2977,
      "step": 2244
    },
    {
      "epoch": 0.872246909982759,
      "grad_norm": 0.5859375,
      "learning_rate": 9.743905997456926e-06,
      "loss": 2.3027,
      "step": 2245
    },
    {
      "epoch": 0.8726354386731745,
      "grad_norm": 0.5859375,
      "learning_rate": 9.68555928215037e-06,
      "loss": 2.3658,
      "step": 2246
    },
    {
      "epoch": 0.87302396736359,
      "grad_norm": 0.61328125,
      "learning_rate": 9.627378890829963e-06,
      "loss": 2.2962,
      "step": 2247
    },
    {
      "epoch": 0.8734124960540055,
      "grad_norm": 0.58203125,
      "learning_rate": 9.569364930641323e-06,
      "loss": 2.3248,
      "step": 2248
    },
    {
      "epoch": 0.873801024744421,
      "grad_norm": 0.59375,
      "learning_rate": 9.51151750842354e-06,
      "loss": 2.3288,
      "step": 2249
    },
    {
      "epoch": 0.8741895534348365,
      "grad_norm": 0.5859375,
      "learning_rate": 9.453836730709031e-06,
      "loss": 2.3732,
      "step": 2250
    },
    {
      "epoch": 0.8745780821252519,
      "grad_norm": 0.62109375,
      "learning_rate": 9.396322703723282e-06,
      "loss": 2.3707,
      "step": 2251
    },
    {
      "epoch": 0.8749666108156674,
      "grad_norm": 0.59765625,
      "learning_rate": 9.338975533384719e-06,
      "loss": 2.3222,
      "step": 2252
    },
    {
      "epoch": 0.8753551395060829,
      "grad_norm": 0.60546875,
      "learning_rate": 9.281795325304455e-06,
      "loss": 2.3055,
      "step": 2253
    },
    {
      "epoch": 0.8757436681964984,
      "grad_norm": 0.6484375,
      "learning_rate": 9.224782184786129e-06,
      "loss": 2.4232,
      "step": 2254
    },
    {
      "epoch": 0.8761321968869139,
      "grad_norm": 0.6328125,
      "learning_rate": 9.167936216825746e-06,
      "loss": 2.3638,
      "step": 2255
    },
    {
      "epoch": 0.8765207255773293,
      "grad_norm": 0.59375,
      "learning_rate": 9.111257526111394e-06,
      "loss": 2.4049,
      "step": 2256
    },
    {
      "epoch": 0.8769092542677448,
      "grad_norm": 0.60546875,
      "learning_rate": 9.054746217023125e-06,
      "loss": 2.3684,
      "step": 2257
    },
    {
      "epoch": 0.8772977829581603,
      "grad_norm": 0.62109375,
      "learning_rate": 8.998402393632754e-06,
      "loss": 2.3333,
      "step": 2258
    },
    {
      "epoch": 0.8776863116485758,
      "grad_norm": 0.609375,
      "learning_rate": 8.942226159703603e-06,
      "loss": 2.3723,
      "step": 2259
    },
    {
      "epoch": 0.8780748403389913,
      "grad_norm": 0.60546875,
      "learning_rate": 8.88621761869044e-06,
      "loss": 2.3631,
      "step": 2260
    },
    {
      "epoch": 0.8784633690294068,
      "grad_norm": 0.61328125,
      "learning_rate": 8.83037687373911e-06,
      "loss": 2.3523,
      "step": 2261
    },
    {
      "epoch": 0.8788518977198222,
      "grad_norm": 0.62109375,
      "learning_rate": 8.774704027686509e-06,
      "loss": 2.3548,
      "step": 2262
    },
    {
      "epoch": 0.8792404264102377,
      "grad_norm": 0.609375,
      "learning_rate": 8.719199183060322e-06,
      "loss": 2.3561,
      "step": 2263
    },
    {
      "epoch": 0.8796289551006532,
      "grad_norm": 0.6015625,
      "learning_rate": 8.663862442078819e-06,
      "loss": 2.3768,
      "step": 2264
    },
    {
      "epoch": 0.8800174837910687,
      "grad_norm": 0.62109375,
      "learning_rate": 8.608693906650711e-06,
      "loss": 2.4043,
      "step": 2265
    },
    {
      "epoch": 0.8804060124814842,
      "grad_norm": 0.6171875,
      "learning_rate": 8.55369367837493e-06,
      "loss": 2.3483,
      "step": 2266
    },
    {
      "epoch": 0.8807945411718997,
      "grad_norm": 0.63671875,
      "learning_rate": 8.498861858540451e-06,
      "loss": 2.3346,
      "step": 2267
    },
    {
      "epoch": 0.8811830698623151,
      "grad_norm": 0.6171875,
      "learning_rate": 8.444198548126104e-06,
      "loss": 2.3474,
      "step": 2268
    },
    {
      "epoch": 0.8815715985527306,
      "grad_norm": 0.58203125,
      "learning_rate": 8.389703847800402e-06,
      "loss": 2.3003,
      "step": 2269
    },
    {
      "epoch": 0.8819601272431461,
      "grad_norm": 0.59375,
      "learning_rate": 8.335377857921322e-06,
      "loss": 2.3599,
      "step": 2270
    },
    {
      "epoch": 0.8823486559335616,
      "grad_norm": 0.60546875,
      "learning_rate": 8.28122067853615e-06,
      "loss": 2.3675,
      "step": 2271
    },
    {
      "epoch": 0.8827371846239771,
      "grad_norm": 0.61328125,
      "learning_rate": 8.227232409381314e-06,
      "loss": 2.3494,
      "step": 2272
    },
    {
      "epoch": 0.8831257133143926,
      "grad_norm": 0.5859375,
      "learning_rate": 8.173413149882147e-06,
      "loss": 2.2979,
      "step": 2273
    },
    {
      "epoch": 0.883514242004808,
      "grad_norm": 0.61328125,
      "learning_rate": 8.119762999152747e-06,
      "loss": 2.3499,
      "step": 2274
    },
    {
      "epoch": 0.8839027706952235,
      "grad_norm": 0.6015625,
      "learning_rate": 8.066282055995766e-06,
      "loss": 2.2933,
      "step": 2275
    },
    {
      "epoch": 0.884291299385639,
      "grad_norm": 0.63671875,
      "learning_rate": 8.012970418902244e-06,
      "loss": 2.3571,
      "step": 2276
    },
    {
      "epoch": 0.8846798280760545,
      "grad_norm": 0.59765625,
      "learning_rate": 7.959828186051454e-06,
      "loss": 2.3032,
      "step": 2277
    },
    {
      "epoch": 0.88506835676647,
      "grad_norm": 0.62109375,
      "learning_rate": 7.906855455310647e-06,
      "loss": 2.3277,
      "step": 2278
    },
    {
      "epoch": 0.8854568854568855,
      "grad_norm": 0.59375,
      "learning_rate": 7.85405232423494e-06,
      "loss": 2.3656,
      "step": 2279
    },
    {
      "epoch": 0.8858454141473009,
      "grad_norm": 0.58203125,
      "learning_rate": 7.801418890067114e-06,
      "loss": 2.3602,
      "step": 2280
    },
    {
      "epoch": 0.8862339428377164,
      "grad_norm": 0.6171875,
      "learning_rate": 7.748955249737432e-06,
      "loss": 2.3514,
      "step": 2281
    },
    {
      "epoch": 0.8866224715281319,
      "grad_norm": 0.6171875,
      "learning_rate": 7.696661499863467e-06,
      "loss": 2.3756,
      "step": 2282
    },
    {
      "epoch": 0.8870110002185474,
      "grad_norm": 0.60546875,
      "learning_rate": 7.644537736749924e-06,
      "loss": 2.3808,
      "step": 2283
    },
    {
      "epoch": 0.8873995289089629,
      "grad_norm": 0.57421875,
      "learning_rate": 7.5925840563884405e-06,
      "loss": 2.3155,
      "step": 2284
    },
    {
      "epoch": 0.8877880575993784,
      "grad_norm": 0.62109375,
      "learning_rate": 7.54080055445745e-06,
      "loss": 2.3637,
      "step": 2285
    },
    {
      "epoch": 0.8881765862897938,
      "grad_norm": 0.6328125,
      "learning_rate": 7.489187326321978e-06,
      "loss": 2.2882,
      "step": 2286
    },
    {
      "epoch": 0.8885651149802093,
      "grad_norm": 0.59765625,
      "learning_rate": 7.437744467033436e-06,
      "loss": 2.3655,
      "step": 2287
    },
    {
      "epoch": 0.8889536436706248,
      "grad_norm": 0.59765625,
      "learning_rate": 7.386472071329542e-06,
      "loss": 2.4046,
      "step": 2288
    },
    {
      "epoch": 0.8893421723610403,
      "grad_norm": 0.64453125,
      "learning_rate": 7.335370233634042e-06,
      "loss": 2.31,
      "step": 2289
    },
    {
      "epoch": 0.8897307010514558,
      "grad_norm": 0.62890625,
      "learning_rate": 7.2844390480566126e-06,
      "loss": 2.4038,
      "step": 2290
    },
    {
      "epoch": 0.8901192297418713,
      "grad_norm": 0.60546875,
      "learning_rate": 7.2336786083926245e-06,
      "loss": 2.3153,
      "step": 2291
    },
    {
      "epoch": 0.8905077584322867,
      "grad_norm": 0.59765625,
      "learning_rate": 7.183089008123012e-06,
      "loss": 2.2857,
      "step": 2292
    },
    {
      "epoch": 0.8908962871227022,
      "grad_norm": 0.609375,
      "learning_rate": 7.132670340414105e-06,
      "loss": 2.3884,
      "step": 2293
    },
    {
      "epoch": 0.8912848158131177,
      "grad_norm": 0.59765625,
      "learning_rate": 7.08242269811743e-06,
      "loss": 2.2858,
      "step": 2294
    },
    {
      "epoch": 0.8916733445035332,
      "grad_norm": 0.6171875,
      "learning_rate": 7.032346173769544e-06,
      "loss": 2.345,
      "step": 2295
    },
    {
      "epoch": 0.8920618731939487,
      "grad_norm": 0.609375,
      "learning_rate": 6.98244085959191e-06,
      "loss": 2.3408,
      "step": 2296
    },
    {
      "epoch": 0.8924504018843642,
      "grad_norm": 0.62890625,
      "learning_rate": 6.932706847490622e-06,
      "loss": 2.2851,
      "step": 2297
    },
    {
      "epoch": 0.8928389305747796,
      "grad_norm": 0.58203125,
      "learning_rate": 6.8831442290563485e-06,
      "loss": 2.3528,
      "step": 2298
    },
    {
      "epoch": 0.8932274592651951,
      "grad_norm": 0.60546875,
      "learning_rate": 6.833753095564122e-06,
      "loss": 2.3832,
      "step": 2299
    },
    {
      "epoch": 0.8936159879556106,
      "grad_norm": 0.58984375,
      "learning_rate": 6.784533537973137e-06,
      "loss": 2.3626,
      "step": 2300
    },
    {
      "epoch": 0.8940045166460261,
      "grad_norm": 0.61328125,
      "learning_rate": 6.735485646926631e-06,
      "loss": 2.3842,
      "step": 2301
    },
    {
      "epoch": 0.8943930453364416,
      "grad_norm": 0.6171875,
      "learning_rate": 6.686609512751696e-06,
      "loss": 2.3222,
      "step": 2302
    },
    {
      "epoch": 0.8947815740268571,
      "grad_norm": 0.66796875,
      "learning_rate": 6.637905225459129e-06,
      "loss": 2.4044,
      "step": 2303
    },
    {
      "epoch": 0.8951701027172725,
      "grad_norm": 0.62109375,
      "learning_rate": 6.589372874743227e-06,
      "loss": 2.348,
      "step": 2304
    },
    {
      "epoch": 0.895558631407688,
      "grad_norm": 0.5859375,
      "learning_rate": 6.541012549981651e-06,
      "loss": 2.3778,
      "step": 2305
    },
    {
      "epoch": 0.8959471600981035,
      "grad_norm": 0.609375,
      "learning_rate": 6.492824340235271e-06,
      "loss": 2.3016,
      "step": 2306
    },
    {
      "epoch": 0.896335688788519,
      "grad_norm": 0.58203125,
      "learning_rate": 6.444808334247965e-06,
      "loss": 2.3245,
      "step": 2307
    },
    {
      "epoch": 0.8967242174789345,
      "grad_norm": 0.62890625,
      "learning_rate": 6.396964620446522e-06,
      "loss": 2.4092,
      "step": 2308
    },
    {
      "epoch": 0.89711274616935,
      "grad_norm": 0.58984375,
      "learning_rate": 6.3492932869403835e-06,
      "loss": 2.3346,
      "step": 2309
    },
    {
      "epoch": 0.8975012748597654,
      "grad_norm": 0.5859375,
      "learning_rate": 6.301794421521568e-06,
      "loss": 2.3933,
      "step": 2310
    },
    {
      "epoch": 0.8978898035501809,
      "grad_norm": 0.57421875,
      "learning_rate": 6.25446811166448e-06,
      "loss": 2.4146,
      "step": 2311
    },
    {
      "epoch": 0.8982783322405964,
      "grad_norm": 0.58203125,
      "learning_rate": 6.207314444525703e-06,
      "loss": 2.349,
      "step": 2312
    },
    {
      "epoch": 0.8986668609310119,
      "grad_norm": 0.6171875,
      "learning_rate": 6.160333506943938e-06,
      "loss": 2.3358,
      "step": 2313
    },
    {
      "epoch": 0.8990553896214274,
      "grad_norm": 0.60546875,
      "learning_rate": 6.113525385439733e-06,
      "loss": 2.3736,
      "step": 2314
    },
    {
      "epoch": 0.8994439183118429,
      "grad_norm": 0.6171875,
      "learning_rate": 6.066890166215389e-06,
      "loss": 2.3436,
      "step": 2315
    },
    {
      "epoch": 0.8998324470022583,
      "grad_norm": 0.671875,
      "learning_rate": 6.020427935154816e-06,
      "loss": 2.3194,
      "step": 2316
    },
    {
      "epoch": 0.9002209756926738,
      "grad_norm": 0.640625,
      "learning_rate": 5.974138777823312e-06,
      "loss": 2.4071,
      "step": 2317
    },
    {
      "epoch": 0.9006095043830893,
      "grad_norm": 0.59375,
      "learning_rate": 5.928022779467468e-06,
      "loss": 2.3346,
      "step": 2318
    },
    {
      "epoch": 0.9009980330735048,
      "grad_norm": 0.62109375,
      "learning_rate": 5.88208002501498e-06,
      "loss": 2.2619,
      "step": 2319
    },
    {
      "epoch": 0.9013865617639203,
      "grad_norm": 0.64453125,
      "learning_rate": 5.836310599074468e-06,
      "loss": 2.3823,
      "step": 2320
    },
    {
      "epoch": 0.9017750904543358,
      "grad_norm": 0.6640625,
      "learning_rate": 5.790714585935397e-06,
      "loss": 2.3954,
      "step": 2321
    },
    {
      "epoch": 0.9021636191447512,
      "grad_norm": 0.59765625,
      "learning_rate": 5.745292069567809e-06,
      "loss": 2.366,
      "step": 2322
    },
    {
      "epoch": 0.9025521478351667,
      "grad_norm": 0.6015625,
      "learning_rate": 5.700043133622291e-06,
      "loss": 2.4269,
      "step": 2323
    },
    {
      "epoch": 0.9029406765255822,
      "grad_norm": 0.6015625,
      "learning_rate": 5.654967861429738e-06,
      "loss": 2.3784,
      "step": 2324
    },
    {
      "epoch": 0.9033292052159977,
      "grad_norm": 0.6171875,
      "learning_rate": 5.61006633600123e-06,
      "loss": 2.3596,
      "step": 2325
    },
    {
      "epoch": 0.9037177339064132,
      "grad_norm": 0.66015625,
      "learning_rate": 5.565338640027862e-06,
      "loss": 2.323,
      "step": 2326
    },
    {
      "epoch": 0.9041062625968286,
      "grad_norm": 0.640625,
      "learning_rate": 5.520784855880612e-06,
      "loss": 2.3243,
      "step": 2327
    },
    {
      "epoch": 0.9044947912872441,
      "grad_norm": 0.59765625,
      "learning_rate": 5.4764050656101795e-06,
      "loss": 2.3683,
      "step": 2328
    },
    {
      "epoch": 0.9048833199776596,
      "grad_norm": 0.58984375,
      "learning_rate": 5.432199350946832e-06,
      "loss": 2.3238,
      "step": 2329
    },
    {
      "epoch": 0.9052718486680751,
      "grad_norm": 0.62109375,
      "learning_rate": 5.388167793300281e-06,
      "loss": 2.4312,
      "step": 2330
    },
    {
      "epoch": 0.9056603773584906,
      "grad_norm": 0.60546875,
      "learning_rate": 5.344310473759462e-06,
      "loss": 2.32,
      "step": 2331
    },
    {
      "epoch": 0.9060489060489061,
      "grad_norm": 0.60546875,
      "learning_rate": 5.300627473092457e-06,
      "loss": 2.3038,
      "step": 2332
    },
    {
      "epoch": 0.9064374347393215,
      "grad_norm": 0.63671875,
      "learning_rate": 5.257118871746347e-06,
      "loss": 2.3765,
      "step": 2333
    },
    {
      "epoch": 0.906825963429737,
      "grad_norm": 0.61328125,
      "learning_rate": 5.213784749846984e-06,
      "loss": 2.324,
      "step": 2334
    },
    {
      "epoch": 0.9072144921201525,
      "grad_norm": 0.59765625,
      "learning_rate": 5.170625187198941e-06,
      "loss": 2.3849,
      "step": 2335
    },
    {
      "epoch": 0.907603020810568,
      "grad_norm": 0.578125,
      "learning_rate": 5.127640263285294e-06,
      "loss": 2.2514,
      "step": 2336
    },
    {
      "epoch": 0.9079915495009835,
      "grad_norm": 0.58203125,
      "learning_rate": 5.084830057267509e-06,
      "loss": 2.3077,
      "step": 2337
    },
    {
      "epoch": 0.908380078191399,
      "grad_norm": 0.5859375,
      "learning_rate": 5.04219464798531e-06,
      "loss": 2.3745,
      "step": 2338
    },
    {
      "epoch": 0.9087686068818144,
      "grad_norm": 0.6171875,
      "learning_rate": 4.9997341139565e-06,
      "loss": 2.35,
      "step": 2339
    },
    {
      "epoch": 0.9091571355722299,
      "grad_norm": 0.5859375,
      "learning_rate": 4.957448533376819e-06,
      "loss": 2.3007,
      "step": 2340
    },
    {
      "epoch": 0.9095456642626454,
      "grad_norm": 0.609375,
      "learning_rate": 4.915337984119805e-06,
      "loss": 2.3123,
      "step": 2341
    },
    {
      "epoch": 0.9099341929530609,
      "grad_norm": 0.578125,
      "learning_rate": 4.8734025437366826e-06,
      "loss": 2.406,
      "step": 2342
    },
    {
      "epoch": 0.9103227216434764,
      "grad_norm": 0.59375,
      "learning_rate": 4.831642289456184e-06,
      "loss": 2.3487,
      "step": 2343
    },
    {
      "epoch": 0.9107112503338919,
      "grad_norm": 0.6015625,
      "learning_rate": 4.790057298184425e-06,
      "loss": 2.3409,
      "step": 2344
    },
    {
      "epoch": 0.9110997790243073,
      "grad_norm": 0.6171875,
      "learning_rate": 4.748647646504722e-06,
      "loss": 2.3852,
      "step": 2345
    },
    {
      "epoch": 0.9114883077147228,
      "grad_norm": 0.6171875,
      "learning_rate": 4.70741341067753e-06,
      "loss": 2.3947,
      "step": 2346
    },
    {
      "epoch": 0.9118768364051383,
      "grad_norm": 0.640625,
      "learning_rate": 4.666354666640216e-06,
      "loss": 2.4049,
      "step": 2347
    },
    {
      "epoch": 0.9122653650955538,
      "grad_norm": 0.6015625,
      "learning_rate": 4.625471490007005e-06,
      "loss": 2.3076,
      "step": 2348
    },
    {
      "epoch": 0.9126538937859693,
      "grad_norm": 0.62890625,
      "learning_rate": 4.584763956068738e-06,
      "loss": 2.3434,
      "step": 2349
    },
    {
      "epoch": 0.9130424224763848,
      "grad_norm": 0.6328125,
      "learning_rate": 4.544232139792826e-06,
      "loss": 2.325,
      "step": 2350
    },
    {
      "epoch": 0.9134309511668002,
      "grad_norm": 0.6484375,
      "learning_rate": 4.503876115823081e-06,
      "loss": 2.4135,
      "step": 2351
    },
    {
      "epoch": 0.9138194798572157,
      "grad_norm": 0.640625,
      "learning_rate": 4.463695958479563e-06,
      "loss": 2.4263,
      "step": 2352
    },
    {
      "epoch": 0.9142080085476312,
      "grad_norm": 0.6171875,
      "learning_rate": 4.423691741758451e-06,
      "loss": 2.3761,
      "step": 2353
    },
    {
      "epoch": 0.9145965372380467,
      "grad_norm": 0.60546875,
      "learning_rate": 4.383863539331923e-06,
      "loss": 2.3851,
      "step": 2354
    },
    {
      "epoch": 0.9149850659284622,
      "grad_norm": 0.5703125,
      "learning_rate": 4.3442114245479835e-06,
      "loss": 2.2704,
      "step": 2355
    },
    {
      "epoch": 0.9153735946188777,
      "grad_norm": 0.640625,
      "learning_rate": 4.304735470430387e-06,
      "loss": 2.3783,
      "step": 2356
    },
    {
      "epoch": 0.9157621233092931,
      "grad_norm": 0.6171875,
      "learning_rate": 4.265435749678448e-06,
      "loss": 2.4151,
      "step": 2357
    },
    {
      "epoch": 0.9161506519997086,
      "grad_norm": 0.5859375,
      "learning_rate": 4.226312334666904e-06,
      "loss": 2.2984,
      "step": 2358
    },
    {
      "epoch": 0.9165391806901241,
      "grad_norm": 0.58203125,
      "learning_rate": 4.187365297445844e-06,
      "loss": 2.3461,
      "step": 2359
    },
    {
      "epoch": 0.9169277093805396,
      "grad_norm": 0.609375,
      "learning_rate": 4.148594709740539e-06,
      "loss": 2.3782,
      "step": 2360
    },
    {
      "epoch": 0.9173162380709551,
      "grad_norm": 0.6015625,
      "learning_rate": 4.110000642951273e-06,
      "loss": 2.3375,
      "step": 2361
    },
    {
      "epoch": 0.9177047667613706,
      "grad_norm": 0.58203125,
      "learning_rate": 4.071583168153293e-06,
      "loss": 2.3792,
      "step": 2362
    },
    {
      "epoch": 0.918093295451786,
      "grad_norm": 0.6015625,
      "learning_rate": 4.033342356096592e-06,
      "loss": 2.3389,
      "step": 2363
    },
    {
      "epoch": 0.9184818241422015,
      "grad_norm": 0.62890625,
      "learning_rate": 3.995278277205839e-06,
      "loss": 2.4245,
      "step": 2364
    },
    {
      "epoch": 0.918870352832617,
      "grad_norm": 0.6015625,
      "learning_rate": 3.957391001580235e-06,
      "loss": 2.2686,
      "step": 2365
    },
    {
      "epoch": 0.9192588815230325,
      "grad_norm": 0.60546875,
      "learning_rate": 3.919680598993347e-06,
      "loss": 2.3117,
      "step": 2366
    },
    {
      "epoch": 0.919647410213448,
      "grad_norm": 0.58984375,
      "learning_rate": 3.882147138893055e-06,
      "loss": 2.3161,
      "step": 2367
    },
    {
      "epoch": 0.9200359389038635,
      "grad_norm": 0.62890625,
      "learning_rate": 3.844790690401356e-06,
      "loss": 2.3478,
      "step": 2368
    },
    {
      "epoch": 0.9204244675942789,
      "grad_norm": 0.6484375,
      "learning_rate": 3.807611322314242e-06,
      "loss": 2.303,
      "step": 2369
    },
    {
      "epoch": 0.9208129962846944,
      "grad_norm": 0.63671875,
      "learning_rate": 3.770609103101641e-06,
      "loss": 2.4052,
      "step": 2370
    },
    {
      "epoch": 0.9212015249751099,
      "grad_norm": 0.62890625,
      "learning_rate": 3.7337841009072007e-06,
      "loss": 2.3042,
      "step": 2371
    },
    {
      "epoch": 0.9215900536655254,
      "grad_norm": 0.62109375,
      "learning_rate": 3.6971363835482163e-06,
      "loss": 2.3081,
      "step": 2372
    },
    {
      "epoch": 0.9219785823559409,
      "grad_norm": 0.58984375,
      "learning_rate": 3.660666018515491e-06,
      "loss": 2.3679,
      "step": 2373
    },
    {
      "epoch": 0.9223671110463564,
      "grad_norm": 0.61328125,
      "learning_rate": 3.624373072973242e-06,
      "loss": 2.2973,
      "step": 2374
    },
    {
      "epoch": 0.9227556397367718,
      "grad_norm": 0.625,
      "learning_rate": 3.588257613758883e-06,
      "loss": 2.4296,
      "step": 2375
    },
    {
      "epoch": 0.9231441684271873,
      "grad_norm": 0.60546875,
      "learning_rate": 3.5523197073830337e-06,
      "loss": 2.2725,
      "step": 2376
    },
    {
      "epoch": 0.9235326971176028,
      "grad_norm": 0.5859375,
      "learning_rate": 3.5165594200293193e-06,
      "loss": 2.3307,
      "step": 2377
    },
    {
      "epoch": 0.9239212258080183,
      "grad_norm": 0.625,
      "learning_rate": 3.4809768175542046e-06,
      "loss": 2.3393,
      "step": 2378
    },
    {
      "epoch": 0.9243097544984338,
      "grad_norm": 0.59765625,
      "learning_rate": 3.4455719654870045e-06,
      "loss": 2.3512,
      "step": 2379
    },
    {
      "epoch": 0.9246982831888493,
      "grad_norm": 0.58984375,
      "learning_rate": 3.4103449290296297e-06,
      "loss": 2.3553,
      "step": 2380
    },
    {
      "epoch": 0.9250868118792647,
      "grad_norm": 0.59375,
      "learning_rate": 3.375295773056564e-06,
      "loss": 2.3058,
      "step": 2381
    },
    {
      "epoch": 0.9254753405696802,
      "grad_norm": 0.609375,
      "learning_rate": 3.3404245621146855e-06,
      "loss": 2.3938,
      "step": 2382
    },
    {
      "epoch": 0.9258638692600957,
      "grad_norm": 0.63671875,
      "learning_rate": 3.305731360423159e-06,
      "loss": 2.3489,
      "step": 2383
    },
    {
      "epoch": 0.9262523979505112,
      "grad_norm": 0.58984375,
      "learning_rate": 3.271216231873353e-06,
      "loss": 2.3626,
      "step": 2384
    },
    {
      "epoch": 0.9266409266409267,
      "grad_norm": 0.5703125,
      "learning_rate": 3.2368792400286453e-06,
      "loss": 2.3769,
      "step": 2385
    },
    {
      "epoch": 0.9270294553313422,
      "grad_norm": 0.61328125,
      "learning_rate": 3.202720448124408e-06,
      "loss": 2.2995,
      "step": 2386
    },
    {
      "epoch": 0.9274179840217576,
      "grad_norm": 0.6015625,
      "learning_rate": 3.16873991906782e-06,
      "loss": 2.3558,
      "step": 2387
    },
    {
      "epoch": 0.9278065127121731,
      "grad_norm": 0.56640625,
      "learning_rate": 3.134937715437758e-06,
      "loss": 2.2806,
      "step": 2388
    },
    {
      "epoch": 0.9281950414025886,
      "grad_norm": 0.62109375,
      "learning_rate": 3.1013138994847036e-06,
      "loss": 2.3126,
      "step": 2389
    },
    {
      "epoch": 0.9285835700930041,
      "grad_norm": 0.59765625,
      "learning_rate": 3.0678685331306133e-06,
      "loss": 2.3287,
      "step": 2390
    },
    {
      "epoch": 0.9289720987834196,
      "grad_norm": 0.62890625,
      "learning_rate": 3.034601677968818e-06,
      "loss": 2.3597,
      "step": 2391
    },
    {
      "epoch": 0.929360627473835,
      "grad_norm": 0.60546875,
      "learning_rate": 3.0015133952638994e-06,
      "loss": 2.3254,
      "step": 2392
    },
    {
      "epoch": 0.9297491561642505,
      "grad_norm": 0.65234375,
      "learning_rate": 2.96860374595157e-06,
      "loss": 2.3784,
      "step": 2393
    },
    {
      "epoch": 0.930137684854666,
      "grad_norm": 0.62109375,
      "learning_rate": 2.9358727906385607e-06,
      "loss": 2.3332,
      "step": 2394
    },
    {
      "epoch": 0.9305262135450815,
      "grad_norm": 0.609375,
      "learning_rate": 2.9033205896025316e-06,
      "loss": 2.338,
      "step": 2395
    },
    {
      "epoch": 0.930914742235497,
      "grad_norm": 0.61328125,
      "learning_rate": 2.8709472027919405e-06,
      "loss": 2.3347,
      "step": 2396
    },
    {
      "epoch": 0.9313032709259125,
      "grad_norm": 0.609375,
      "learning_rate": 2.838752689825963e-06,
      "loss": 2.3698,
      "step": 2397
    },
    {
      "epoch": 0.9316917996163279,
      "grad_norm": 0.578125,
      "learning_rate": 2.8067371099943286e-06,
      "loss": 2.4252,
      "step": 2398
    },
    {
      "epoch": 0.9320803283067434,
      "grad_norm": 0.62109375,
      "learning_rate": 2.774900522257251e-06,
      "loss": 2.3157,
      "step": 2399
    },
    {
      "epoch": 0.9324688569971589,
      "grad_norm": 0.6484375,
      "learning_rate": 2.74324298524532e-06,
      "loss": 2.341,
      "step": 2400
    },
    {
      "epoch": 0.9328573856875744,
      "grad_norm": 0.578125,
      "learning_rate": 2.7117645572593777e-06,
      "loss": 2.3027,
      "step": 2401
    },
    {
      "epoch": 0.9332459143779899,
      "grad_norm": 0.625,
      "learning_rate": 2.6804652962703847e-06,
      "loss": 2.4317,
      "step": 2402
    },
    {
      "epoch": 0.9336344430684054,
      "grad_norm": 0.60546875,
      "learning_rate": 2.6493452599194112e-06,
      "loss": 2.4147,
      "step": 2403
    },
    {
      "epoch": 0.9340229717588208,
      "grad_norm": 0.6171875,
      "learning_rate": 2.6184045055174024e-06,
      "loss": 2.3043,
      "step": 2404
    },
    {
      "epoch": 0.9344115004492363,
      "grad_norm": 0.609375,
      "learning_rate": 2.5876430900451886e-06,
      "loss": 2.3907,
      "step": 2405
    },
    {
      "epoch": 0.9348000291396518,
      "grad_norm": 0.640625,
      "learning_rate": 2.5570610701532995e-06,
      "loss": 2.3827,
      "step": 2406
    },
    {
      "epoch": 0.9351885578300673,
      "grad_norm": 0.5859375,
      "learning_rate": 2.5266585021618717e-06,
      "loss": 2.4008,
      "step": 2407
    },
    {
      "epoch": 0.9355770865204828,
      "grad_norm": 0.6015625,
      "learning_rate": 2.4964354420606073e-06,
      "loss": 2.375,
      "step": 2408
    },
    {
      "epoch": 0.9359656152108983,
      "grad_norm": 0.640625,
      "learning_rate": 2.4663919455085727e-06,
      "loss": 2.3972,
      "step": 2409
    },
    {
      "epoch": 0.9363541439013137,
      "grad_norm": 0.6015625,
      "learning_rate": 2.4365280678342094e-06,
      "loss": 2.3483,
      "step": 2410
    },
    {
      "epoch": 0.9367426725917292,
      "grad_norm": 0.625,
      "learning_rate": 2.406843864035102e-06,
      "loss": 2.3772,
      "step": 2411
    },
    {
      "epoch": 0.9371312012821447,
      "grad_norm": 0.609375,
      "learning_rate": 2.377339388777999e-06,
      "loss": 2.3935,
      "step": 2412
    },
    {
      "epoch": 0.9375197299725602,
      "grad_norm": 0.61328125,
      "learning_rate": 2.348014696398626e-06,
      "loss": 2.3582,
      "step": 2413
    },
    {
      "epoch": 0.9379082586629757,
      "grad_norm": 0.625,
      "learning_rate": 2.3188698409016494e-06,
      "loss": 2.4331,
      "step": 2414
    },
    {
      "epoch": 0.9382967873533912,
      "grad_norm": 0.5859375,
      "learning_rate": 2.2899048759605136e-06,
      "loss": 2.3858,
      "step": 2415
    },
    {
      "epoch": 0.9386853160438066,
      "grad_norm": 0.61328125,
      "learning_rate": 2.261119854917404e-06,
      "loss": 2.3011,
      "step": 2416
    },
    {
      "epoch": 0.9390738447342221,
      "grad_norm": 0.60546875,
      "learning_rate": 2.2325148307831057e-06,
      "loss": 2.4114,
      "step": 2417
    },
    {
      "epoch": 0.9394623734246376,
      "grad_norm": 0.65234375,
      "learning_rate": 2.204089856236913e-06,
      "loss": 2.4188,
      "step": 2418
    },
    {
      "epoch": 0.9398509021150531,
      "grad_norm": 0.6015625,
      "learning_rate": 2.1758449836265403e-06,
      "loss": 2.2944,
      "step": 2419
    },
    {
      "epoch": 0.9402394308054686,
      "grad_norm": 0.6015625,
      "learning_rate": 2.147780264968047e-06,
      "loss": 2.3652,
      "step": 2420
    },
    {
      "epoch": 0.9406279594958841,
      "grad_norm": 0.62890625,
      "learning_rate": 2.1198957519456887e-06,
      "loss": 2.3548,
      "step": 2421
    },
    {
      "epoch": 0.9410164881862995,
      "grad_norm": 0.6171875,
      "learning_rate": 2.092191495911877e-06,
      "loss": 2.3325,
      "step": 2422
    },
    {
      "epoch": 0.941405016876715,
      "grad_norm": 0.60546875,
      "learning_rate": 2.0646675478870337e-06,
      "loss": 2.3293,
      "step": 2423
    },
    {
      "epoch": 0.9417935455671305,
      "grad_norm": 0.60546875,
      "learning_rate": 2.037323958559545e-06,
      "loss": 2.4153,
      "step": 2424
    },
    {
      "epoch": 0.942182074257546,
      "grad_norm": 0.6015625,
      "learning_rate": 2.0101607782856526e-06,
      "loss": 2.306,
      "step": 2425
    },
    {
      "epoch": 0.9425706029479615,
      "grad_norm": 0.6015625,
      "learning_rate": 1.983178057089341e-06,
      "loss": 2.3611,
      "step": 2426
    },
    {
      "epoch": 0.942959131638377,
      "grad_norm": 0.64453125,
      "learning_rate": 1.9563758446622502e-06,
      "loss": 2.3424,
      "step": 2427
    },
    {
      "epoch": 0.9433476603287924,
      "grad_norm": 0.61328125,
      "learning_rate": 1.9297541903636196e-06,
      "loss": 2.3627,
      "step": 2428
    },
    {
      "epoch": 0.9437361890192079,
      "grad_norm": 0.62890625,
      "learning_rate": 1.9033131432201424e-06,
      "loss": 2.349,
      "step": 2429
    },
    {
      "epoch": 0.9441247177096234,
      "grad_norm": 0.6484375,
      "learning_rate": 1.877052751925934e-06,
      "loss": 2.3583,
      "step": 2430
    },
    {
      "epoch": 0.9445132464000389,
      "grad_norm": 0.66796875,
      "learning_rate": 1.8509730648423762e-06,
      "loss": 2.2995,
      "step": 2431
    },
    {
      "epoch": 0.9449017750904544,
      "grad_norm": 0.63671875,
      "learning_rate": 1.8250741299980945e-06,
      "loss": 2.331,
      "step": 2432
    },
    {
      "epoch": 0.9452903037808699,
      "grad_norm": 0.6015625,
      "learning_rate": 1.7993559950888362e-06,
      "loss": 2.3712,
      "step": 2433
    },
    {
      "epoch": 0.9456788324712853,
      "grad_norm": 0.59765625,
      "learning_rate": 1.7738187074773705e-06,
      "loss": 2.3326,
      "step": 2434
    },
    {
      "epoch": 0.9460673611617008,
      "grad_norm": 0.6171875,
      "learning_rate": 1.748462314193422e-06,
      "loss": 2.3624,
      "step": 2435
    },
    {
      "epoch": 0.9464558898521163,
      "grad_norm": 0.6015625,
      "learning_rate": 1.723286861933593e-06,
      "loss": 2.4015,
      "step": 2436
    },
    {
      "epoch": 0.9468444185425318,
      "grad_norm": 0.5859375,
      "learning_rate": 1.6982923970612409e-06,
      "loss": 2.3807,
      "step": 2437
    },
    {
      "epoch": 0.9472329472329473,
      "grad_norm": 0.625,
      "learning_rate": 1.673478965606423e-06,
      "loss": 2.3904,
      "step": 2438
    },
    {
      "epoch": 0.9476214759233628,
      "grad_norm": 0.609375,
      "learning_rate": 1.6488466132658087e-06,
      "loss": 2.327,
      "step": 2439
    },
    {
      "epoch": 0.9480100046137782,
      "grad_norm": 0.578125,
      "learning_rate": 1.6243953854025773e-06,
      "loss": 2.3492,
      "step": 2440
    },
    {
      "epoch": 0.9483985333041937,
      "grad_norm": 0.609375,
      "learning_rate": 1.6001253270463757e-06,
      "loss": 2.3737,
      "step": 2441
    },
    {
      "epoch": 0.9487870619946092,
      "grad_norm": 0.61328125,
      "learning_rate": 1.5760364828931728e-06,
      "loss": 2.3146,
      "step": 2442
    },
    {
      "epoch": 0.9491755906850247,
      "grad_norm": 0.59375,
      "learning_rate": 1.5521288973052273e-06,
      "loss": 2.3018,
      "step": 2443
    },
    {
      "epoch": 0.9495641193754402,
      "grad_norm": 0.5859375,
      "learning_rate": 1.5284026143110087e-06,
      "loss": 2.3319,
      "step": 2444
    },
    {
      "epoch": 0.9499526480658557,
      "grad_norm": 0.58984375,
      "learning_rate": 1.504857677605065e-06,
      "loss": 2.3617,
      "step": 2445
    },
    {
      "epoch": 0.950341176756271,
      "grad_norm": 0.59375,
      "learning_rate": 1.481494130547989e-06,
      "loss": 2.3314,
      "step": 2446
    },
    {
      "epoch": 0.9507297054466866,
      "grad_norm": 0.58203125,
      "learning_rate": 1.45831201616633e-06,
      "loss": 2.3293,
      "step": 2447
    },
    {
      "epoch": 0.9511182341371021,
      "grad_norm": 0.58984375,
      "learning_rate": 1.435311377152493e-06,
      "loss": 2.3415,
      "step": 2448
    },
    {
      "epoch": 0.9515067628275176,
      "grad_norm": 0.5859375,
      "learning_rate": 1.4124922558646946e-06,
      "loss": 2.3067,
      "step": 2449
    },
    {
      "epoch": 0.9518952915179331,
      "grad_norm": 0.61328125,
      "learning_rate": 1.3898546943268643e-06,
      "loss": 2.4032,
      "step": 2450
    },
    {
      "epoch": 0.9522838202083486,
      "grad_norm": 0.65625,
      "learning_rate": 1.3673987342285533e-06,
      "loss": 2.3676,
      "step": 2451
    },
    {
      "epoch": 0.952672348898764,
      "grad_norm": 0.6015625,
      "learning_rate": 1.3451244169249033e-06,
      "loss": 2.3903,
      "step": 2452
    },
    {
      "epoch": 0.9530608775891795,
      "grad_norm": 0.6328125,
      "learning_rate": 1.3230317834365013e-06,
      "loss": 2.367,
      "step": 2453
    },
    {
      "epoch": 0.953449406279595,
      "grad_norm": 0.6015625,
      "learning_rate": 1.3011208744493796e-06,
      "loss": 2.3764,
      "step": 2454
    },
    {
      "epoch": 0.9538379349700105,
      "grad_norm": 0.66796875,
      "learning_rate": 1.2793917303148716e-06,
      "loss": 2.3566,
      "step": 2455
    },
    {
      "epoch": 0.954226463660426,
      "grad_norm": 0.78125,
      "learning_rate": 1.2578443910496008e-06,
      "loss": 2.4127,
      "step": 2456
    },
    {
      "epoch": 0.9546149923508415,
      "grad_norm": 0.69140625,
      "learning_rate": 1.236478896335358e-06,
      "loss": 2.3669,
      "step": 2457
    },
    {
      "epoch": 0.9550035210412569,
      "grad_norm": 0.609375,
      "learning_rate": 1.2152952855190692e-06,
      "loss": 2.311,
      "step": 2458
    },
    {
      "epoch": 0.9553920497316724,
      "grad_norm": 0.625,
      "learning_rate": 1.1942935976126724e-06,
      "loss": 2.4434,
      "step": 2459
    },
    {
      "epoch": 0.9557805784220879,
      "grad_norm": 0.58984375,
      "learning_rate": 1.1734738712930849e-06,
      "loss": 2.3608,
      "step": 2460
    },
    {
      "epoch": 0.9561691071125034,
      "grad_norm": 0.625,
      "learning_rate": 1.1528361449021475e-06,
      "loss": 2.3522,
      "step": 2461
    },
    {
      "epoch": 0.9565576358029189,
      "grad_norm": 0.5703125,
      "learning_rate": 1.1323804564464802e-06,
      "loss": 2.3984,
      "step": 2462
    },
    {
      "epoch": 0.9569461644933343,
      "grad_norm": 0.6328125,
      "learning_rate": 1.1121068435974935e-06,
      "loss": 2.3543,
      "step": 2463
    },
    {
      "epoch": 0.9573346931837498,
      "grad_norm": 0.60546875,
      "learning_rate": 1.0920153436912883e-06,
      "loss": 2.2961,
      "step": 2464
    },
    {
      "epoch": 0.9577232218741653,
      "grad_norm": 0.62109375,
      "learning_rate": 1.072105993728556e-06,
      "loss": 2.3472,
      "step": 2465
    },
    {
      "epoch": 0.9581117505645808,
      "grad_norm": 0.61328125,
      "learning_rate": 1.0523788303745674e-06,
      "loss": 2.3973,
      "step": 2466
    },
    {
      "epoch": 0.9585002792549963,
      "grad_norm": 0.60546875,
      "learning_rate": 1.0328338899590616e-06,
      "loss": 2.3392,
      "step": 2467
    },
    {
      "epoch": 0.9588888079454118,
      "grad_norm": 0.65234375,
      "learning_rate": 1.0134712084762022e-06,
      "loss": 2.3869,
      "step": 2468
    },
    {
      "epoch": 0.9592773366358271,
      "grad_norm": 0.6171875,
      "learning_rate": 9.94290821584498e-07,
      "loss": 2.303,
      "step": 2469
    },
    {
      "epoch": 0.9596658653262427,
      "grad_norm": 0.58203125,
      "learning_rate": 9.752927646067388e-07,
      "loss": 2.3608,
      "step": 2470
    },
    {
      "epoch": 0.9600543940166582,
      "grad_norm": 0.65625,
      "learning_rate": 9.564770725299376e-07,
      "loss": 2.3699,
      "step": 2471
    },
    {
      "epoch": 0.9604429227070737,
      "grad_norm": 0.609375,
      "learning_rate": 9.378437800052764e-07,
      "loss": 2.3729,
      "step": 2472
    },
    {
      "epoch": 0.9608314513974892,
      "grad_norm": 0.58203125,
      "learning_rate": 9.193929213480057e-07,
      "loss": 2.3335,
      "step": 2473
    },
    {
      "epoch": 0.9612199800879047,
      "grad_norm": 0.6171875,
      "learning_rate": 9.011245305374006e-07,
      "loss": 2.3212,
      "step": 2474
    },
    {
      "epoch": 0.96160850877832,
      "grad_norm": 0.5859375,
      "learning_rate": 8.83038641216738e-07,
      "loss": 2.3239,
      "step": 2475
    },
    {
      "epoch": 0.9619970374687355,
      "grad_norm": 0.609375,
      "learning_rate": 8.651352866931639e-07,
      "loss": 2.2481,
      "step": 2476
    },
    {
      "epoch": 0.962385566159151,
      "grad_norm": 0.6484375,
      "learning_rate": 8.47414499937671e-07,
      "loss": 2.3728,
      "step": 2477
    },
    {
      "epoch": 0.9627740948495666,
      "grad_norm": 0.60546875,
      "learning_rate": 8.298763135850429e-07,
      "loss": 2.3857,
      "step": 2478
    },
    {
      "epoch": 0.963162623539982,
      "grad_norm": 0.59765625,
      "learning_rate": 8.125207599337769e-07,
      "loss": 2.293,
      "step": 2479
    },
    {
      "epoch": 0.9635511522303976,
      "grad_norm": 0.5859375,
      "learning_rate": 7.953478709460394e-07,
      "loss": 2.347,
      "step": 2480
    },
    {
      "epoch": 0.963939680920813,
      "grad_norm": 0.62890625,
      "learning_rate": 7.783576782476099e-07,
      "loss": 2.3957,
      "step": 2481
    },
    {
      "epoch": 0.9643282096112284,
      "grad_norm": 0.625,
      "learning_rate": 7.615502131277819e-07,
      "loss": 2.3651,
      "step": 2482
    },
    {
      "epoch": 0.964716738301644,
      "grad_norm": 0.5859375,
      "learning_rate": 7.449255065393624e-07,
      "loss": 2.2837,
      "step": 2483
    },
    {
      "epoch": 0.9651052669920595,
      "grad_norm": 0.59765625,
      "learning_rate": 7.284835890985608e-07,
      "loss": 2.3985,
      "step": 2484
    },
    {
      "epoch": 0.965493795682475,
      "grad_norm": 0.61328125,
      "learning_rate": 7.122244910850006e-07,
      "loss": 2.3838,
      "step": 2485
    },
    {
      "epoch": 0.9658823243728905,
      "grad_norm": 0.6484375,
      "learning_rate": 6.961482424415855e-07,
      "loss": 2.3677,
      "step": 2486
    },
    {
      "epoch": 0.9662708530633058,
      "grad_norm": 0.60546875,
      "learning_rate": 6.802548727745106e-07,
      "loss": 2.4086,
      "step": 2487
    },
    {
      "epoch": 0.9666593817537213,
      "grad_norm": 0.6015625,
      "learning_rate": 6.645444113531519e-07,
      "loss": 2.3239,
      "step": 2488
    },
    {
      "epoch": 0.9670479104441368,
      "grad_norm": 0.58984375,
      "learning_rate": 6.490168871100766e-07,
      "loss": 2.3253,
      "step": 2489
    },
    {
      "epoch": 0.9674364391345524,
      "grad_norm": 0.60546875,
      "learning_rate": 6.336723286408996e-07,
      "loss": 2.3663,
      "step": 2490
    },
    {
      "epoch": 0.9678249678249679,
      "grad_norm": 0.6171875,
      "learning_rate": 6.185107642043275e-07,
      "loss": 2.3496,
      "step": 2491
    },
    {
      "epoch": 0.9682134965153834,
      "grad_norm": 0.59765625,
      "learning_rate": 6.035322217220584e-07,
      "loss": 2.377,
      "step": 2492
    },
    {
      "epoch": 0.9686020252057987,
      "grad_norm": 0.62890625,
      "learning_rate": 5.88736728778727e-07,
      "loss": 2.3933,
      "step": 2493
    },
    {
      "epoch": 0.9689905538962142,
      "grad_norm": 0.625,
      "learning_rate": 5.741243126218487e-07,
      "loss": 2.3243,
      "step": 2494
    },
    {
      "epoch": 0.9693790825866297,
      "grad_norm": 0.60546875,
      "learning_rate": 5.596950001618085e-07,
      "loss": 2.3747,
      "step": 2495
    },
    {
      "epoch": 0.9697676112770452,
      "grad_norm": 0.61328125,
      "learning_rate": 5.45448817971772e-07,
      "loss": 2.3768,
      "step": 2496
    },
    {
      "epoch": 0.9701561399674608,
      "grad_norm": 0.5859375,
      "learning_rate": 5.313857922876419e-07,
      "loss": 2.4069,
      "step": 2497
    },
    {
      "epoch": 0.9705446686578763,
      "grad_norm": 0.59765625,
      "learning_rate": 5.17505949008057e-07,
      "loss": 2.3018,
      "step": 2498
    },
    {
      "epoch": 0.9709331973482916,
      "grad_norm": 0.59765625,
      "learning_rate": 5.038093136942367e-07,
      "loss": 2.3644,
      "step": 2499
    },
    {
      "epoch": 0.9713217260387071,
      "grad_norm": 0.61328125,
      "learning_rate": 4.902959115700712e-07,
      "loss": 2.325,
      "step": 2500
    },
    {
      "epoch": 0.9717102547291226,
      "grad_norm": 0.625,
      "learning_rate": 4.769657675219752e-07,
      "loss": 2.4057,
      "step": 2501
    },
    {
      "epoch": 0.9720987834195381,
      "grad_norm": 0.59765625,
      "learning_rate": 4.6381890609886736e-07,
      "loss": 2.3302,
      "step": 2502
    },
    {
      "epoch": 0.9724873121099536,
      "grad_norm": 0.6015625,
      "learning_rate": 4.508553515121472e-07,
      "loss": 2.367,
      "step": 2503
    },
    {
      "epoch": 0.9728758408003692,
      "grad_norm": 0.62890625,
      "learning_rate": 4.380751276356176e-07,
      "loss": 2.4321,
      "step": 2504
    },
    {
      "epoch": 0.9732643694907845,
      "grad_norm": 0.61328125,
      "learning_rate": 4.25478258005485e-07,
      "loss": 2.4378,
      "step": 2505
    },
    {
      "epoch": 0.9736528981812,
      "grad_norm": 0.6015625,
      "learning_rate": 4.130647658202591e-07,
      "loss": 2.2761,
      "step": 2506
    },
    {
      "epoch": 0.9740414268716155,
      "grad_norm": 0.640625,
      "learning_rate": 4.008346739407642e-07,
      "loss": 2.3617,
      "step": 2507
    },
    {
      "epoch": 0.974429955562031,
      "grad_norm": 0.65625,
      "learning_rate": 3.887880048900394e-07,
      "loss": 2.3557,
      "step": 2508
    },
    {
      "epoch": 0.9748184842524465,
      "grad_norm": 0.5859375,
      "learning_rate": 3.7692478085337155e-07,
      "loss": 2.3727,
      "step": 2509
    },
    {
      "epoch": 0.975207012942862,
      "grad_norm": 0.58984375,
      "learning_rate": 3.652450236781957e-07,
      "loss": 2.3027,
      "step": 2510
    },
    {
      "epoch": 0.9755955416332774,
      "grad_norm": 0.61328125,
      "learning_rate": 3.5374875487405037e-07,
      "loss": 2.3671,
      "step": 2511
    },
    {
      "epoch": 0.9759840703236929,
      "grad_norm": 0.61328125,
      "learning_rate": 3.424359956126e-07,
      "loss": 2.3165,
      "step": 2512
    },
    {
      "epoch": 0.9763725990141084,
      "grad_norm": 0.625,
      "learning_rate": 3.313067667275238e-07,
      "loss": 2.3788,
      "step": 2513
    },
    {
      "epoch": 0.976761127704524,
      "grad_norm": 0.63671875,
      "learning_rate": 3.2036108871452695e-07,
      "loss": 2.3492,
      "step": 2514
    },
    {
      "epoch": 0.9771496563949394,
      "grad_norm": 0.57421875,
      "learning_rate": 3.0959898173128497e-07,
      "loss": 2.2973,
      "step": 2515
    },
    {
      "epoch": 0.977538185085355,
      "grad_norm": 0.61328125,
      "learning_rate": 2.990204655973994e-07,
      "loss": 2.4172,
      "step": 2516
    },
    {
      "epoch": 0.9779267137757703,
      "grad_norm": 0.62890625,
      "learning_rate": 2.8862555979437544e-07,
      "loss": 2.3124,
      "step": 2517
    },
    {
      "epoch": 0.9783152424661858,
      "grad_norm": 0.5859375,
      "learning_rate": 2.784142834655667e-07,
      "loss": 2.3208,
      "step": 2518
    },
    {
      "epoch": 0.9787037711566013,
      "grad_norm": 0.609375,
      "learning_rate": 2.6838665541616404e-07,
      "loss": 2.3884,
      "step": 2519
    },
    {
      "epoch": 0.9790922998470168,
      "grad_norm": 0.59375,
      "learning_rate": 2.585426941131619e-07,
      "loss": 2.4062,
      "step": 2520
    },
    {
      "epoch": 0.9794808285374323,
      "grad_norm": 0.64453125,
      "learning_rate": 2.488824176852922e-07,
      "loss": 2.3114,
      "step": 2521
    },
    {
      "epoch": 0.9798693572278478,
      "grad_norm": 0.62109375,
      "learning_rate": 2.394058439230129e-07,
      "loss": 2.3523,
      "step": 2522
    },
    {
      "epoch": 0.9802578859182632,
      "grad_norm": 0.609375,
      "learning_rate": 2.3011299027847487e-07,
      "loss": 2.3356,
      "step": 2523
    },
    {
      "epoch": 0.9806464146086787,
      "grad_norm": 0.59765625,
      "learning_rate": 2.210038738654996e-07,
      "loss": 2.3744,
      "step": 2524
    },
    {
      "epoch": 0.9810349432990942,
      "grad_norm": 0.59375,
      "learning_rate": 2.1207851145953473e-07,
      "loss": 2.2672,
      "step": 2525
    },
    {
      "epoch": 0.9814234719895097,
      "grad_norm": 0.59375,
      "learning_rate": 2.033369194975987e-07,
      "loss": 2.3416,
      "step": 2526
    },
    {
      "epoch": 0.9818120006799252,
      "grad_norm": 0.62109375,
      "learning_rate": 1.9477911407831396e-07,
      "loss": 2.3808,
      "step": 2527
    },
    {
      "epoch": 0.9822005293703406,
      "grad_norm": 0.58984375,
      "learning_rate": 1.86405110961807e-07,
      "loss": 2.3057,
      "step": 2528
    },
    {
      "epoch": 0.9825890580607561,
      "grad_norm": 0.609375,
      "learning_rate": 1.7821492556973075e-07,
      "loss": 2.4372,
      "step": 2529
    },
    {
      "epoch": 0.9829775867511716,
      "grad_norm": 0.609375,
      "learning_rate": 1.7020857298521986e-07,
      "loss": 2.3674,
      "step": 2530
    },
    {
      "epoch": 0.9833661154415871,
      "grad_norm": 0.640625,
      "learning_rate": 1.6238606795284662e-07,
      "loss": 2.3223,
      "step": 2531
    },
    {
      "epoch": 0.9837546441320026,
      "grad_norm": 0.59375,
      "learning_rate": 1.547474248786096e-07,
      "loss": 2.2912,
      "step": 2532
    },
    {
      "epoch": 0.9841431728224181,
      "grad_norm": 0.625,
      "learning_rate": 1.4729265782993383e-07,
      "loss": 2.3836,
      "step": 2533
    },
    {
      "epoch": 0.9845317015128335,
      "grad_norm": 0.59765625,
      "learning_rate": 1.400217805355708e-07,
      "loss": 2.3461,
      "step": 2534
    },
    {
      "epoch": 0.984920230203249,
      "grad_norm": 0.6875,
      "learning_rate": 1.3293480638565392e-07,
      "loss": 2.3769,
      "step": 2535
    },
    {
      "epoch": 0.9853087588936645,
      "grad_norm": 0.6015625,
      "learning_rate": 1.2603174843162092e-07,
      "loss": 2.2917,
      "step": 2536
    },
    {
      "epoch": 0.98569728758408,
      "grad_norm": 0.61328125,
      "learning_rate": 1.1931261938621375e-07,
      "loss": 2.3128,
      "step": 2537
    },
    {
      "epoch": 0.9860858162744955,
      "grad_norm": 0.6171875,
      "learning_rate": 1.1277743162345644e-07,
      "loss": 2.344,
      "step": 2538
    },
    {
      "epoch": 0.986474344964911,
      "grad_norm": 0.65234375,
      "learning_rate": 1.0642619717859959e-07,
      "loss": 2.328,
      "step": 2539
    },
    {
      "epoch": 0.9868628736553264,
      "grad_norm": 0.59765625,
      "learning_rate": 1.002589277481536e-07,
      "loss": 2.3472,
      "step": 2540
    },
    {
      "epoch": 0.9872514023457419,
      "grad_norm": 0.6484375,
      "learning_rate": 9.427563468982215e-08,
      "loss": 2.3188,
      "step": 2541
    },
    {
      "epoch": 0.9876399310361574,
      "grad_norm": 0.62109375,
      "learning_rate": 8.847632902250214e-08,
      "loss": 2.3605,
      "step": 2542
    },
    {
      "epoch": 0.9880284597265729,
      "grad_norm": 0.64453125,
      "learning_rate": 8.28610214262393e-08,
      "loss": 2.3247,
      "step": 2543
    },
    {
      "epoch": 0.9884169884169884,
      "grad_norm": 0.60546875,
      "learning_rate": 7.742972224225043e-08,
      "loss": 2.3225,
      "step": 2544
    },
    {
      "epoch": 0.9888055171074039,
      "grad_norm": 0.640625,
      "learning_rate": 7.21824414728789e-08,
      "loss": 2.3744,
      "step": 2545
    },
    {
      "epoch": 0.9891940457978193,
      "grad_norm": 0.61328125,
      "learning_rate": 6.711918878155033e-08,
      "loss": 2.2812,
      "step": 2546
    },
    {
      "epoch": 0.9895825744882348,
      "grad_norm": 0.62109375,
      "learning_rate": 6.223997349282807e-08,
      "loss": 2.3866,
      "step": 2547
    },
    {
      "epoch": 0.9899711031786503,
      "grad_norm": 0.65234375,
      "learning_rate": 5.754480459229106e-08,
      "loss": 2.377,
      "step": 2548
    },
    {
      "epoch": 0.9903596318690658,
      "grad_norm": 0.62890625,
      "learning_rate": 5.303369072664488e-08,
      "loss": 2.3846,
      "step": 2549
    },
    {
      "epoch": 0.9907481605594813,
      "grad_norm": 6.3125,
      "learning_rate": 4.8706640203577406e-08,
      "loss": 2.2776,
      "step": 2550
    },
    {
      "epoch": 0.9911366892498968,
      "grad_norm": 0.60546875,
      "learning_rate": 4.456366099183651e-08,
      "loss": 2.3427,
      "step": 2551
    },
    {
      "epoch": 0.9915252179403122,
      "grad_norm": 0.59765625,
      "learning_rate": 4.0604760721174586e-08,
      "loss": 2.3697,
      "step": 2552
    },
    {
      "epoch": 0.9919137466307277,
      "grad_norm": 0.609375,
      "learning_rate": 3.6829946682348516e-08,
      "loss": 2.3838,
      "step": 2553
    },
    {
      "epoch": 0.9923022753211432,
      "grad_norm": 0.58984375,
      "learning_rate": 3.3239225827086386e-08,
      "loss": 2.2705,
      "step": 2554
    },
    {
      "epoch": 0.9926908040115587,
      "grad_norm": 0.61328125,
      "learning_rate": 2.983260476808747e-08,
      "loss": 2.3065,
      "step": 2555
    },
    {
      "epoch": 0.9930793327019742,
      "grad_norm": 0.63671875,
      "learning_rate": 2.6610089779044445e-08,
      "loss": 2.3795,
      "step": 2556
    },
    {
      "epoch": 0.9934678613923897,
      "grad_norm": 0.61328125,
      "learning_rate": 2.3571686794543467e-08,
      "loss": 2.3717,
      "step": 2557
    },
    {
      "epoch": 0.9938563900828051,
      "grad_norm": 0.62890625,
      "learning_rate": 2.0717401410164096e-08,
      "loss": 2.3575,
      "step": 2558
    },
    {
      "epoch": 0.9942449187732206,
      "grad_norm": 0.578125,
      "learning_rate": 1.8047238882379357e-08,
      "loss": 2.3786,
      "step": 2559
    },
    {
      "epoch": 0.9946334474636361,
      "grad_norm": 0.61328125,
      "learning_rate": 1.556120412857798e-08,
      "loss": 2.4276,
      "step": 2560
    },
    {
      "epoch": 0.9950219761540516,
      "grad_norm": 0.66015625,
      "learning_rate": 1.3259301727075458e-08,
      "loss": 2.3399,
      "step": 2561
    },
    {
      "epoch": 0.9954105048444671,
      "grad_norm": 0.609375,
      "learning_rate": 1.1141535917069678e-08,
      "loss": 2.3158,
      "step": 2562
    },
    {
      "epoch": 0.9957990335348826,
      "grad_norm": 0.61328125,
      "learning_rate": 9.207910598674208e-09,
      "loss": 2.317,
      "step": 2563
    },
    {
      "epoch": 0.996187562225298,
      "grad_norm": 0.6171875,
      "learning_rate": 7.458429332862781e-09,
      "loss": 2.3655,
      "step": 2564
    },
    {
      "epoch": 0.9965760909157135,
      "grad_norm": 0.66015625,
      "learning_rate": 5.893095341502619e-09,
      "loss": 2.3816,
      "step": 2565
    },
    {
      "epoch": 0.996964619606129,
      "grad_norm": 0.66796875,
      "learning_rate": 4.511911507321109e-09,
      "loss": 2.2994,
      "step": 2566
    },
    {
      "epoch": 0.9973531482965445,
      "grad_norm": 0.59375,
      "learning_rate": 3.31488037392802e-09,
      "loss": 2.3347,
      "step": 2567
    },
    {
      "epoch": 0.99774167698696,
      "grad_norm": 0.58984375,
      "learning_rate": 2.3020041457821885e-09,
      "loss": 2.2885,
      "step": 2568
    },
    {
      "epoch": 0.9981302056773755,
      "grad_norm": 0.671875,
      "learning_rate": 1.4732846882137274e-09,
      "loss": 2.3607,
      "step": 2569
    },
    {
      "epoch": 0.9985187343677909,
      "grad_norm": 0.6328125,
      "learning_rate": 8.287235273907179e-10,
      "loss": 2.3445,
      "step": 2570
    },
    {
      "epoch": 0.9989072630582064,
      "grad_norm": 0.62890625,
      "learning_rate": 3.683218503636177e-10,
      "loss": 2.3514,
      "step": 2571
    },
    {
      "epoch": 0.9992957917486219,
      "grad_norm": 0.58984375,
      "learning_rate": 9.208050498754617e-11,
      "loss": 2.3164,
      "step": 2572
    },
    {
      "epoch": 0.9996843204390374,
      "grad_norm": 0.6171875,
      "learning_rate": 0.0,
      "loss": 2.3925,
      "step": 2573
    },
    {
      "epoch": 0.9996843204390374,
      "step": 2573,
      "total_flos": 1.1741952995733012e+19,
      "train_loss": 2.4089672912015,
      "train_runtime": 234538.8638,
      "train_samples_per_second": 1.405,
      "train_steps_per_second": 0.011
    }
  ],
  "logging_steps": 1,
  "max_steps": 2573,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 1.1741952995733012e+19,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}