{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9999427732102322, "eval_steps": 3277, "global_step": 13105, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "grad_norm": 0.45410123467445374, "learning_rate": 2e-05, "loss": 1.4213, "step": 1 }, { "epoch": 0.0, "eval_loss": null, "eval_runtime": 310.7561, "eval_samples_per_second": 8.882, "eval_steps_per_second": 8.882, "step": 1 }, { "epoch": 0.0, "grad_norm": 0.7251469492912292, "learning_rate": 4e-05, "loss": 1.8525, "step": 2 }, { "epoch": 0.0, "grad_norm": 1.2990922927856445, "learning_rate": 6e-05, "loss": 1.6432, "step": 3 }, { "epoch": 0.0, "grad_norm": 1.1282109022140503, "learning_rate": 8e-05, "loss": 2.5206, "step": 4 }, { "epoch": 0.0, "grad_norm": 0.9063745141029358, "learning_rate": 0.0001, "loss": 2.1013, "step": 5 }, { "epoch": 0.0, "grad_norm": 1.184556007385254, "learning_rate": 0.00012, "loss": 2.3232, "step": 6 }, { "epoch": 0.0, "grad_norm": 1.7850264310836792, "learning_rate": 0.00014, "loss": 2.0417, "step": 7 }, { "epoch": 0.0, "grad_norm": 2.1782026290893555, "learning_rate": 0.00016, "loss": 2.7906, "step": 8 }, { "epoch": 0.0, "grad_norm": 1.3861820697784424, "learning_rate": 0.00018, "loss": 1.5895, "step": 9 }, { "epoch": 0.0, "grad_norm": 1.2683770656585693, "learning_rate": 0.0002, "loss": 1.7551, "step": 10 }, { "epoch": 0.0, "grad_norm": 1.0753273963928223, "learning_rate": 0.00019999999712221237, "loss": 2.2, "step": 11 }, { "epoch": 0.0, "grad_norm": 0.9728299975395203, "learning_rate": 0.00019999998848884963, "loss": 1.5247, "step": 12 }, { "epoch": 0.0, "grad_norm": 1.3187682628631592, "learning_rate": 0.0001999999740999123, "loss": 2.774, "step": 13 }, { "epoch": 0.0, "grad_norm": 1.406093716621399, "learning_rate": 0.00019999995395540116, "loss": 1.8251, "step": 14 }, { "epoch": 0.0, "grad_norm": 1.2768418788909912, "learning_rate": 0.00019999992805531743, "loss": 2.1411, "step": 15 }, 
{ "epoch": 0.0, "grad_norm": 0.7694059610366821, "learning_rate": 0.00019999989639966258, "loss": 1.9957, "step": 16 }, { "epoch": 0.0, "grad_norm": 2.5230398178100586, "learning_rate": 0.00019999985898843837, "loss": 2.3133, "step": 17 }, { "epoch": 0.0, "grad_norm": 1.1666673421859741, "learning_rate": 0.00019999981582164708, "loss": 1.9201, "step": 18 }, { "epoch": 0.0, "grad_norm": 0.8295174241065979, "learning_rate": 0.00019999976689929108, "loss": 2.2177, "step": 19 }, { "epoch": 0.0, "grad_norm": 2.0338170528411865, "learning_rate": 0.0001999997122213732, "loss": 1.8985, "step": 20 }, { "epoch": 0.0, "grad_norm": 2.8884615898132324, "learning_rate": 0.00019999965178789668, "loss": 1.8915, "step": 21 }, { "epoch": 0.0, "grad_norm": 1.3841779232025146, "learning_rate": 0.00019999958559886488, "loss": 1.9702, "step": 22 }, { "epoch": 0.0, "grad_norm": 0.810944139957428, "learning_rate": 0.0001999995136542817, "loss": 2.2155, "step": 23 }, { "epoch": 0.0, "grad_norm": 1.9053194522857666, "learning_rate": 0.00019999943595415122, "loss": 2.2444, "step": 24 }, { "epoch": 0.0, "grad_norm": 0.8204067349433899, "learning_rate": 0.00019999935249847796, "loss": 1.884, "step": 25 }, { "epoch": 0.0, "grad_norm": 0.8992658257484436, "learning_rate": 0.00019999926328726666, "loss": 2.3219, "step": 26 }, { "epoch": 0.0, "grad_norm": 0.9724821448326111, "learning_rate": 0.00019999916832052253, "loss": 1.7276, "step": 27 }, { "epoch": 0.0, "grad_norm": 1.2136181592941284, "learning_rate": 0.00019999906759825097, "loss": 1.3715, "step": 28 }, { "epoch": 0.0, "grad_norm": 0.9139611721038818, "learning_rate": 0.00019999896112045783, "loss": 1.8478, "step": 29 }, { "epoch": 0.0, "grad_norm": 2.5314719676971436, "learning_rate": 0.0001999988488871492, "loss": 1.8252, "step": 30 }, { "epoch": 0.0, "grad_norm": 1.3913846015930176, "learning_rate": 0.00019999873089833155, "loss": 2.2445, "step": 31 }, { "epoch": 0.0, "grad_norm": 0.6885438561439514, "learning_rate": 
0.0001999986071540117, "loss": 1.3283, "step": 32 }, { "epoch": 0.0, "grad_norm": 1.5001906156539917, "learning_rate": 0.00019999847765419672, "loss": 1.7455, "step": 33 }, { "epoch": 0.0, "grad_norm": 1.047377347946167, "learning_rate": 0.00019999834239889413, "loss": 1.9281, "step": 34 }, { "epoch": 0.0, "grad_norm": 1.4732745885849, "learning_rate": 0.00019999820138811165, "loss": 1.8637, "step": 35 }, { "epoch": 0.0, "grad_norm": 1.7590211629867554, "learning_rate": 0.00019999805462185742, "loss": 1.3486, "step": 36 }, { "epoch": 0.0, "grad_norm": 1.5572733879089355, "learning_rate": 0.00019999790210013988, "loss": 1.6101, "step": 37 }, { "epoch": 0.0, "grad_norm": 1.3688452243804932, "learning_rate": 0.00019999774382296784, "loss": 1.4482, "step": 38 }, { "epoch": 0.0, "grad_norm": 1.5831433534622192, "learning_rate": 0.00019999757979035038, "loss": 1.6924, "step": 39 }, { "epoch": 0.0, "grad_norm": 0.9520650506019592, "learning_rate": 0.00019999741000229694, "loss": 1.6735, "step": 40 }, { "epoch": 0.0, "grad_norm": 1.2391812801361084, "learning_rate": 0.0001999972344588173, "loss": 1.1248, "step": 41 }, { "epoch": 0.0, "grad_norm": 1.5001944303512573, "learning_rate": 0.00019999705315992158, "loss": 1.4316, "step": 42 }, { "epoch": 0.0, "grad_norm": 1.0039854049682617, "learning_rate": 0.0001999968661056202, "loss": 2.5446, "step": 43 }, { "epoch": 0.0, "grad_norm": 2.781242609024048, "learning_rate": 0.0001999966732959239, "loss": 2.7077, "step": 44 }, { "epoch": 0.0, "grad_norm": 1.2150217294692993, "learning_rate": 0.00019999647473084383, "loss": 2.3721, "step": 45 }, { "epoch": 0.0, "grad_norm": 0.732982873916626, "learning_rate": 0.00019999627041039135, "loss": 2.0522, "step": 46 }, { "epoch": 0.0, "grad_norm": 1.3739509582519531, "learning_rate": 0.0001999960603345783, "loss": 2.4984, "step": 47 }, { "epoch": 0.0, "grad_norm": 1.6436686515808105, "learning_rate": 0.00019999584450341673, "loss": 1.5277, "step": 48 }, { "epoch": 0.0, "grad_norm": 
3.0502312183380127, "learning_rate": 0.00019999562291691904, "loss": 2.7712, "step": 49 }, { "epoch": 0.0, "grad_norm": 1.3200578689575195, "learning_rate": 0.00019999539557509803, "loss": 2.4895, "step": 50 }, { "epoch": 0.0, "grad_norm": 0.9032515287399292, "learning_rate": 0.00019999516247796675, "loss": 2.0312, "step": 51 }, { "epoch": 0.0, "grad_norm": 0.8698905110359192, "learning_rate": 0.00019999492362553862, "loss": 1.496, "step": 52 }, { "epoch": 0.0, "grad_norm": 0.850273609161377, "learning_rate": 0.0001999946790178274, "loss": 2.1833, "step": 53 }, { "epoch": 0.0, "grad_norm": 1.155576229095459, "learning_rate": 0.00019999442865484718, "loss": 1.8511, "step": 54 }, { "epoch": 0.0, "grad_norm": 1.271774172782898, "learning_rate": 0.00019999417253661235, "loss": 1.7008, "step": 55 }, { "epoch": 0.0, "grad_norm": 1.2948106527328491, "learning_rate": 0.00019999391066313765, "loss": 1.9062, "step": 56 }, { "epoch": 0.0, "grad_norm": 1.868619441986084, "learning_rate": 0.00019999364303443818, "loss": 2.0243, "step": 57 }, { "epoch": 0.0, "grad_norm": 1.6104098558425903, "learning_rate": 0.0001999933696505293, "loss": 1.2715, "step": 58 }, { "epoch": 0.0, "grad_norm": 1.1146185398101807, "learning_rate": 0.00019999309051142676, "loss": 1.5645, "step": 59 }, { "epoch": 0.0, "grad_norm": 0.7155749201774597, "learning_rate": 0.00019999280561714663, "loss": 1.9563, "step": 60 }, { "epoch": 0.0, "grad_norm": 3.2062759399414062, "learning_rate": 0.00019999251496770532, "loss": 1.4613, "step": 61 }, { "epoch": 0.0, "grad_norm": 1.4700915813446045, "learning_rate": 0.00019999221856311955, "loss": 2.2156, "step": 62 }, { "epoch": 0.0, "grad_norm": 0.7746042609214783, "learning_rate": 0.00019999191640340637, "loss": 1.6155, "step": 63 }, { "epoch": 0.0, "grad_norm": 1.3241127729415894, "learning_rate": 0.0001999916084885832, "loss": 2.2444, "step": 64 }, { "epoch": 0.0, "grad_norm": 0.8392757773399353, "learning_rate": 0.00019999129481866772, "loss": 1.5016, "step": 65 
}, { "epoch": 0.01, "grad_norm": 1.6041196584701538, "learning_rate": 0.000199990975393678, "loss": 2.4986, "step": 66 }, { "epoch": 0.01, "grad_norm": 1.7917413711547852, "learning_rate": 0.00019999065021363244, "loss": 1.9794, "step": 67 }, { "epoch": 0.01, "grad_norm": 1.103078842163086, "learning_rate": 0.00019999031927854977, "loss": 1.5014, "step": 68 }, { "epoch": 0.01, "grad_norm": 1.7925314903259277, "learning_rate": 0.000199989982588449, "loss": 1.9212, "step": 69 }, { "epoch": 0.01, "grad_norm": 0.9563289284706116, "learning_rate": 0.00019998964014334946, "loss": 2.0186, "step": 70 }, { "epoch": 0.01, "grad_norm": 1.53257417678833, "learning_rate": 0.00019998929194327102, "loss": 2.1152, "step": 71 }, { "epoch": 0.01, "grad_norm": 1.3033921718597412, "learning_rate": 0.00019998893798823354, "loss": 2.0765, "step": 72 }, { "epoch": 0.01, "grad_norm": 1.1726081371307373, "learning_rate": 0.0001999885782782575, "loss": 2.1645, "step": 73 }, { "epoch": 0.01, "grad_norm": 2.1491496562957764, "learning_rate": 0.00019998821281336357, "loss": 1.6975, "step": 74 }, { "epoch": 0.01, "grad_norm": 0.7246515154838562, "learning_rate": 0.00019998784159357282, "loss": 2.0747, "step": 75 }, { "epoch": 0.01, "grad_norm": 0.9769986271858215, "learning_rate": 0.00019998746461890655, "loss": 2.0839, "step": 76 }, { "epoch": 0.01, "grad_norm": 0.8007456064224243, "learning_rate": 0.00019998708188938654, "loss": 2.1084, "step": 77 }, { "epoch": 0.01, "grad_norm": 2.295501947402954, "learning_rate": 0.00019998669340503476, "loss": 1.7898, "step": 78 }, { "epoch": 0.01, "grad_norm": 1.1990528106689453, "learning_rate": 0.00019998629916587356, "loss": 1.8521, "step": 79 }, { "epoch": 0.01, "grad_norm": 1.048797369003296, "learning_rate": 0.00019998589917192568, "loss": 1.3538, "step": 80 }, { "epoch": 0.01, "grad_norm": 1.1559489965438843, "learning_rate": 0.0001999854934232141, "loss": 2.4244, "step": 81 }, { "epoch": 0.01, "grad_norm": 1.1407077312469482, "learning_rate": 
0.00019998508191976217, "loss": 1.1343, "step": 82 }, { "epoch": 0.01, "grad_norm": 1.1128787994384766, "learning_rate": 0.00019998466466159361, "loss": 1.6265, "step": 83 }, { "epoch": 0.01, "grad_norm": 2.5000498294830322, "learning_rate": 0.00019998424164873244, "loss": 1.7546, "step": 84 }, { "epoch": 0.01, "grad_norm": 1.0617061853408813, "learning_rate": 0.00019998381288120295, "loss": 2.2774, "step": 85 }, { "epoch": 0.01, "grad_norm": 0.8307689428329468, "learning_rate": 0.00019998337835902988, "loss": 1.3431, "step": 86 }, { "epoch": 0.01, "grad_norm": 0.8003795146942139, "learning_rate": 0.0001999829380822382, "loss": 1.7346, "step": 87 }, { "epoch": 0.01, "grad_norm": 0.8299603462219238, "learning_rate": 0.00019998249205085324, "loss": 1.6789, "step": 88 }, { "epoch": 0.01, "grad_norm": 0.7043867111206055, "learning_rate": 0.00019998204026490073, "loss": 1.5343, "step": 89 }, { "epoch": 0.01, "grad_norm": 2.8338234424591064, "learning_rate": 0.0001999815827244066, "loss": 1.7827, "step": 90 }, { "epoch": 0.01, "grad_norm": 1.3711857795715332, "learning_rate": 0.0001999811194293973, "loss": 1.8016, "step": 91 }, { "epoch": 0.01, "grad_norm": 2.502826452255249, "learning_rate": 0.00019998065037989932, "loss": 2.5704, "step": 92 }, { "epoch": 0.01, "grad_norm": 0.8462558388710022, "learning_rate": 0.0001999801755759398, "loss": 1.0352, "step": 93 }, { "epoch": 0.01, "grad_norm": 0.9120460748672485, "learning_rate": 0.00019997969501754604, "loss": 1.1054, "step": 94 }, { "epoch": 0.01, "grad_norm": 0.9790778756141663, "learning_rate": 0.00019997920870474566, "loss": 2.3515, "step": 95 }, { "epoch": 0.01, "grad_norm": 0.9275172352790833, "learning_rate": 0.00019997871663756666, "loss": 1.792, "step": 96 }, { "epoch": 0.01, "grad_norm": 0.9026113748550415, "learning_rate": 0.00019997821881603738, "loss": 1.9689, "step": 97 }, { "epoch": 0.01, "grad_norm": 0.7700701355934143, "learning_rate": 0.00019997771524018644, "loss": 1.648, "step": 98 }, { "epoch": 0.01, 
"grad_norm": 1.6022276878356934, "learning_rate": 0.00019997720591004287, "loss": 2.2045, "step": 99 }, { "epoch": 0.01, "grad_norm": 0.7849050760269165, "learning_rate": 0.00019997669082563597, "loss": 1.3531, "step": 100 }, { "epoch": 0.01, "grad_norm": 1.2068754434585571, "learning_rate": 0.00019997616998699538, "loss": 2.2431, "step": 101 }, { "epoch": 0.01, "grad_norm": 1.2810484170913696, "learning_rate": 0.00019997564339415108, "loss": 1.8088, "step": 102 }, { "epoch": 0.01, "grad_norm": 0.9880865812301636, "learning_rate": 0.00019997511104713334, "loss": 2.3223, "step": 103 }, { "epoch": 0.01, "grad_norm": 1.1048378944396973, "learning_rate": 0.00019997457294597287, "loss": 2.4059, "step": 104 }, { "epoch": 0.01, "grad_norm": 1.5569361448287964, "learning_rate": 0.00019997402909070059, "loss": 1.6789, "step": 105 }, { "epoch": 0.01, "grad_norm": 1.4416158199310303, "learning_rate": 0.00019997347948134782, "loss": 1.6051, "step": 106 }, { "epoch": 0.01, "grad_norm": 1.0987638235092163, "learning_rate": 0.00019997292411794618, "loss": 1.5945, "step": 107 }, { "epoch": 0.01, "grad_norm": 2.0879933834075928, "learning_rate": 0.00019997236300052764, "loss": 2.1636, "step": 108 }, { "epoch": 0.01, "grad_norm": 0.8615714907646179, "learning_rate": 0.00019997179612912453, "loss": 1.9697, "step": 109 }, { "epoch": 0.01, "grad_norm": 0.9798447489738464, "learning_rate": 0.0001999712235037694, "loss": 1.965, "step": 110 }, { "epoch": 0.01, "grad_norm": 2.958911657333374, "learning_rate": 0.00019997064512449527, "loss": 1.8725, "step": 111 }, { "epoch": 0.01, "grad_norm": 1.1410186290740967, "learning_rate": 0.00019997006099133543, "loss": 2.3657, "step": 112 }, { "epoch": 0.01, "grad_norm": 1.2786774635314941, "learning_rate": 0.0001999694711043235, "loss": 1.9744, "step": 113 }, { "epoch": 0.01, "grad_norm": 0.985195517539978, "learning_rate": 0.0001999688754634934, "loss": 1.6957, "step": 114 }, { "epoch": 0.01, "grad_norm": 1.5668585300445557, "learning_rate": 
0.0001999682740688794, "loss": 1.3409, "step": 115 }, { "epoch": 0.01, "grad_norm": 1.1969308853149414, "learning_rate": 0.00019996766692051616, "loss": 1.0011, "step": 116 }, { "epoch": 0.01, "grad_norm": 0.7858453989028931, "learning_rate": 0.00019996705401843863, "loss": 1.7107, "step": 117 }, { "epoch": 0.01, "grad_norm": 2.4280121326446533, "learning_rate": 0.00019996643536268204, "loss": 2.4154, "step": 118 }, { "epoch": 0.01, "grad_norm": 0.8642813563346863, "learning_rate": 0.00019996581095328198, "loss": 2.0532, "step": 119 }, { "epoch": 0.01, "grad_norm": 1.7351417541503906, "learning_rate": 0.0001999651807902745, "loss": 1.9147, "step": 120 }, { "epoch": 0.01, "grad_norm": 2.47463321685791, "learning_rate": 0.00019996454487369576, "loss": 1.8741, "step": 121 }, { "epoch": 0.01, "grad_norm": 0.6731791496276855, "learning_rate": 0.0001999639032035824, "loss": 2.149, "step": 122 }, { "epoch": 0.01, "grad_norm": 1.2084543704986572, "learning_rate": 0.00019996325577997134, "loss": 1.4131, "step": 123 }, { "epoch": 0.01, "grad_norm": 1.059240460395813, "learning_rate": 0.00019996260260289988, "loss": 1.4974, "step": 124 }, { "epoch": 0.01, "grad_norm": 0.814556896686554, "learning_rate": 0.00019996194367240556, "loss": 1.9416, "step": 125 }, { "epoch": 0.01, "grad_norm": 1.241984248161316, "learning_rate": 0.00019996127898852634, "loss": 1.7025, "step": 126 }, { "epoch": 0.01, "grad_norm": 0.8262917399406433, "learning_rate": 0.00019996060855130047, "loss": 2.2607, "step": 127 }, { "epoch": 0.01, "grad_norm": 1.8986636400222778, "learning_rate": 0.00019995993236076654, "loss": 2.1107, "step": 128 }, { "epoch": 0.01, "grad_norm": 0.5485140681266785, "learning_rate": 0.0001999592504169635, "loss": 1.6071, "step": 129 }, { "epoch": 0.01, "grad_norm": 0.751686155796051, "learning_rate": 0.0001999585627199305, "loss": 2.2697, "step": 130 }, { "epoch": 0.01, "grad_norm": 1.041542887687683, "learning_rate": 0.0001999578692697072, "loss": 2.1305, "step": 131 }, { 
"epoch": 0.01, "grad_norm": 0.9713752269744873, "learning_rate": 0.0001999571700663335, "loss": 2.1648, "step": 132 }, { "epoch": 0.01, "grad_norm": 0.9641210436820984, "learning_rate": 0.00019995646510984966, "loss": 1.6729, "step": 133 }, { "epoch": 0.01, "grad_norm": 2.0673718452453613, "learning_rate": 0.00019995575440029623, "loss": 2.2571, "step": 134 }, { "epoch": 0.01, "grad_norm": 0.9364503622055054, "learning_rate": 0.00019995503793771412, "loss": 2.3535, "step": 135 }, { "epoch": 0.01, "grad_norm": 1.0308454036712646, "learning_rate": 0.00019995431572214454, "loss": 1.4252, "step": 136 }, { "epoch": 0.01, "grad_norm": 1.2035439014434814, "learning_rate": 0.0001999535877536291, "loss": 2.1617, "step": 137 }, { "epoch": 0.01, "grad_norm": 0.9525237083435059, "learning_rate": 0.0001999528540322097, "loss": 1.9498, "step": 138 }, { "epoch": 0.01, "grad_norm": 1.1666916608810425, "learning_rate": 0.00019995211455792856, "loss": 1.5342, "step": 139 }, { "epoch": 0.01, "grad_norm": 1.647475004196167, "learning_rate": 0.00019995136933082818, "loss": 1.6728, "step": 140 }, { "epoch": 0.01, "grad_norm": 0.7077816128730774, "learning_rate": 0.00019995061835095156, "loss": 1.9287, "step": 141 }, { "epoch": 0.01, "grad_norm": 1.4593638181686401, "learning_rate": 0.00019994986161834185, "loss": 1.6351, "step": 142 }, { "epoch": 0.01, "grad_norm": 1.2713106870651245, "learning_rate": 0.00019994909913304261, "loss": 2.0217, "step": 143 }, { "epoch": 0.01, "grad_norm": 0.765741765499115, "learning_rate": 0.00019994833089509776, "loss": 0.8894, "step": 144 }, { "epoch": 0.01, "grad_norm": 1.4134337902069092, "learning_rate": 0.00019994755690455152, "loss": 1.7744, "step": 145 }, { "epoch": 0.01, "grad_norm": 1.074459195137024, "learning_rate": 0.00019994677716144837, "loss": 1.8147, "step": 146 }, { "epoch": 0.01, "grad_norm": 0.797827959060669, "learning_rate": 0.00019994599166583323, "loss": 1.9437, "step": 147 }, { "epoch": 0.01, "grad_norm": 1.8741801977157593, 
"learning_rate": 0.00019994520041775133, "loss": 2.1618, "step": 148 }, { "epoch": 0.01, "grad_norm": 1.3261075019836426, "learning_rate": 0.00019994440341724817, "loss": 1.8104, "step": 149 }, { "epoch": 0.01, "grad_norm": 0.7641209363937378, "learning_rate": 0.00019994360066436968, "loss": 1.9496, "step": 150 }, { "epoch": 0.01, "grad_norm": 0.5569089651107788, "learning_rate": 0.00019994279215916204, "loss": 1.8064, "step": 151 }, { "epoch": 0.01, "grad_norm": 0.8460859656333923, "learning_rate": 0.0001999419779016717, "loss": 1.65, "step": 152 }, { "epoch": 0.01, "grad_norm": 1.07931649684906, "learning_rate": 0.00019994115789194564, "loss": 1.7287, "step": 153 }, { "epoch": 0.01, "grad_norm": 1.2176053524017334, "learning_rate": 0.00019994033213003104, "loss": 1.3766, "step": 154 }, { "epoch": 0.01, "grad_norm": 1.5501635074615479, "learning_rate": 0.00019993950061597535, "loss": 2.0094, "step": 155 }, { "epoch": 0.01, "grad_norm": 1.2299693822860718, "learning_rate": 0.00019993866334982652, "loss": 1.86, "step": 156 }, { "epoch": 0.01, "grad_norm": 0.8403663039207458, "learning_rate": 0.00019993782033163267, "loss": 1.5842, "step": 157 }, { "epoch": 0.01, "grad_norm": 0.9393737316131592, "learning_rate": 0.00019993697156144236, "loss": 1.4391, "step": 158 }, { "epoch": 0.01, "grad_norm": 0.615298867225647, "learning_rate": 0.00019993611703930444, "loss": 1.579, "step": 159 }, { "epoch": 0.01, "grad_norm": 0.726044237613678, "learning_rate": 0.00019993525676526805, "loss": 1.9981, "step": 160 }, { "epoch": 0.01, "grad_norm": 0.6733853220939636, "learning_rate": 0.00019993439073938277, "loss": 1.7055, "step": 161 }, { "epoch": 0.01, "grad_norm": 0.8863446712493896, "learning_rate": 0.00019993351896169841, "loss": 2.2111, "step": 162 }, { "epoch": 0.01, "grad_norm": 1.971258282661438, "learning_rate": 0.00019993264143226513, "loss": 2.5616, "step": 163 }, { "epoch": 0.01, "grad_norm": 0.7040120959281921, "learning_rate": 0.0001999317581511335, "loss": 1.7636, 
"step": 164 }, { "epoch": 0.01, "grad_norm": 0.8001137971878052, "learning_rate": 0.00019993086911835426, "loss": 1.5723, "step": 165 }, { "epoch": 0.01, "grad_norm": 1.2831052541732788, "learning_rate": 0.00019992997433397865, "loss": 2.0057, "step": 166 }, { "epoch": 0.01, "grad_norm": 0.9766820669174194, "learning_rate": 0.00019992907379805814, "loss": 1.3186, "step": 167 }, { "epoch": 0.01, "grad_norm": 1.3188194036483765, "learning_rate": 0.00019992816751064458, "loss": 2.3432, "step": 168 }, { "epoch": 0.01, "grad_norm": 0.5910422801971436, "learning_rate": 0.00019992725547179015, "loss": 1.7265, "step": 169 }, { "epoch": 0.01, "grad_norm": 3.2455086708068848, "learning_rate": 0.00019992633768154726, "loss": 1.8205, "step": 170 }, { "epoch": 0.01, "grad_norm": 0.7536017298698425, "learning_rate": 0.00019992541413996884, "loss": 2.0064, "step": 171 }, { "epoch": 0.01, "grad_norm": 0.7778609991073608, "learning_rate": 0.00019992448484710797, "loss": 1.922, "step": 172 }, { "epoch": 0.01, "grad_norm": 0.8164350390434265, "learning_rate": 0.00019992354980301818, "loss": 1.9699, "step": 173 }, { "epoch": 0.01, "grad_norm": 0.9734108448028564, "learning_rate": 0.00019992260900775327, "loss": 1.8403, "step": 174 }, { "epoch": 0.01, "grad_norm": 0.7634499669075012, "learning_rate": 0.00019992166246136738, "loss": 1.6053, "step": 175 }, { "epoch": 0.01, "grad_norm": 0.7739602327346802, "learning_rate": 0.000199920710163915, "loss": 1.3422, "step": 176 }, { "epoch": 0.01, "grad_norm": 1.053848147392273, "learning_rate": 0.00019991975211545096, "loss": 2.0069, "step": 177 }, { "epoch": 0.01, "grad_norm": 1.1053581237792969, "learning_rate": 0.00019991878831603034, "loss": 1.5055, "step": 178 }, { "epoch": 0.01, "grad_norm": 0.9662088751792908, "learning_rate": 0.00019991781876570866, "loss": 1.544, "step": 179 }, { "epoch": 0.01, "grad_norm": 2.874933958053589, "learning_rate": 0.00019991684346454172, "loss": 2.2153, "step": 180 }, { "epoch": 0.01, "grad_norm": 
0.9161741733551025, "learning_rate": 0.00019991586241258565, "loss": 2.2141, "step": 181 }, { "epoch": 0.01, "grad_norm": 1.0879689455032349, "learning_rate": 0.00019991487560989694, "loss": 1.6142, "step": 182 }, { "epoch": 0.01, "grad_norm": 1.668998122215271, "learning_rate": 0.00019991388305653232, "loss": 1.6554, "step": 183 }, { "epoch": 0.01, "grad_norm": 1.1204408407211304, "learning_rate": 0.00019991288475254898, "loss": 1.7809, "step": 184 }, { "epoch": 0.01, "grad_norm": 0.7213574051856995, "learning_rate": 0.00019991188069800432, "loss": 1.8458, "step": 185 }, { "epoch": 0.01, "grad_norm": 1.0164674520492554, "learning_rate": 0.0001999108708929562, "loss": 2.1485, "step": 186 }, { "epoch": 0.01, "grad_norm": 0.8731221556663513, "learning_rate": 0.00019990985533746268, "loss": 1.6672, "step": 187 }, { "epoch": 0.01, "grad_norm": 0.9952435493469238, "learning_rate": 0.00019990883403158227, "loss": 1.9197, "step": 188 }, { "epoch": 0.01, "grad_norm": 1.3534680604934692, "learning_rate": 0.0001999078069753737, "loss": 1.8587, "step": 189 }, { "epoch": 0.01, "grad_norm": 1.7056375741958618, "learning_rate": 0.00019990677416889608, "loss": 1.2476, "step": 190 }, { "epoch": 0.01, "grad_norm": 1.0682339668273926, "learning_rate": 0.0001999057356122089, "loss": 1.9533, "step": 191 }, { "epoch": 0.01, "grad_norm": 0.6645064949989319, "learning_rate": 0.00019990469130537188, "loss": 2.0364, "step": 192 }, { "epoch": 0.01, "grad_norm": 4.621946334838867, "learning_rate": 0.00019990364124844519, "loss": 1.6713, "step": 193 }, { "epoch": 0.01, "grad_norm": 0.9170424342155457, "learning_rate": 0.00019990258544148924, "loss": 1.8842, "step": 194 }, { "epoch": 0.01, "grad_norm": 1.463300108909607, "learning_rate": 0.00019990152388456474, "loss": 1.3061, "step": 195 }, { "epoch": 0.01, "grad_norm": 1.2130684852600098, "learning_rate": 0.00019990045657773288, "loss": 1.9982, "step": 196 }, { "epoch": 0.02, "grad_norm": 2.0000176429748535, "learning_rate": 
0.00019989938352105502, "loss": 2.3921, "step": 197 }, { "epoch": 0.02, "grad_norm": 2.8468453884124756, "learning_rate": 0.00019989830471459297, "loss": 1.9416, "step": 198 }, { "epoch": 0.02, "grad_norm": 0.6441767811775208, "learning_rate": 0.0001998972201584088, "loss": 1.9687, "step": 199 }, { "epoch": 0.02, "grad_norm": 0.9528114199638367, "learning_rate": 0.00019989612985256492, "loss": 2.091, "step": 200 }, { "epoch": 0.02, "grad_norm": 1.6330357789993286, "learning_rate": 0.0001998950337971241, "loss": 1.8036, "step": 201 }, { "epoch": 0.02, "grad_norm": 3.0257372856140137, "learning_rate": 0.0001998939319921494, "loss": 1.8753, "step": 202 }, { "epoch": 0.02, "grad_norm": 0.7665389180183411, "learning_rate": 0.0001998928244377043, "loss": 2.4063, "step": 203 }, { "epoch": 0.02, "grad_norm": 1.5956721305847168, "learning_rate": 0.0001998917111338525, "loss": 1.544, "step": 204 }, { "epoch": 0.02, "grad_norm": 0.9187506437301636, "learning_rate": 0.000199890592080658, "loss": 1.4546, "step": 205 }, { "epoch": 0.02, "grad_norm": 0.6023858785629272, "learning_rate": 0.00019988946727818535, "loss": 1.415, "step": 206 }, { "epoch": 0.02, "grad_norm": 0.8478423357009888, "learning_rate": 0.00019988833672649923, "loss": 1.877, "step": 207 }, { "epoch": 0.02, "grad_norm": 1.8589894771575928, "learning_rate": 0.0001998872004256647, "loss": 2.3487, "step": 208 }, { "epoch": 0.02, "grad_norm": 0.7884153127670288, "learning_rate": 0.00019988605837574714, "loss": 1.2118, "step": 209 }, { "epoch": 0.02, "grad_norm": 0.9963406920433044, "learning_rate": 0.00019988491057681233, "loss": 2.2112, "step": 210 }, { "epoch": 0.02, "grad_norm": 1.2923096418380737, "learning_rate": 0.0001998837570289263, "loss": 2.3699, "step": 211 }, { "epoch": 0.02, "grad_norm": 0.9000422358512878, "learning_rate": 0.0001998825977321555, "loss": 1.478, "step": 212 }, { "epoch": 0.02, "grad_norm": 1.1342089176177979, "learning_rate": 0.00019988143268656654, "loss": 2.2826, "step": 213 }, { 
"epoch": 0.02, "grad_norm": 0.8456466794013977, "learning_rate": 0.00019988026189222654, "loss": 2.1626, "step": 214 }, { "epoch": 0.02, "grad_norm": 2.089813470840454, "learning_rate": 0.00019987908534920294, "loss": 1.1538, "step": 215 }, { "epoch": 0.02, "grad_norm": 2.1077651977539062, "learning_rate": 0.0001998779030575634, "loss": 2.4401, "step": 216 }, { "epoch": 0.02, "grad_norm": 1.707053303718567, "learning_rate": 0.00019987671501737594, "loss": 1.8087, "step": 217 }, { "epoch": 0.02, "grad_norm": 1.5713979005813599, "learning_rate": 0.00019987552122870898, "loss": 2.6532, "step": 218 }, { "epoch": 0.02, "grad_norm": 0.817415714263916, "learning_rate": 0.00019987432169163124, "loss": 2.0425, "step": 219 }, { "epoch": 0.02, "grad_norm": 1.245464563369751, "learning_rate": 0.00019987311640621173, "loss": 1.98, "step": 220 }, { "epoch": 0.02, "grad_norm": 1.1996911764144897, "learning_rate": 0.0001998719053725198, "loss": 2.3816, "step": 221 }, { "epoch": 0.02, "grad_norm": 0.8013570308685303, "learning_rate": 0.00019987068859062524, "loss": 1.6611, "step": 222 }, { "epoch": 0.02, "grad_norm": 2.281355857849121, "learning_rate": 0.000199869466060598, "loss": 1.9847, "step": 223 }, { "epoch": 0.02, "grad_norm": 1.7554328441619873, "learning_rate": 0.00019986823778250846, "loss": 2.7592, "step": 224 }, { "epoch": 0.02, "grad_norm": 1.1366513967514038, "learning_rate": 0.00019986700375642733, "loss": 1.8246, "step": 225 }, { "epoch": 0.02, "grad_norm": 1.080142855644226, "learning_rate": 0.00019986576398242566, "loss": 2.1135, "step": 226 }, { "epoch": 0.02, "grad_norm": 0.5567632913589478, "learning_rate": 0.00019986451846057475, "loss": 1.102, "step": 227 }, { "epoch": 0.02, "grad_norm": 1.162177324295044, "learning_rate": 0.0001998632671909463, "loss": 2.0391, "step": 228 }, { "epoch": 0.02, "grad_norm": 1.0035229921340942, "learning_rate": 0.00019986201017361236, "loss": 2.3329, "step": 229 }, { "epoch": 0.02, "grad_norm": 1.3181344270706177, 
"learning_rate": 0.00019986074740864526, "loss": 2.2274, "step": 230 }, { "epoch": 0.02, "grad_norm": 0.8652102947235107, "learning_rate": 0.00019985947889611768, "loss": 1.3152, "step": 231 }, { "epoch": 0.02, "grad_norm": 0.8723015189170837, "learning_rate": 0.00019985820463610263, "loss": 1.5977, "step": 232 }, { "epoch": 0.02, "grad_norm": 0.8724658489227295, "learning_rate": 0.00019985692462867345, "loss": 1.4771, "step": 233 }, { "epoch": 0.02, "grad_norm": 0.8430564403533936, "learning_rate": 0.00019985563887390382, "loss": 1.6379, "step": 234 }, { "epoch": 0.02, "grad_norm": 3.082324266433716, "learning_rate": 0.0001998543473718677, "loss": 2.0449, "step": 235 }, { "epoch": 0.02, "grad_norm": 1.5420116186141968, "learning_rate": 0.0001998530501226395, "loss": 1.7968, "step": 236 }, { "epoch": 0.02, "grad_norm": 2.135408878326416, "learning_rate": 0.00019985174712629382, "loss": 2.2123, "step": 237 }, { "epoch": 0.02, "grad_norm": 1.7866160869598389, "learning_rate": 0.00019985043838290567, "loss": 1.8697, "step": 238 }, { "epoch": 0.02, "grad_norm": 0.9751622080802917, "learning_rate": 0.0001998491238925504, "loss": 1.5772, "step": 239 }, { "epoch": 0.02, "grad_norm": 0.6129955053329468, "learning_rate": 0.0001998478036553036, "loss": 1.688, "step": 240 }, { "epoch": 0.02, "grad_norm": 0.8484302163124084, "learning_rate": 0.00019984647767124137, "loss": 2.6575, "step": 241 }, { "epoch": 0.02, "grad_norm": 0.7841575145721436, "learning_rate": 0.00019984514594043997, "loss": 2.1221, "step": 242 }, { "epoch": 0.02, "grad_norm": 0.6913597583770752, "learning_rate": 0.000199843808462976, "loss": 1.1017, "step": 243 }, { "epoch": 0.02, "grad_norm": 1.6880347728729248, "learning_rate": 0.0001998424652389265, "loss": 2.4555, "step": 244 }, { "epoch": 0.02, "grad_norm": 5.132293701171875, "learning_rate": 0.00019984111626836877, "loss": 2.2235, "step": 245 }, { "epoch": 0.02, "grad_norm": 0.7611050009727478, "learning_rate": 0.0001998397615513804, "loss": 2.0753, 
"step": 246 }, { "epoch": 0.02, "grad_norm": 1.1007938385009766, "learning_rate": 0.0001998384010880395, "loss": 1.7038, "step": 247 }, { "epoch": 0.02, "grad_norm": 0.7455108165740967, "learning_rate": 0.0001998370348784242, "loss": 1.9125, "step": 248 }, { "epoch": 0.02, "grad_norm": 2.1858012676239014, "learning_rate": 0.00019983566292261325, "loss": 2.5179, "step": 249 }, { "epoch": 0.02, "grad_norm": 0.7544388771057129, "learning_rate": 0.00019983428522068556, "loss": 1.8363, "step": 250 }, { "epoch": 0.02, "grad_norm": 1.0706084966659546, "learning_rate": 0.00019983290177272043, "loss": 1.4831, "step": 251 }, { "epoch": 0.02, "grad_norm": 1.9289191961288452, "learning_rate": 0.0001998315125787975, "loss": 1.4766, "step": 252 }, { "epoch": 0.02, "grad_norm": 1.027898907661438, "learning_rate": 0.00019983011763899673, "loss": 1.9899, "step": 253 }, { "epoch": 0.02, "grad_norm": 1.2141337394714355, "learning_rate": 0.00019982871695339838, "loss": 1.1595, "step": 254 }, { "epoch": 0.02, "grad_norm": 0.9989245533943176, "learning_rate": 0.00019982731052208309, "loss": 1.1361, "step": 255 }, { "epoch": 0.02, "grad_norm": 0.6774826049804688, "learning_rate": 0.00019982589834513184, "loss": 2.1019, "step": 256 }, { "epoch": 0.02, "grad_norm": 0.7727795243263245, "learning_rate": 0.00019982448042262584, "loss": 1.9289, "step": 257 }, { "epoch": 0.02, "grad_norm": 0.8177581429481506, "learning_rate": 0.00019982305675464673, "loss": 2.2448, "step": 258 }, { "epoch": 0.02, "grad_norm": 1.0444766283035278, "learning_rate": 0.00019982162734127648, "loss": 2.2389, "step": 259 }, { "epoch": 0.02, "grad_norm": 1.218345046043396, "learning_rate": 0.0001998201921825973, "loss": 1.9499, "step": 260 }, { "epoch": 0.02, "grad_norm": 2.3579437732696533, "learning_rate": 0.00019981875127869186, "loss": 2.2892, "step": 261 }, { "epoch": 0.02, "grad_norm": 0.9824919700622559, "learning_rate": 0.00019981730462964302, "loss": 2.3904, "step": 262 }, { "epoch": 0.02, "grad_norm": 
2.1893577575683594, "learning_rate": 0.00019981585223553415, "loss": 2.2047, "step": 263 }, { "epoch": 0.02, "grad_norm": 1.428501844406128, "learning_rate": 0.0001998143940964487, "loss": 1.7942, "step": 264 }, { "epoch": 0.02, "grad_norm": 0.7429963946342468, "learning_rate": 0.0001998129302124707, "loss": 2.0621, "step": 265 }, { "epoch": 0.02, "grad_norm": 0.7093138694763184, "learning_rate": 0.00019981146058368435, "loss": 1.811, "step": 266 }, { "epoch": 0.02, "grad_norm": 0.9486374855041504, "learning_rate": 0.00019980998521017428, "loss": 1.7163, "step": 267 }, { "epoch": 0.02, "grad_norm": 2.956876754760742, "learning_rate": 0.00019980850409202541, "loss": 2.8648, "step": 268 }, { "epoch": 0.02, "grad_norm": 1.1991487741470337, "learning_rate": 0.00019980701722932293, "loss": 1.6434, "step": 269 }, { "epoch": 0.02, "grad_norm": 0.8741233944892883, "learning_rate": 0.00019980552462215244, "loss": 2.1506, "step": 270 }, { "epoch": 0.02, "grad_norm": 1.0820194482803345, "learning_rate": 0.00019980402627059989, "loss": 2.1971, "step": 271 }, { "epoch": 0.02, "grad_norm": 1.218521237373352, "learning_rate": 0.00019980252217475147, "loss": 1.973, "step": 272 }, { "epoch": 0.02, "grad_norm": 0.7852482199668884, "learning_rate": 0.00019980101233469378, "loss": 1.9439, "step": 273 }, { "epoch": 0.02, "grad_norm": 0.7520068287849426, "learning_rate": 0.00019979949675051368, "loss": 1.5185, "step": 274 }, { "epoch": 0.02, "grad_norm": 1.1398406028747559, "learning_rate": 0.00019979797542229845, "loss": 1.8628, "step": 275 }, { "epoch": 0.02, "grad_norm": 0.9981680512428284, "learning_rate": 0.00019979644835013563, "loss": 1.574, "step": 276 }, { "epoch": 0.02, "grad_norm": 1.411102056503296, "learning_rate": 0.00019979491553411309, "loss": 1.9833, "step": 277 }, { "epoch": 0.02, "grad_norm": 1.9486768245697021, "learning_rate": 0.0001997933769743191, "loss": 1.9586, "step": 278 }, { "epoch": 0.02, "grad_norm": 1.0804142951965332, "learning_rate": 
0.00019979183267084216, "loss": 2.9337, "step": 279 }, { "epoch": 0.02, "grad_norm": 0.7289422154426575, "learning_rate": 0.00019979028262377118, "loss": 1.3351, "step": 280 }, { "epoch": 0.02, "grad_norm": 1.171309232711792, "learning_rate": 0.00019978872683319536, "loss": 1.7678, "step": 281 }, { "epoch": 0.02, "grad_norm": 4.117319583892822, "learning_rate": 0.0001997871652992043, "loss": 2.884, "step": 282 }, { "epoch": 0.02, "grad_norm": 0.8022302389144897, "learning_rate": 0.0001997855980218878, "loss": 1.9695, "step": 283 }, { "epoch": 0.02, "grad_norm": 0.7802802920341492, "learning_rate": 0.0001997840250013361, "loss": 1.9984, "step": 284 }, { "epoch": 0.02, "grad_norm": 1.2498486042022705, "learning_rate": 0.00019978244623763974, "loss": 1.3041, "step": 285 }, { "epoch": 0.02, "grad_norm": 1.9428611993789673, "learning_rate": 0.0001997808617308896, "loss": 2.1988, "step": 286 }, { "epoch": 0.02, "grad_norm": 1.804100751876831, "learning_rate": 0.00019977927148117687, "loss": 1.0112, "step": 287 }, { "epoch": 0.02, "grad_norm": 0.7393108010292053, "learning_rate": 0.000199777675488593, "loss": 1.6, "step": 288 }, { "epoch": 0.02, "grad_norm": 1.3401312828063965, "learning_rate": 0.00019977607375322998, "loss": 2.0307, "step": 289 }, { "epoch": 0.02, "grad_norm": 1.6161187887191772, "learning_rate": 0.00019977446627517987, "loss": 1.5814, "step": 290 }, { "epoch": 0.02, "grad_norm": 0.9855930209159851, "learning_rate": 0.00019977285305453532, "loss": 2.3217, "step": 291 }, { "epoch": 0.02, "grad_norm": 1.1584293842315674, "learning_rate": 0.0001997712340913891, "loss": 2.3326, "step": 292 }, { "epoch": 0.02, "grad_norm": 2.426003932952881, "learning_rate": 0.0001997696093858344, "loss": 1.7719, "step": 293 }, { "epoch": 0.02, "grad_norm": 1.0472297668457031, "learning_rate": 0.00019976797893796473, "loss": 2.0751, "step": 294 }, { "epoch": 0.02, "grad_norm": 1.8439749479293823, "learning_rate": 0.00019976634274787393, "loss": 1.196, "step": 295 }, { 
"epoch": 0.02, "grad_norm": 1.5824000835418701, "learning_rate": 0.0001997647008156562, "loss": 2.3454, "step": 296 }, { "epoch": 0.02, "grad_norm": 0.8545791506767273, "learning_rate": 0.000199763053141406, "loss": 2.2709, "step": 297 }, { "epoch": 0.02, "grad_norm": 1.2668187618255615, "learning_rate": 0.00019976139972521822, "loss": 1.9702, "step": 298 }, { "epoch": 0.02, "grad_norm": 0.8402678370475769, "learning_rate": 0.00019975974056718796, "loss": 1.5973, "step": 299 }, { "epoch": 0.02, "grad_norm": 2.4824697971343994, "learning_rate": 0.00019975807566741075, "loss": 2.5337, "step": 300 }, { "epoch": 0.02, "grad_norm": 0.9373719096183777, "learning_rate": 0.00019975640502598244, "loss": 1.9289, "step": 301 }, { "epoch": 0.02, "grad_norm": 1.1827287673950195, "learning_rate": 0.00019975472864299914, "loss": 1.7936, "step": 302 }, { "epoch": 0.02, "grad_norm": 0.5912702679634094, "learning_rate": 0.0001997530465185573, "loss": 1.5722, "step": 303 }, { "epoch": 0.02, "grad_norm": 0.6746929883956909, "learning_rate": 0.00019975135865275384, "loss": 1.3921, "step": 304 }, { "epoch": 0.02, "grad_norm": 0.9725465178489685, "learning_rate": 0.00019974966504568583, "loss": 2.2774, "step": 305 }, { "epoch": 0.02, "grad_norm": 1.305797815322876, "learning_rate": 0.00019974796569745078, "loss": 1.9387, "step": 306 }, { "epoch": 0.02, "grad_norm": 2.229654312133789, "learning_rate": 0.00019974626060814647, "loss": 1.8881, "step": 307 }, { "epoch": 0.02, "grad_norm": 1.225529670715332, "learning_rate": 0.00019974454977787106, "loss": 1.7861, "step": 308 }, { "epoch": 0.02, "grad_norm": 1.234098196029663, "learning_rate": 0.000199742833206723, "loss": 2.345, "step": 309 }, { "epoch": 0.02, "grad_norm": 1.7822078466415405, "learning_rate": 0.00019974111089480112, "loss": 1.8486, "step": 310 }, { "epoch": 0.02, "grad_norm": 2.728205919265747, "learning_rate": 0.0001997393828422045, "loss": 2.3282, "step": 311 }, { "epoch": 0.02, "grad_norm": 0.7699952125549316, 
"learning_rate": 0.00019973764904903266, "loss": 1.911, "step": 312 }, { "epoch": 0.02, "grad_norm": 1.265279769897461, "learning_rate": 0.00019973590951538535, "loss": 1.5183, "step": 313 }, { "epoch": 0.02, "grad_norm": 2.065476655960083, "learning_rate": 0.0001997341642413627, "loss": 1.6623, "step": 314 }, { "epoch": 0.02, "grad_norm": 1.1003106832504272, "learning_rate": 0.00019973241322706513, "loss": 2.0589, "step": 315 }, { "epoch": 0.02, "grad_norm": 0.817277193069458, "learning_rate": 0.00019973065647259348, "loss": 1.6957, "step": 316 }, { "epoch": 0.02, "grad_norm": 1.0311956405639648, "learning_rate": 0.00019972889397804882, "loss": 1.8317, "step": 317 }, { "epoch": 0.02, "grad_norm": 1.4302384853363037, "learning_rate": 0.0001997271257435326, "loss": 1.6324, "step": 318 }, { "epoch": 0.02, "grad_norm": 2.2113277912139893, "learning_rate": 0.0001997253517691466, "loss": 2.0998, "step": 319 }, { "epoch": 0.02, "grad_norm": 1.4546974897384644, "learning_rate": 0.00019972357205499293, "loss": 2.3111, "step": 320 }, { "epoch": 0.02, "grad_norm": 1.0362048149108887, "learning_rate": 0.000199721786601174, "loss": 1.699, "step": 321 }, { "epoch": 0.02, "grad_norm": 2.066162586212158, "learning_rate": 0.00019971999540779257, "loss": 1.6264, "step": 322 }, { "epoch": 0.02, "grad_norm": 0.8762815594673157, "learning_rate": 0.00019971819847495174, "loss": 1.9989, "step": 323 }, { "epoch": 0.02, "grad_norm": 0.6877384781837463, "learning_rate": 0.00019971639580275497, "loss": 1.4287, "step": 324 }, { "epoch": 0.02, "grad_norm": 1.458845853805542, "learning_rate": 0.00019971458739130598, "loss": 1.9527, "step": 325 }, { "epoch": 0.02, "grad_norm": 0.8904917240142822, "learning_rate": 0.00019971277324070882, "loss": 1.9034, "step": 326 }, { "epoch": 0.02, "grad_norm": 1.1318106651306152, "learning_rate": 0.00019971095335106798, "loss": 2.0233, "step": 327 }, { "epoch": 0.03, "grad_norm": 0.7414654493331909, "learning_rate": 0.00019970912772248817, "loss": 1.9179, 
"step": 328 }, { "epoch": 0.03, "grad_norm": 2.952807903289795, "learning_rate": 0.00019970729635507446, "loss": 1.7301, "step": 329 }, { "epoch": 0.03, "grad_norm": 1.0126516819000244, "learning_rate": 0.00019970545924893226, "loss": 1.919, "step": 330 }, { "epoch": 0.03, "grad_norm": 1.3724552392959595, "learning_rate": 0.0001997036164041673, "loss": 1.58, "step": 331 }, { "epoch": 0.03, "grad_norm": 0.9053062796592712, "learning_rate": 0.00019970176782088564, "loss": 1.0669, "step": 332 }, { "epoch": 0.03, "grad_norm": 0.7937864661216736, "learning_rate": 0.0001996999134991937, "loss": 1.9044, "step": 333 }, { "epoch": 0.03, "grad_norm": 1.1764119863510132, "learning_rate": 0.00019969805343919821, "loss": 2.0231, "step": 334 }, { "epoch": 0.03, "grad_norm": 1.1053706407546997, "learning_rate": 0.00019969618764100621, "loss": 1.771, "step": 335 }, { "epoch": 0.03, "grad_norm": 0.7973354458808899, "learning_rate": 0.00019969431610472507, "loss": 1.0391, "step": 336 }, { "epoch": 0.03, "grad_norm": 0.8821333050727844, "learning_rate": 0.00019969243883046254, "loss": 1.3576, "step": 337 }, { "epoch": 0.03, "grad_norm": 1.7038580179214478, "learning_rate": 0.00019969055581832667, "loss": 1.9757, "step": 338 }, { "epoch": 0.03, "grad_norm": 0.8088635206222534, "learning_rate": 0.00019968866706842578, "loss": 1.9211, "step": 339 }, { "epoch": 0.03, "grad_norm": 1.026418924331665, "learning_rate": 0.00019968677258086866, "loss": 1.6674, "step": 340 }, { "epoch": 0.03, "grad_norm": 1.5246156454086304, "learning_rate": 0.0001996848723557643, "loss": 1.9157, "step": 341 }, { "epoch": 0.03, "grad_norm": 1.515990972518921, "learning_rate": 0.0001996829663932221, "loss": 1.8046, "step": 342 }, { "epoch": 0.03, "grad_norm": 1.3907099962234497, "learning_rate": 0.0001996810546933517, "loss": 1.6799, "step": 343 }, { "epoch": 0.03, "grad_norm": 0.6272425651550293, "learning_rate": 0.0001996791372562632, "loss": 1.7037, "step": 344 }, { "epoch": 0.03, "grad_norm": 
0.8179852366447449, "learning_rate": 0.00019967721408206695, "loss": 1.9273, "step": 345 }, { "epoch": 0.03, "grad_norm": 0.8940801024436951, "learning_rate": 0.00019967528517087358, "loss": 1.797, "step": 346 }, { "epoch": 0.03, "grad_norm": 0.9504759311676025, "learning_rate": 0.00019967335052279418, "loss": 2.0648, "step": 347 }, { "epoch": 0.03, "grad_norm": 1.246158242225647, "learning_rate": 0.00019967141013794003, "loss": 1.9323, "step": 348 }, { "epoch": 0.03, "grad_norm": 1.09738290309906, "learning_rate": 0.00019966946401642286, "loss": 1.835, "step": 349 }, { "epoch": 0.03, "grad_norm": 1.151884913444519, "learning_rate": 0.00019966751215835465, "loss": 1.8152, "step": 350 }, { "epoch": 0.03, "grad_norm": 0.8854889869689941, "learning_rate": 0.0001996655545638478, "loss": 1.7393, "step": 351 }, { "epoch": 0.03, "grad_norm": 0.9428744316101074, "learning_rate": 0.00019966359123301493, "loss": 1.4151, "step": 352 }, { "epoch": 0.03, "grad_norm": 0.993353009223938, "learning_rate": 0.000199661622165969, "loss": 1.286, "step": 353 }, { "epoch": 0.03, "grad_norm": 1.0609303712844849, "learning_rate": 0.00019965964736282346, "loss": 1.9335, "step": 354 }, { "epoch": 0.03, "grad_norm": 0.8277542591094971, "learning_rate": 0.00019965766682369186, "loss": 1.8575, "step": 355 }, { "epoch": 0.03, "grad_norm": 1.6149780750274658, "learning_rate": 0.00019965568054868826, "loss": 1.8291, "step": 356 }, { "epoch": 0.03, "grad_norm": 1.0144153833389282, "learning_rate": 0.00019965368853792694, "loss": 1.3893, "step": 357 }, { "epoch": 0.03, "grad_norm": 1.1595757007598877, "learning_rate": 0.0001996516907915226, "loss": 1.478, "step": 358 }, { "epoch": 0.03, "grad_norm": 1.2564284801483154, "learning_rate": 0.00019964968730959014, "loss": 1.8689, "step": 359 }, { "epoch": 0.03, "grad_norm": 1.2424649000167847, "learning_rate": 0.00019964767809224496, "loss": 1.9762, "step": 360 }, { "epoch": 0.03, "grad_norm": 1.0488390922546387, "learning_rate": 0.00019964566313960264, 
"loss": 1.3674, "step": 361 }, { "epoch": 0.03, "grad_norm": 1.2761868238449097, "learning_rate": 0.00019964364245177922, "loss": 1.0024, "step": 362 }, { "epoch": 0.03, "grad_norm": 1.2921425104141235, "learning_rate": 0.00019964161602889093, "loss": 2.0791, "step": 363 }, { "epoch": 0.03, "grad_norm": 3.443358898162842, "learning_rate": 0.00019963958387105441, "loss": 3.254, "step": 364 }, { "epoch": 0.03, "grad_norm": 0.7699267864227295, "learning_rate": 0.00019963754597838667, "loss": 2.4841, "step": 365 }, { "epoch": 0.03, "grad_norm": 1.987548589706421, "learning_rate": 0.00019963550235100496, "loss": 2.0013, "step": 366 }, { "epoch": 0.03, "grad_norm": 1.2103132009506226, "learning_rate": 0.0001996334529890269, "loss": 1.3938, "step": 367 }, { "epoch": 0.03, "grad_norm": 1.2532306909561157, "learning_rate": 0.00019963139789257054, "loss": 1.8838, "step": 368 }, { "epoch": 0.03, "grad_norm": 1.2966642379760742, "learning_rate": 0.000199629337061754, "loss": 1.8002, "step": 369 }, { "epoch": 0.03, "grad_norm": 0.8649606108665466, "learning_rate": 0.000199627270496696, "loss": 1.6922, "step": 370 }, { "epoch": 0.03, "grad_norm": 1.0057348012924194, "learning_rate": 0.00019962519819751546, "loss": 1.1645, "step": 371 }, { "epoch": 0.03, "grad_norm": 1.637627363204956, "learning_rate": 0.00019962312016433167, "loss": 1.6489, "step": 372 }, { "epoch": 0.03, "grad_norm": 1.1382900476455688, "learning_rate": 0.0001996210363972642, "loss": 1.7392, "step": 373 }, { "epoch": 0.03, "grad_norm": 0.6587722301483154, "learning_rate": 0.000199618946896433, "loss": 2.0092, "step": 374 }, { "epoch": 0.03, "grad_norm": 0.8325609564781189, "learning_rate": 0.0001996168516619583, "loss": 1.4445, "step": 375 }, { "epoch": 0.03, "grad_norm": 1.7013490200042725, "learning_rate": 0.00019961475069396075, "loss": 1.643, "step": 376 }, { "epoch": 0.03, "grad_norm": 2.77008318901062, "learning_rate": 0.00019961264399256124, "loss": 2.351, "step": 377 }, { "epoch": 0.03, "grad_norm": 
0.8842824101448059, "learning_rate": 0.00019961053155788104, "loss": 2.1294, "step": 378 }, { "epoch": 0.03, "grad_norm": 1.2133740186691284, "learning_rate": 0.0001996084133900417, "loss": 1.59, "step": 379 }, { "epoch": 0.03, "grad_norm": 1.005385160446167, "learning_rate": 0.00019960628948916518, "loss": 1.6225, "step": 380 }, { "epoch": 0.03, "grad_norm": 0.7907528281211853, "learning_rate": 0.00019960415985537368, "loss": 1.3604, "step": 381 }, { "epoch": 0.03, "grad_norm": 1.0974928140640259, "learning_rate": 0.00019960202448878977, "loss": 1.6013, "step": 382 }, { "epoch": 0.03, "grad_norm": 1.1914106607437134, "learning_rate": 0.00019959988338953642, "loss": 1.4437, "step": 383 }, { "epoch": 0.03, "grad_norm": 1.7708309888839722, "learning_rate": 0.00019959773655773674, "loss": 2.228, "step": 384 }, { "epoch": 0.03, "grad_norm": 1.1300514936447144, "learning_rate": 0.00019959558399351444, "loss": 1.4486, "step": 385 }, { "epoch": 0.03, "grad_norm": 0.9277282357215881, "learning_rate": 0.0001995934256969933, "loss": 1.8026, "step": 386 }, { "epoch": 0.03, "grad_norm": 1.6273610591888428, "learning_rate": 0.0001995912616682976, "loss": 1.1808, "step": 387 }, { "epoch": 0.03, "grad_norm": 0.8576773405075073, "learning_rate": 0.00019958909190755187, "loss": 1.7813, "step": 388 }, { "epoch": 0.03, "grad_norm": 0.7719049453735352, "learning_rate": 0.00019958691641488096, "loss": 1.4685, "step": 389 }, { "epoch": 0.03, "grad_norm": 1.335814118385315, "learning_rate": 0.00019958473519041017, "loss": 1.8107, "step": 390 }, { "epoch": 0.03, "grad_norm": 0.8164095282554626, "learning_rate": 0.00019958254823426497, "loss": 1.9289, "step": 391 }, { "epoch": 0.03, "grad_norm": 1.2071846723556519, "learning_rate": 0.00019958035554657125, "loss": 1.272, "step": 392 }, { "epoch": 0.03, "grad_norm": 0.8431015610694885, "learning_rate": 0.00019957815712745523, "loss": 1.7626, "step": 393 }, { "epoch": 0.03, "grad_norm": 0.9683548808097839, "learning_rate": 
0.0001995759529770434, "loss": 1.8147, "step": 394 }, { "epoch": 0.03, "grad_norm": 1.1180576086044312, "learning_rate": 0.00019957374309546264, "loss": 2.0156, "step": 395 }, { "epoch": 0.03, "grad_norm": 0.7776530385017395, "learning_rate": 0.00019957152748284018, "loss": 1.8817, "step": 396 }, { "epoch": 0.03, "grad_norm": 1.6860253810882568, "learning_rate": 0.0001995693061393035, "loss": 1.0944, "step": 397 }, { "epoch": 0.03, "grad_norm": 1.0393496751785278, "learning_rate": 0.00019956707906498044, "loss": 1.9973, "step": 398 }, { "epoch": 0.03, "grad_norm": 0.8382661938667297, "learning_rate": 0.00019956484625999923, "loss": 1.5146, "step": 399 }, { "epoch": 0.03, "grad_norm": 1.1395013332366943, "learning_rate": 0.00019956260772448833, "loss": 2.3457, "step": 400 }, { "epoch": 0.03, "grad_norm": 0.9239707589149475, "learning_rate": 0.00019956036345857662, "loss": 2.2234, "step": 401 }, { "epoch": 0.03, "grad_norm": 6.141939640045166, "learning_rate": 0.00019955811346239325, "loss": 1.7306, "step": 402 }, { "epoch": 0.03, "grad_norm": 0.9339370131492615, "learning_rate": 0.00019955585773606771, "loss": 2.0505, "step": 403 }, { "epoch": 0.03, "grad_norm": 0.941452145576477, "learning_rate": 0.00019955359627972988, "loss": 1.6982, "step": 404 }, { "epoch": 0.03, "grad_norm": 1.2828582525253296, "learning_rate": 0.00019955132909350984, "loss": 1.9691, "step": 405 }, { "epoch": 0.03, "grad_norm": 1.0828039646148682, "learning_rate": 0.00019954905617753814, "loss": 1.7594, "step": 406 }, { "epoch": 0.03, "grad_norm": 1.1006799936294556, "learning_rate": 0.00019954677753194558, "loss": 1.7048, "step": 407 }, { "epoch": 0.03, "grad_norm": 1.1713374853134155, "learning_rate": 0.0001995444931568633, "loss": 1.4476, "step": 408 }, { "epoch": 0.03, "grad_norm": 1.8281381130218506, "learning_rate": 0.0001995422030524228, "loss": 1.9355, "step": 409 }, { "epoch": 0.03, "grad_norm": 0.9969196915626526, "learning_rate": 0.00019953990721875592, "loss": 1.9453, "step": 410 
}, { "epoch": 0.03, "grad_norm": 2.692786931991577, "learning_rate": 0.00019953760565599471, "loss": 1.9574, "step": 411 }, { "epoch": 0.03, "grad_norm": 1.2468665838241577, "learning_rate": 0.00019953529836427172, "loss": 1.6904, "step": 412 }, { "epoch": 0.03, "grad_norm": 0.739721417427063, "learning_rate": 0.00019953298534371966, "loss": 1.4738, "step": 413 }, { "epoch": 0.03, "grad_norm": 0.7801087498664856, "learning_rate": 0.00019953066659447174, "loss": 1.3677, "step": 414 }, { "epoch": 0.03, "grad_norm": 1.2312002182006836, "learning_rate": 0.0001995283421166614, "loss": 2.293, "step": 415 }, { "epoch": 0.03, "grad_norm": 1.1737828254699707, "learning_rate": 0.0001995260119104224, "loss": 1.633, "step": 416 }, { "epoch": 0.03, "grad_norm": 1.9620623588562012, "learning_rate": 0.0001995236759758889, "loss": 2.3641, "step": 417 }, { "epoch": 0.03, "grad_norm": 1.0960651636123657, "learning_rate": 0.0001995213343131953, "loss": 1.9276, "step": 418 }, { "epoch": 0.03, "grad_norm": 2.4064154624938965, "learning_rate": 0.0001995189869224764, "loss": 1.3995, "step": 419 }, { "epoch": 0.03, "grad_norm": 1.6602115631103516, "learning_rate": 0.0001995166338038673, "loss": 1.8657, "step": 420 }, { "epoch": 0.03, "grad_norm": 0.8541923761367798, "learning_rate": 0.0001995142749575034, "loss": 1.7183, "step": 421 }, { "epoch": 0.03, "grad_norm": 1.2181060314178467, "learning_rate": 0.00019951191038352058, "loss": 1.6872, "step": 422 }, { "epoch": 0.03, "grad_norm": 0.793938934803009, "learning_rate": 0.0001995095400820548, "loss": 2.017, "step": 423 }, { "epoch": 0.03, "grad_norm": 0.9077907204627991, "learning_rate": 0.00019950716405324253, "loss": 2.0289, "step": 424 }, { "epoch": 0.03, "grad_norm": 1.6919528245925903, "learning_rate": 0.00019950478229722055, "loss": 2.7377, "step": 425 }, { "epoch": 0.03, "grad_norm": 1.4002418518066406, "learning_rate": 0.00019950239481412595, "loss": 2.2713, "step": 426 }, { "epoch": 0.03, "grad_norm": 1.285338282585144, 
"learning_rate": 0.0001995000016040961, "loss": 1.7612, "step": 427 }, { "epoch": 0.03, "grad_norm": 0.9478429555892944, "learning_rate": 0.0001994976026672688, "loss": 1.181, "step": 428 }, { "epoch": 0.03, "grad_norm": 1.658583641052246, "learning_rate": 0.00019949519800378207, "loss": 2.0664, "step": 429 }, { "epoch": 0.03, "grad_norm": 1.538787603378296, "learning_rate": 0.0001994927876137743, "loss": 1.5557, "step": 430 }, { "epoch": 0.03, "grad_norm": 0.7657317519187927, "learning_rate": 0.00019949037149738426, "loss": 1.9928, "step": 431 }, { "epoch": 0.03, "grad_norm": 3.3424248695373535, "learning_rate": 0.00019948794965475103, "loss": 2.6939, "step": 432 }, { "epoch": 0.03, "grad_norm": 1.3102374076843262, "learning_rate": 0.000199485522086014, "loss": 1.2892, "step": 433 }, { "epoch": 0.03, "grad_norm": 0.669221818447113, "learning_rate": 0.0001994830887913128, "loss": 1.9838, "step": 434 }, { "epoch": 0.03, "grad_norm": 0.8850786685943604, "learning_rate": 0.00019948064977078758, "loss": 1.665, "step": 435 }, { "epoch": 0.03, "grad_norm": 1.275571346282959, "learning_rate": 0.00019947820502457864, "loss": 2.1985, "step": 436 }, { "epoch": 0.03, "grad_norm": 1.2471579313278198, "learning_rate": 0.00019947575455282678, "loss": 2.148, "step": 437 }, { "epoch": 0.03, "grad_norm": 1.2751306295394897, "learning_rate": 0.00019947329835567302, "loss": 1.5993, "step": 438 }, { "epoch": 0.03, "grad_norm": 1.4195226430892944, "learning_rate": 0.00019947083643325866, "loss": 1.0091, "step": 439 }, { "epoch": 0.03, "grad_norm": 0.8218662142753601, "learning_rate": 0.00019946836878572544, "loss": 1.8316, "step": 440 }, { "epoch": 0.03, "grad_norm": 0.8148865699768066, "learning_rate": 0.00019946589541321538, "loss": 1.5417, "step": 441 }, { "epoch": 0.03, "grad_norm": 2.3524208068847656, "learning_rate": 0.00019946341631587087, "loss": 1.5768, "step": 442 }, { "epoch": 0.03, "grad_norm": 1.027645468711853, "learning_rate": 0.00019946093149383453, "loss": 2.0214, 
"step": 443 }, { "epoch": 0.03, "grad_norm": 0.8279436230659485, "learning_rate": 0.00019945844094724943, "loss": 1.8645, "step": 444 }, { "epoch": 0.03, "grad_norm": 1.144689917564392, "learning_rate": 0.00019945594467625895, "loss": 1.6579, "step": 445 }, { "epoch": 0.03, "grad_norm": 1.0897934436798096, "learning_rate": 0.00019945344268100664, "loss": 1.7073, "step": 446 }, { "epoch": 0.03, "grad_norm": 1.9557405710220337, "learning_rate": 0.00019945093496163662, "loss": 2.4874, "step": 447 }, { "epoch": 0.03, "grad_norm": 1.5208145380020142, "learning_rate": 0.0001994484215182932, "loss": 1.7406, "step": 448 }, { "epoch": 0.03, "grad_norm": 1.5706861019134521, "learning_rate": 0.000199445902351121, "loss": 1.7615, "step": 449 }, { "epoch": 0.03, "grad_norm": 1.0953196287155151, "learning_rate": 0.00019944337746026502, "loss": 1.5082, "step": 450 }, { "epoch": 0.03, "grad_norm": 1.5043058395385742, "learning_rate": 0.0001994408468458706, "loss": 1.6829, "step": 451 }, { "epoch": 0.03, "grad_norm": 0.7608324885368347, "learning_rate": 0.00019943831050808344, "loss": 1.5251, "step": 452 }, { "epoch": 0.03, "grad_norm": 1.0430620908737183, "learning_rate": 0.00019943576844704943, "loss": 2.2187, "step": 453 }, { "epoch": 0.03, "grad_norm": 1.7862794399261475, "learning_rate": 0.00019943322066291494, "loss": 1.4719, "step": 454 }, { "epoch": 0.03, "grad_norm": 0.6729375123977661, "learning_rate": 0.0001994306671558266, "loss": 1.3183, "step": 455 }, { "epoch": 0.03, "grad_norm": 0.9884502291679382, "learning_rate": 0.00019942810792593135, "loss": 2.2234, "step": 456 }, { "epoch": 0.03, "grad_norm": 0.9170588850975037, "learning_rate": 0.00019942554297337656, "loss": 2.0014, "step": 457 }, { "epoch": 0.03, "grad_norm": 1.485527753829956, "learning_rate": 0.00019942297229830973, "loss": 1.7437, "step": 458 }, { "epoch": 0.04, "grad_norm": 0.8753142356872559, "learning_rate": 0.00019942039590087896, "loss": 1.4787, "step": 459 }, { "epoch": 0.04, "grad_norm": 
1.3492156267166138, "learning_rate": 0.00019941781378123244, "loss": 2.1226, "step": 460 }, { "epoch": 0.04, "grad_norm": 2.269972324371338, "learning_rate": 0.00019941522593951882, "loss": 2.5528, "step": 461 }, { "epoch": 0.04, "grad_norm": 1.6186211109161377, "learning_rate": 0.00019941263237588706, "loss": 1.0722, "step": 462 }, { "epoch": 0.04, "grad_norm": 1.1243976354599, "learning_rate": 0.0001994100330904864, "loss": 1.1273, "step": 463 }, { "epoch": 0.04, "grad_norm": 1.369828224182129, "learning_rate": 0.00019940742808346646, "loss": 1.9363, "step": 464 }, { "epoch": 0.04, "grad_norm": 0.7316673994064331, "learning_rate": 0.0001994048173549772, "loss": 1.6407, "step": 465 }, { "epoch": 0.04, "grad_norm": 1.3055022954940796, "learning_rate": 0.0001994022009051688, "loss": 1.7298, "step": 466 }, { "epoch": 0.04, "grad_norm": 2.377859592437744, "learning_rate": 0.00019939957873419198, "loss": 1.5371, "step": 467 }, { "epoch": 0.04, "grad_norm": 0.9250656366348267, "learning_rate": 0.00019939695084219753, "loss": 1.8237, "step": 468 }, { "epoch": 0.04, "grad_norm": 1.7937681674957275, "learning_rate": 0.0001993943172293368, "loss": 1.9562, "step": 469 }, { "epoch": 0.04, "grad_norm": 3.190352201461792, "learning_rate": 0.0001993916778957613, "loss": 1.949, "step": 470 }, { "epoch": 0.04, "grad_norm": 2.109377861022949, "learning_rate": 0.000199389032841623, "loss": 2.028, "step": 471 }, { "epoch": 0.04, "grad_norm": 1.1887874603271484, "learning_rate": 0.00019938638206707404, "loss": 1.7626, "step": 472 }, { "epoch": 0.04, "grad_norm": 1.0868618488311768, "learning_rate": 0.00019938372557226712, "loss": 2.2803, "step": 473 }, { "epoch": 0.04, "grad_norm": 0.7659493088722229, "learning_rate": 0.00019938106335735506, "loss": 1.6801, "step": 474 }, { "epoch": 0.04, "grad_norm": 1.6044495105743408, "learning_rate": 0.00019937839542249108, "loss": 2.2845, "step": 475 }, { "epoch": 0.04, "grad_norm": 1.352586269378662, "learning_rate": 0.00019937572176782875, 
"loss": 1.8952, "step": 476 }, { "epoch": 0.04, "grad_norm": 0.8832406401634216, "learning_rate": 0.00019937304239352196, "loss": 1.5642, "step": 477 }, { "epoch": 0.04, "grad_norm": 1.227986454963684, "learning_rate": 0.00019937035729972492, "loss": 2.0436, "step": 478 }, { "epoch": 0.04, "grad_norm": 2.3307628631591797, "learning_rate": 0.00019936766648659217, "loss": 1.0592, "step": 479 }, { "epoch": 0.04, "grad_norm": 3.9227664470672607, "learning_rate": 0.0001993649699542786, "loss": 3.2663, "step": 480 }, { "epoch": 0.04, "grad_norm": 1.025710940361023, "learning_rate": 0.00019936226770293937, "loss": 1.7685, "step": 481 }, { "epoch": 0.04, "grad_norm": 1.6802994012832642, "learning_rate": 0.00019935955973273003, "loss": 2.4752, "step": 482 }, { "epoch": 0.04, "grad_norm": 2.292158603668213, "learning_rate": 0.00019935684604380647, "loss": 2.4869, "step": 483 }, { "epoch": 0.04, "grad_norm": 1.3714019060134888, "learning_rate": 0.00019935412663632483, "loss": 2.3713, "step": 484 }, { "epoch": 0.04, "grad_norm": 0.9678196907043457, "learning_rate": 0.00019935140151044166, "loss": 1.7922, "step": 485 }, { "epoch": 0.04, "grad_norm": 1.2730345726013184, "learning_rate": 0.0001993486706663138, "loss": 1.7653, "step": 486 }, { "epoch": 0.04, "grad_norm": 0.9970576167106628, "learning_rate": 0.00019934593410409838, "loss": 1.8393, "step": 487 }, { "epoch": 0.04, "grad_norm": 1.2628926038742065, "learning_rate": 0.000199343191823953, "loss": 1.7678, "step": 488 }, { "epoch": 0.04, "grad_norm": 1.2299526929855347, "learning_rate": 0.0001993404438260354, "loss": 1.8488, "step": 489 }, { "epoch": 0.04, "grad_norm": 0.9126286506652832, "learning_rate": 0.0001993376901105038, "loss": 1.8117, "step": 490 }, { "epoch": 0.04, "grad_norm": 1.0807137489318848, "learning_rate": 0.0001993349306775167, "loss": 1.7342, "step": 491 }, { "epoch": 0.04, "grad_norm": 1.0787583589553833, "learning_rate": 0.00019933216552723285, "loss": 1.6369, "step": 492 }, { "epoch": 0.04, 
"grad_norm": 0.9269427061080933, "learning_rate": 0.00019932939465981147, "loss": 1.3994, "step": 493 }, { "epoch": 0.04, "grad_norm": 0.8355039954185486, "learning_rate": 0.00019932661807541205, "loss": 2.1285, "step": 494 }, { "epoch": 0.04, "grad_norm": 0.8494008183479309, "learning_rate": 0.00019932383577419432, "loss": 1.6983, "step": 495 }, { "epoch": 0.04, "grad_norm": 0.8657335638999939, "learning_rate": 0.00019932104775631846, "loss": 2.1238, "step": 496 }, { "epoch": 0.04, "grad_norm": 1.2071126699447632, "learning_rate": 0.00019931825402194498, "loss": 2.8578, "step": 497 }, { "epoch": 0.04, "grad_norm": 0.9443709850311279, "learning_rate": 0.00019931545457123456, "loss": 1.5496, "step": 498 }, { "epoch": 0.04, "grad_norm": 0.951982319355011, "learning_rate": 0.00019931264940434846, "loss": 1.6826, "step": 499 }, { "epoch": 0.04, "grad_norm": 2.727142572402954, "learning_rate": 0.00019930983852144808, "loss": 2.8614, "step": 500 }, { "epoch": 0.04, "grad_norm": 2.546828269958496, "learning_rate": 0.00019930702192269518, "loss": 1.4535, "step": 501 }, { "epoch": 0.04, "grad_norm": 1.818228006362915, "learning_rate": 0.00019930419960825186, "loss": 1.6694, "step": 502 }, { "epoch": 0.04, "grad_norm": 0.9196067452430725, "learning_rate": 0.0001993013715782806, "loss": 1.3774, "step": 503 }, { "epoch": 0.04, "grad_norm": 1.56898033618927, "learning_rate": 0.00019929853783294416, "loss": 1.8299, "step": 504 }, { "epoch": 0.04, "grad_norm": 3.305321216583252, "learning_rate": 0.00019929569837240564, "loss": 1.8915, "step": 505 }, { "epoch": 0.04, "grad_norm": 4.535030364990234, "learning_rate": 0.00019929285319682846, "loss": 2.3157, "step": 506 }, { "epoch": 0.04, "grad_norm": 0.9098994135856628, "learning_rate": 0.0001992900023063764, "loss": 2.3145, "step": 507 }, { "epoch": 0.04, "grad_norm": 1.0318323373794556, "learning_rate": 0.0001992871457012135, "loss": 1.7737, "step": 508 }, { "epoch": 0.04, "grad_norm": 0.9386898875236511, "learning_rate": 
0.00019928428338150417, "loss": 1.1177, "step": 509 }, { "epoch": 0.04, "grad_norm": 1.845354437828064, "learning_rate": 0.00019928141534741322, "loss": 1.7371, "step": 510 }, { "epoch": 0.04, "grad_norm": 1.3709521293640137, "learning_rate": 0.00019927854159910565, "loss": 1.2015, "step": 511 }, { "epoch": 0.04, "grad_norm": 0.7224085927009583, "learning_rate": 0.0001992756621367469, "loss": 1.9887, "step": 512 }, { "epoch": 0.04, "grad_norm": 1.242624044418335, "learning_rate": 0.00019927277696050273, "loss": 2.0312, "step": 513 }, { "epoch": 0.04, "grad_norm": 1.0234167575836182, "learning_rate": 0.00019926988607053914, "loss": 1.4912, "step": 514 }, { "epoch": 0.04, "grad_norm": 0.6961313486099243, "learning_rate": 0.00019926698946702253, "loss": 1.8574, "step": 515 }, { "epoch": 0.04, "grad_norm": 1.138180136680603, "learning_rate": 0.0001992640871501196, "loss": 1.8308, "step": 516 }, { "epoch": 0.04, "grad_norm": 1.1269299983978271, "learning_rate": 0.00019926117911999745, "loss": 1.0795, "step": 517 }, { "epoch": 0.04, "grad_norm": 1.7964069843292236, "learning_rate": 0.00019925826537682344, "loss": 2.3585, "step": 518 }, { "epoch": 0.04, "grad_norm": 1.0553503036499023, "learning_rate": 0.00019925534592076525, "loss": 1.6835, "step": 519 }, { "epoch": 0.04, "grad_norm": 2.0402042865753174, "learning_rate": 0.0001992524207519909, "loss": 2.0614, "step": 520 }, { "epoch": 0.04, "grad_norm": 1.2070157527923584, "learning_rate": 0.00019924948987066877, "loss": 1.7666, "step": 521 }, { "epoch": 0.04, "grad_norm": 1.231628656387329, "learning_rate": 0.00019924655327696752, "loss": 2.0729, "step": 522 }, { "epoch": 0.04, "grad_norm": 1.170804738998413, "learning_rate": 0.00019924361097105623, "loss": 2.2672, "step": 523 }, { "epoch": 0.04, "grad_norm": 1.2463953495025635, "learning_rate": 0.0001992406629531042, "loss": 1.3914, "step": 524 }, { "epoch": 0.04, "grad_norm": 1.4070277214050293, "learning_rate": 0.00019923770922328113, "loss": 2.085, "step": 525 }, { 
"epoch": 0.04, "grad_norm": 1.3588836193084717, "learning_rate": 0.000199234749781757, "loss": 1.7709, "step": 526 }, { "epoch": 0.04, "grad_norm": 1.9302634000778198, "learning_rate": 0.00019923178462870214, "loss": 2.3897, "step": 527 }, { "epoch": 0.04, "grad_norm": 1.136529803276062, "learning_rate": 0.00019922881376428726, "loss": 1.5739, "step": 528 }, { "epoch": 0.04, "grad_norm": 0.7672291398048401, "learning_rate": 0.0001992258371886833, "loss": 1.7259, "step": 529 }, { "epoch": 0.04, "grad_norm": 1.5424941778182983, "learning_rate": 0.00019922285490206156, "loss": 1.5291, "step": 530 }, { "epoch": 0.04, "grad_norm": 0.8407881259918213, "learning_rate": 0.00019921986690459376, "loss": 1.5446, "step": 531 }, { "epoch": 0.04, "grad_norm": 1.488571286201477, "learning_rate": 0.00019921687319645183, "loss": 1.8044, "step": 532 }, { "epoch": 0.04, "grad_norm": 0.8448314666748047, "learning_rate": 0.00019921387377780812, "loss": 0.9537, "step": 533 }, { "epoch": 0.04, "grad_norm": 1.3910027742385864, "learning_rate": 0.00019921086864883517, "loss": 2.2343, "step": 534 }, { "epoch": 0.04, "grad_norm": 1.7811708450317383, "learning_rate": 0.00019920785780970604, "loss": 1.6162, "step": 535 }, { "epoch": 0.04, "grad_norm": 1.122805118560791, "learning_rate": 0.00019920484126059393, "loss": 1.38, "step": 536 }, { "epoch": 0.04, "grad_norm": 1.5076838731765747, "learning_rate": 0.00019920181900167256, "loss": 1.8555, "step": 537 }, { "epoch": 0.04, "grad_norm": 0.9001392126083374, "learning_rate": 0.0001991987910331158, "loss": 1.4214, "step": 538 }, { "epoch": 0.04, "grad_norm": 1.4340485334396362, "learning_rate": 0.00019919575735509798, "loss": 1.28, "step": 539 }, { "epoch": 0.04, "grad_norm": 1.789979100227356, "learning_rate": 0.0001991927179677937, "loss": 1.6609, "step": 540 }, { "epoch": 0.04, "grad_norm": 0.8958202004432678, "learning_rate": 0.00019918967287137784, "loss": 1.4587, "step": 541 }, { "epoch": 0.04, "grad_norm": 1.1121066808700562, 
"learning_rate": 0.0001991866220660257, "loss": 1.7965, "step": 542 }, { "epoch": 0.04, "grad_norm": 0.8832170963287354, "learning_rate": 0.00019918356555191288, "loss": 1.7963, "step": 543 }, { "epoch": 0.04, "grad_norm": 1.1081658601760864, "learning_rate": 0.0001991805033292153, "loss": 2.2899, "step": 544 }, { "epoch": 0.04, "grad_norm": 1.0730336904525757, "learning_rate": 0.0001991774353981092, "loss": 1.5225, "step": 545 }, { "epoch": 0.04, "grad_norm": 0.8597458004951477, "learning_rate": 0.00019917436175877112, "loss": 1.5631, "step": 546 }, { "epoch": 0.04, "grad_norm": 3.5549376010894775, "learning_rate": 0.00019917128241137805, "loss": 2.165, "step": 547 }, { "epoch": 0.04, "grad_norm": 0.8411698341369629, "learning_rate": 0.00019916819735610715, "loss": 1.7155, "step": 548 }, { "epoch": 0.04, "grad_norm": 0.9265092015266418, "learning_rate": 0.00019916510659313602, "loss": 2.0938, "step": 549 }, { "epoch": 0.04, "grad_norm": 1.000675916671753, "learning_rate": 0.00019916201012264254, "loss": 1.8791, "step": 550 }, { "epoch": 0.04, "grad_norm": 0.617866039276123, "learning_rate": 0.0001991589079448049, "loss": 1.3654, "step": 551 }, { "epoch": 0.04, "grad_norm": 1.0152662992477417, "learning_rate": 0.0001991558000598017, "loss": 2.2489, "step": 552 }, { "epoch": 0.04, "grad_norm": 0.9885780215263367, "learning_rate": 0.0001991526864678118, "loss": 1.9901, "step": 553 }, { "epoch": 0.04, "grad_norm": 1.3978874683380127, "learning_rate": 0.00019914956716901442, "loss": 2.5093, "step": 554 }, { "epoch": 0.04, "grad_norm": 1.1421045064926147, "learning_rate": 0.000199146442163589, "loss": 1.4849, "step": 555 }, { "epoch": 0.04, "grad_norm": 1.2590136528015137, "learning_rate": 0.00019914331145171553, "loss": 2.1332, "step": 556 }, { "epoch": 0.04, "grad_norm": 1.127567172050476, "learning_rate": 0.00019914017503357414, "loss": 1.2496, "step": 557 }, { "epoch": 0.04, "grad_norm": 0.942218542098999, "learning_rate": 0.00019913703290934536, "loss": 1.5716, 
"step": 558 }, { "epoch": 0.04, "grad_norm": 1.9201147556304932, "learning_rate": 0.00019913388507921003, "loss": 2.2332, "step": 559 }, { "epoch": 0.04, "grad_norm": 4.531490802764893, "learning_rate": 0.0001991307315433493, "loss": 2.1346, "step": 560 }, { "epoch": 0.04, "grad_norm": 0.7855700850486755, "learning_rate": 0.00019912757230194475, "loss": 1.8506, "step": 561 }, { "epoch": 0.04, "grad_norm": 1.3154869079589844, "learning_rate": 0.0001991244073551781, "loss": 2.8203, "step": 562 }, { "epoch": 0.04, "grad_norm": 1.0433114767074585, "learning_rate": 0.00019912123670323163, "loss": 1.0289, "step": 563 }, { "epoch": 0.04, "grad_norm": 1.5486971139907837, "learning_rate": 0.00019911806034628775, "loss": 2.2434, "step": 564 }, { "epoch": 0.04, "grad_norm": 0.6565307378768921, "learning_rate": 0.00019911487828452932, "loss": 1.7319, "step": 565 }, { "epoch": 0.04, "grad_norm": 1.0826821327209473, "learning_rate": 0.00019911169051813947, "loss": 1.4815, "step": 566 }, { "epoch": 0.04, "grad_norm": 1.381022334098816, "learning_rate": 0.00019910849704730165, "loss": 1.6314, "step": 567 }, { "epoch": 0.04, "grad_norm": 1.001729965209961, "learning_rate": 0.00019910529787219968, "loss": 2.1549, "step": 568 }, { "epoch": 0.04, "grad_norm": 0.9051113128662109, "learning_rate": 0.0001991020929930177, "loss": 1.7931, "step": 569 }, { "epoch": 0.04, "grad_norm": 0.9223996996879578, "learning_rate": 0.00019909888240994019, "loss": 1.2586, "step": 570 }, { "epoch": 0.04, "grad_norm": 2.1280508041381836, "learning_rate": 0.0001990956661231519, "loss": 2.1291, "step": 571 }, { "epoch": 0.04, "grad_norm": 1.9192841053009033, "learning_rate": 0.00019909244413283795, "loss": 1.4406, "step": 572 }, { "epoch": 0.04, "grad_norm": 1.4172418117523193, "learning_rate": 0.0001990892164391838, "loss": 1.9024, "step": 573 }, { "epoch": 0.04, "grad_norm": 1.2600643634796143, "learning_rate": 0.00019908598304237523, "loss": 1.736, "step": 574 }, { "epoch": 0.04, "grad_norm": 
0.9890273213386536, "learning_rate": 0.00019908274394259828, "loss": 1.9325, "step": 575 }, { "epoch": 0.04, "grad_norm": 1.644598126411438, "learning_rate": 0.00019907949914003948, "loss": 2.1256, "step": 576 }, { "epoch": 0.04, "grad_norm": 1.0348014831542969, "learning_rate": 0.0001990762486348855, "loss": 1.7496, "step": 577 }, { "epoch": 0.04, "grad_norm": 0.776682436466217, "learning_rate": 0.00019907299242732343, "loss": 1.7804, "step": 578 }, { "epoch": 0.04, "grad_norm": 0.894554853439331, "learning_rate": 0.00019906973051754076, "loss": 1.2628, "step": 579 }, { "epoch": 0.04, "grad_norm": 0.8768832087516785, "learning_rate": 0.00019906646290572514, "loss": 1.3515, "step": 580 }, { "epoch": 0.04, "grad_norm": 1.1905795335769653, "learning_rate": 0.0001990631895920647, "loss": 1.418, "step": 581 }, { "epoch": 0.04, "grad_norm": 0.9386431574821472, "learning_rate": 0.00019905991057674784, "loss": 1.7943, "step": 582 }, { "epoch": 0.04, "grad_norm": 2.843012809753418, "learning_rate": 0.00019905662585996327, "loss": 2.0141, "step": 583 }, { "epoch": 0.04, "grad_norm": 1.3263554573059082, "learning_rate": 0.0001990533354419, "loss": 1.6294, "step": 584 }, { "epoch": 0.04, "grad_norm": 1.206786036491394, "learning_rate": 0.0001990500393227475, "loss": 1.9003, "step": 585 }, { "epoch": 0.04, "grad_norm": 1.574519157409668, "learning_rate": 0.00019904673750269537, "loss": 1.5616, "step": 586 }, { "epoch": 0.04, "grad_norm": 1.8661242723464966, "learning_rate": 0.00019904342998193375, "loss": 2.5062, "step": 587 }, { "epoch": 0.04, "grad_norm": 2.745082378387451, "learning_rate": 0.00019904011676065298, "loss": 2.4021, "step": 588 }, { "epoch": 0.04, "grad_norm": 1.201487421989441, "learning_rate": 0.00019903679783904373, "loss": 1.8856, "step": 589 }, { "epoch": 0.05, "grad_norm": 1.6647714376449585, "learning_rate": 0.00019903347321729702, "loss": 1.6652, "step": 590 }, { "epoch": 0.05, "grad_norm": 1.227949857711792, "learning_rate": 0.00019903014289560425, 
"loss": 1.7564, "step": 591 }, { "epoch": 0.05, "grad_norm": 0.9697912335395813, "learning_rate": 0.00019902680687415705, "loss": 2.1168, "step": 592 }, { "epoch": 0.05, "grad_norm": 2.1455304622650146, "learning_rate": 0.0001990234651531474, "loss": 1.7079, "step": 593 }, { "epoch": 0.05, "grad_norm": 0.863980233669281, "learning_rate": 0.00019902011773276776, "loss": 0.6242, "step": 594 }, { "epoch": 0.05, "grad_norm": 0.8121053576469421, "learning_rate": 0.00019901676461321068, "loss": 1.5246, "step": 595 }, { "epoch": 0.05, "grad_norm": 0.9297714233398438, "learning_rate": 0.0001990134057946692, "loss": 2.2927, "step": 596 }, { "epoch": 0.05, "grad_norm": 0.8305476903915405, "learning_rate": 0.0001990100412773366, "loss": 1.8397, "step": 597 }, { "epoch": 0.05, "grad_norm": 4.03169059753418, "learning_rate": 0.00019900667106140653, "loss": 3.0429, "step": 598 }, { "epoch": 0.05, "grad_norm": 1.5734384059906006, "learning_rate": 0.00019900329514707302, "loss": 1.8714, "step": 599 }, { "epoch": 0.05, "grad_norm": 1.0493327379226685, "learning_rate": 0.0001989999135345303, "loss": 1.8666, "step": 600 }, { "epoch": 0.05, "grad_norm": 0.9474815130233765, "learning_rate": 0.00019899652622397312, "loss": 2.7101, "step": 601 }, { "epoch": 0.05, "grad_norm": 0.9634419679641724, "learning_rate": 0.0001989931332155963, "loss": 1.8255, "step": 602 }, { "epoch": 0.05, "grad_norm": 2.0070130825042725, "learning_rate": 0.00019898973450959518, "loss": 2.8018, "step": 603 }, { "epoch": 0.05, "grad_norm": 0.7401652336120605, "learning_rate": 0.00019898633010616542, "loss": 1.5124, "step": 604 }, { "epoch": 0.05, "grad_norm": 1.180788516998291, "learning_rate": 0.0001989829200055029, "loss": 1.2891, "step": 605 }, { "epoch": 0.05, "grad_norm": 0.9287000298500061, "learning_rate": 0.00019897950420780393, "loss": 0.7852, "step": 606 }, { "epoch": 0.05, "grad_norm": 0.8941931128501892, "learning_rate": 0.0001989760827132651, "loss": 1.6983, "step": 607 }, { "epoch": 0.05, 
"grad_norm": 0.9296355247497559, "learning_rate": 0.00019897265552208332, "loss": 1.792, "step": 608 }, { "epoch": 0.05, "grad_norm": 1.5961036682128906, "learning_rate": 0.00019896922263445583, "loss": 2.0567, "step": 609 }, { "epoch": 0.05, "grad_norm": 2.8083949089050293, "learning_rate": 0.00019896578405058027, "loss": 2.7128, "step": 610 }, { "epoch": 0.05, "grad_norm": 0.6851984858512878, "learning_rate": 0.0001989623397706545, "loss": 1.709, "step": 611 }, { "epoch": 0.05, "grad_norm": 0.8222078680992126, "learning_rate": 0.00019895888979487678, "loss": 0.7916, "step": 612 }, { "epoch": 0.05, "grad_norm": 1.2292481660842896, "learning_rate": 0.0001989554341234457, "loss": 1.8786, "step": 613 }, { "epoch": 0.05, "grad_norm": 1.50870943069458, "learning_rate": 0.0001989519727565601, "loss": 1.7995, "step": 614 }, { "epoch": 0.05, "grad_norm": 1.0674554109573364, "learning_rate": 0.00019894850569441923, "loss": 1.4553, "step": 615 }, { "epoch": 0.05, "grad_norm": 1.8504047393798828, "learning_rate": 0.00019894503293722265, "loss": 1.7537, "step": 616 }, { "epoch": 0.05, "grad_norm": 1.0797817707061768, "learning_rate": 0.0001989415544851702, "loss": 1.9213, "step": 617 }, { "epoch": 0.05, "grad_norm": 1.2434638738632202, "learning_rate": 0.00019893807033846214, "loss": 2.4476, "step": 618 }, { "epoch": 0.05, "grad_norm": 1.063407063484192, "learning_rate": 0.00019893458049729897, "loss": 1.8089, "step": 619 }, { "epoch": 0.05, "grad_norm": 1.2380033731460571, "learning_rate": 0.00019893108496188156, "loss": 1.4501, "step": 620 }, { "epoch": 0.05, "grad_norm": 3.01608943939209, "learning_rate": 0.00019892758373241104, "loss": 1.67, "step": 621 }, { "epoch": 0.05, "grad_norm": 1.6932713985443115, "learning_rate": 0.00019892407680908902, "loss": 1.3876, "step": 622 }, { "epoch": 0.05, "grad_norm": 0.8192082643508911, "learning_rate": 0.0001989205641921173, "loss": 1.7228, "step": 623 }, { "epoch": 0.05, "grad_norm": 1.2761865854263306, "learning_rate": 
0.00019891704588169805, "loss": 1.9749, "step": 624 }, { "epoch": 0.05, "grad_norm": 0.6852318644523621, "learning_rate": 0.00019891352187803376, "loss": 1.7612, "step": 625 }, { "epoch": 0.05, "grad_norm": 1.1260614395141602, "learning_rate": 0.00019890999218132727, "loss": 1.3779, "step": 626 }, { "epoch": 0.05, "grad_norm": 2.1345674991607666, "learning_rate": 0.00019890645679178172, "loss": 1.6533, "step": 627 }, { "epoch": 0.05, "grad_norm": 1.0703322887420654, "learning_rate": 0.00019890291570960064, "loss": 1.7996, "step": 628 }, { "epoch": 0.05, "grad_norm": 3.4704151153564453, "learning_rate": 0.00019889936893498775, "loss": 2.8011, "step": 629 }, { "epoch": 0.05, "grad_norm": 1.2563787698745728, "learning_rate": 0.00019889581646814728, "loss": 2.1598, "step": 630 }, { "epoch": 0.05, "grad_norm": 0.8718955516815186, "learning_rate": 0.00019889225830928365, "loss": 1.6801, "step": 631 }, { "epoch": 0.05, "grad_norm": 0.8769968152046204, "learning_rate": 0.00019888869445860166, "loss": 1.4481, "step": 632 }, { "epoch": 0.05, "grad_norm": 2.3745808601379395, "learning_rate": 0.00019888512491630643, "loss": 1.9026, "step": 633 }, { "epoch": 0.05, "grad_norm": 0.8898916244506836, "learning_rate": 0.0001988815496826034, "loss": 2.1696, "step": 634 }, { "epoch": 0.05, "grad_norm": 0.6377655863761902, "learning_rate": 0.00019887796875769836, "loss": 1.0082, "step": 635 }, { "epoch": 0.05, "grad_norm": 0.7769274115562439, "learning_rate": 0.00019887438214179738, "loss": 2.2098, "step": 636 }, { "epoch": 0.05, "grad_norm": 1.2250186204910278, "learning_rate": 0.00019887078983510693, "loss": 1.6612, "step": 637 }, { "epoch": 0.05, "grad_norm": 0.6679250001907349, "learning_rate": 0.00019886719183783376, "loss": 1.4532, "step": 638 }, { "epoch": 0.05, "grad_norm": 1.564584732055664, "learning_rate": 0.00019886358815018494, "loss": 1.3283, "step": 639 }, { "epoch": 0.05, "grad_norm": 1.1175780296325684, "learning_rate": 0.00019885997877236788, "loss": 1.7968, "step": 
640 }, { "epoch": 0.05, "grad_norm": 2.1766550540924072, "learning_rate": 0.00019885636370459034, "loss": 1.7233, "step": 641 }, { "epoch": 0.05, "grad_norm": 1.2961431741714478, "learning_rate": 0.0001988527429470604, "loss": 1.9301, "step": 642 }, { "epoch": 0.05, "grad_norm": 0.8338738083839417, "learning_rate": 0.00019884911649998638, "loss": 1.0785, "step": 643 }, { "epoch": 0.05, "grad_norm": 0.9106821417808533, "learning_rate": 0.00019884548436357712, "loss": 1.8246, "step": 644 }, { "epoch": 0.05, "grad_norm": 0.8977292776107788, "learning_rate": 0.0001988418465380416, "loss": 2.1133, "step": 645 }, { "epoch": 0.05, "grad_norm": 1.3764441013336182, "learning_rate": 0.00019883820302358916, "loss": 2.0292, "step": 646 }, { "epoch": 0.05, "grad_norm": 1.030801773071289, "learning_rate": 0.00019883455382042957, "loss": 2.1283, "step": 647 }, { "epoch": 0.05, "grad_norm": 0.8743224740028381, "learning_rate": 0.00019883089892877286, "loss": 1.6442, "step": 648 }, { "epoch": 0.05, "grad_norm": 0.9732521176338196, "learning_rate": 0.00019882723834882936, "loss": 1.7023, "step": 649 }, { "epoch": 0.05, "grad_norm": 1.4625818729400635, "learning_rate": 0.00019882357208080977, "loss": 1.6065, "step": 650 }, { "epoch": 0.05, "grad_norm": 2.5077717304229736, "learning_rate": 0.00019881990012492513, "loss": 2.4682, "step": 651 }, { "epoch": 0.05, "grad_norm": 0.7774366736412048, "learning_rate": 0.0001988162224813867, "loss": 1.5882, "step": 652 }, { "epoch": 0.05, "grad_norm": 1.0460829734802246, "learning_rate": 0.00019881253915040627, "loss": 1.3872, "step": 653 }, { "epoch": 0.05, "grad_norm": 1.72381591796875, "learning_rate": 0.00019880885013219576, "loss": 1.9998, "step": 654 }, { "epoch": 0.05, "grad_norm": 1.416053295135498, "learning_rate": 0.0001988051554269675, "loss": 2.0316, "step": 655 }, { "epoch": 0.05, "grad_norm": 2.214229106903076, "learning_rate": 0.00019880145503493415, "loss": 1.4727, "step": 656 }, { "epoch": 0.05, "grad_norm": 1.3838396072387695, 
"learning_rate": 0.0001987977489563087, "loss": 1.3949, "step": 657 }, { "epoch": 0.05, "grad_norm": 2.167048692703247, "learning_rate": 0.0001987940371913044, "loss": 2.0532, "step": 658 }, { "epoch": 0.05, "grad_norm": 0.6867437958717346, "learning_rate": 0.000198790319740135, "loss": 1.3526, "step": 659 }, { "epoch": 0.05, "grad_norm": 1.0312790870666504, "learning_rate": 0.00019878659660301434, "loss": 1.2838, "step": 660 }, { "epoch": 0.05, "grad_norm": 1.1610572338104248, "learning_rate": 0.00019878286778015676, "loss": 1.4136, "step": 661 }, { "epoch": 0.05, "grad_norm": 1.0034325122833252, "learning_rate": 0.00019877913327177686, "loss": 2.1982, "step": 662 }, { "epoch": 0.05, "grad_norm": 1.155191421508789, "learning_rate": 0.00019877539307808963, "loss": 1.984, "step": 663 }, { "epoch": 0.05, "grad_norm": 1.325958013534546, "learning_rate": 0.0001987716471993103, "loss": 1.7743, "step": 664 }, { "epoch": 0.05, "grad_norm": 0.8891846537590027, "learning_rate": 0.00019876789563565446, "loss": 1.2406, "step": 665 }, { "epoch": 0.05, "grad_norm": 2.3322629928588867, "learning_rate": 0.00019876413838733803, "loss": 1.4987, "step": 666 }, { "epoch": 0.05, "grad_norm": 1.524440050125122, "learning_rate": 0.00019876037545457731, "loss": 2.5959, "step": 667 }, { "epoch": 0.05, "grad_norm": 1.0011005401611328, "learning_rate": 0.00019875660683758882, "loss": 1.3538, "step": 668 }, { "epoch": 0.05, "grad_norm": 0.9662484526634216, "learning_rate": 0.00019875283253658948, "loss": 1.6391, "step": 669 }, { "epoch": 0.05, "grad_norm": 0.9437063336372375, "learning_rate": 0.0001987490525517966, "loss": 1.5369, "step": 670 }, { "epoch": 0.05, "grad_norm": 1.0212557315826416, "learning_rate": 0.0001987452668834276, "loss": 1.8122, "step": 671 }, { "epoch": 0.05, "grad_norm": 1.0020177364349365, "learning_rate": 0.00019874147553170047, "loss": 1.6058, "step": 672 }, { "epoch": 0.05, "grad_norm": 1.1929576396942139, "learning_rate": 0.0001987376784968334, "loss": 1.6468, 
"step": 673 }, { "epoch": 0.05, "grad_norm": 1.0996648073196411, "learning_rate": 0.0001987338757790449, "loss": 2.1958, "step": 674 }, { "epoch": 0.05, "grad_norm": 2.8297109603881836, "learning_rate": 0.0001987300673785539, "loss": 2.725, "step": 675 }, { "epoch": 0.05, "grad_norm": 0.8805826902389526, "learning_rate": 0.00019872625329557953, "loss": 1.5664, "step": 676 }, { "epoch": 0.05, "grad_norm": 1.1591073274612427, "learning_rate": 0.00019872243353034135, "loss": 1.7903, "step": 677 }, { "epoch": 0.05, "grad_norm": 0.8481267690658569, "learning_rate": 0.0001987186080830592, "loss": 1.657, "step": 678 }, { "epoch": 0.05, "grad_norm": 1.8137130737304688, "learning_rate": 0.00019871477695395325, "loss": 1.3483, "step": 679 }, { "epoch": 0.05, "grad_norm": 1.8287789821624756, "learning_rate": 0.00019871094014324404, "loss": 1.8793, "step": 680 }, { "epoch": 0.05, "grad_norm": 1.320704698562622, "learning_rate": 0.00019870709765115234, "loss": 1.5321, "step": 681 }, { "epoch": 0.05, "grad_norm": 1.384843111038208, "learning_rate": 0.00019870324947789936, "loss": 1.2794, "step": 682 }, { "epoch": 0.05, "grad_norm": 1.813734531402588, "learning_rate": 0.00019869939562370655, "loss": 2.4891, "step": 683 }, { "epoch": 0.05, "grad_norm": 0.84090256690979, "learning_rate": 0.00019869553608879573, "loss": 2.1723, "step": 684 }, { "epoch": 0.05, "grad_norm": 0.8586093783378601, "learning_rate": 0.00019869167087338907, "loss": 1.7326, "step": 685 }, { "epoch": 0.05, "grad_norm": 1.1801090240478516, "learning_rate": 0.00019868779997770898, "loss": 1.8606, "step": 686 }, { "epoch": 0.05, "grad_norm": 1.9599052667617798, "learning_rate": 0.00019868392340197832, "loss": 1.432, "step": 687 }, { "epoch": 0.05, "grad_norm": 1.1205809116363525, "learning_rate": 0.00019868004114642016, "loss": 1.5909, "step": 688 }, { "epoch": 0.05, "grad_norm": 1.4996355772018433, "learning_rate": 0.00019867615321125795, "loss": 1.824, "step": 689 }, { "epoch": 0.05, "grad_norm": 
1.251436471939087, "learning_rate": 0.00019867225959671543, "loss": 1.5943, "step": 690 }, { "epoch": 0.05, "grad_norm": 1.5050804615020752, "learning_rate": 0.0001986683603030168, "loss": 1.6909, "step": 691 }, { "epoch": 0.05, "grad_norm": 0.7786747813224792, "learning_rate": 0.00019866445533038639, "loss": 1.9336, "step": 692 }, { "epoch": 0.05, "grad_norm": 1.2738980054855347, "learning_rate": 0.000198660544679049, "loss": 1.7291, "step": 693 }, { "epoch": 0.05, "grad_norm": 1.0456675291061401, "learning_rate": 0.00019865662834922968, "loss": 2.1389, "step": 694 }, { "epoch": 0.05, "grad_norm": 3.3101062774658203, "learning_rate": 0.0001986527063411539, "loss": 1.9596, "step": 695 }, { "epoch": 0.05, "grad_norm": 1.2219034433364868, "learning_rate": 0.0001986487786550473, "loss": 1.5199, "step": 696 }, { "epoch": 0.05, "grad_norm": 1.0876213312149048, "learning_rate": 0.00019864484529113604, "loss": 1.0951, "step": 697 }, { "epoch": 0.05, "grad_norm": 0.781679630279541, "learning_rate": 0.00019864090624964643, "loss": 1.6978, "step": 698 }, { "epoch": 0.05, "grad_norm": 0.9693881273269653, "learning_rate": 0.00019863696153080525, "loss": 1.629, "step": 699 }, { "epoch": 0.05, "grad_norm": 1.799832820892334, "learning_rate": 0.00019863301113483948, "loss": 1.5184, "step": 700 }, { "epoch": 0.05, "grad_norm": 0.9877590537071228, "learning_rate": 0.00019862905506197654, "loss": 1.8104, "step": 701 }, { "epoch": 0.05, "grad_norm": 0.7554662227630615, "learning_rate": 0.00019862509331244405, "loss": 1.5702, "step": 702 }, { "epoch": 0.05, "grad_norm": 1.2525984048843384, "learning_rate": 0.00019862112588647012, "loss": 2.2174, "step": 703 }, { "epoch": 0.05, "grad_norm": 1.8999351263046265, "learning_rate": 0.00019861715278428305, "loss": 1.4111, "step": 704 }, { "epoch": 0.05, "grad_norm": 0.7935075759887695, "learning_rate": 0.0001986131740061115, "loss": 1.5278, "step": 705 }, { "epoch": 0.05, "grad_norm": 0.8195009827613831, "learning_rate": 
0.00019860918955218453, "loss": 1.7408, "step": 706 }, { "epoch": 0.05, "grad_norm": 4.711212635040283, "learning_rate": 0.00019860519942273142, "loss": 2.2583, "step": 707 }, { "epoch": 0.05, "grad_norm": 2.2960455417633057, "learning_rate": 0.00019860120361798184, "loss": 1.4755, "step": 708 }, { "epoch": 0.05, "grad_norm": 0.8966720700263977, "learning_rate": 0.00019859720213816578, "loss": 1.0902, "step": 709 }, { "epoch": 0.05, "grad_norm": 1.041304349899292, "learning_rate": 0.00019859319498351352, "loss": 1.5696, "step": 710 }, { "epoch": 0.05, "grad_norm": 1.8317071199417114, "learning_rate": 0.00019858918215425573, "loss": 1.6771, "step": 711 }, { "epoch": 0.05, "grad_norm": 2.069192886352539, "learning_rate": 0.00019858516365062334, "loss": 2.2964, "step": 712 }, { "epoch": 0.05, "grad_norm": 1.381073236465454, "learning_rate": 0.00019858113947284766, "loss": 1.5487, "step": 713 }, { "epoch": 0.05, "grad_norm": 1.04576575756073, "learning_rate": 0.0001985771096211603, "loss": 1.8041, "step": 714 }, { "epoch": 0.05, "grad_norm": 0.8494612574577332, "learning_rate": 0.00019857307409579318, "loss": 1.5111, "step": 715 }, { "epoch": 0.05, "grad_norm": 1.2805759906768799, "learning_rate": 0.0001985690328969786, "loss": 1.751, "step": 716 }, { "epoch": 0.05, "grad_norm": 1.0859493017196655, "learning_rate": 0.00019856498602494914, "loss": 1.1127, "step": 717 }, { "epoch": 0.05, "grad_norm": 0.857260525226593, "learning_rate": 0.0001985609334799377, "loss": 1.958, "step": 718 }, { "epoch": 0.05, "grad_norm": 3.081974744796753, "learning_rate": 0.00019855687526217758, "loss": 2.6573, "step": 719 }, { "epoch": 0.05, "grad_norm": 1.182760238647461, "learning_rate": 0.00019855281137190232, "loss": 1.1326, "step": 720 }, { "epoch": 0.06, "grad_norm": 1.038393497467041, "learning_rate": 0.0001985487418093458, "loss": 1.5956, "step": 721 }, { "epoch": 0.06, "grad_norm": 1.2490646839141846, "learning_rate": 0.00019854466657474228, "loss": 2.4098, "step": 722 }, { 
"epoch": 0.06, "grad_norm": 1.2555872201919556, "learning_rate": 0.00019854058566832632, "loss": 1.4769, "step": 723 }, { "epoch": 0.06, "grad_norm": 0.8574446439743042, "learning_rate": 0.00019853649909033273, "loss": 1.3816, "step": 724 }, { "epoch": 0.06, "grad_norm": 0.8913145661354065, "learning_rate": 0.00019853240684099683, "loss": 1.7417, "step": 725 }, { "epoch": 0.06, "grad_norm": 1.327716588973999, "learning_rate": 0.00019852830892055405, "loss": 1.6816, "step": 726 }, { "epoch": 0.06, "grad_norm": 1.5340549945831299, "learning_rate": 0.0001985242053292403, "loss": 2.0692, "step": 727 }, { "epoch": 0.06, "grad_norm": 0.9011388421058655, "learning_rate": 0.00019852009606729175, "loss": 1.4301, "step": 728 }, { "epoch": 0.06, "grad_norm": 1.1806663274765015, "learning_rate": 0.00019851598113494493, "loss": 1.6247, "step": 729 }, { "epoch": 0.06, "grad_norm": 1.1855579614639282, "learning_rate": 0.00019851186053243666, "loss": 1.7592, "step": 730 }, { "epoch": 0.06, "grad_norm": 1.1491153240203857, "learning_rate": 0.0001985077342600041, "loss": 1.8713, "step": 731 }, { "epoch": 0.06, "grad_norm": 1.6814614534378052, "learning_rate": 0.00019850360231788474, "loss": 2.1891, "step": 732 }, { "epoch": 0.06, "grad_norm": 0.9727683067321777, "learning_rate": 0.00019849946470631644, "loss": 1.7223, "step": 733 }, { "epoch": 0.06, "grad_norm": 1.811038851737976, "learning_rate": 0.00019849532142553729, "loss": 1.8553, "step": 734 }, { "epoch": 0.06, "grad_norm": 0.8398392796516418, "learning_rate": 0.00019849117247578576, "loss": 1.6237, "step": 735 }, { "epoch": 0.06, "grad_norm": 3.276308059692383, "learning_rate": 0.0001984870178573007, "loss": 2.7287, "step": 736 }, { "epoch": 0.06, "grad_norm": 1.150078296661377, "learning_rate": 0.00019848285757032117, "loss": 1.6947, "step": 737 }, { "epoch": 0.06, "grad_norm": 0.9808902144432068, "learning_rate": 0.00019847869161508666, "loss": 1.0799, "step": 738 }, { "epoch": 0.06, "grad_norm": 1.1112680435180664, 
"learning_rate": 0.00019847451999183694, "loss": 1.2213, "step": 739 }, { "epoch": 0.06, "grad_norm": 0.892336905002594, "learning_rate": 0.00019847034270081208, "loss": 1.5959, "step": 740 }, { "epoch": 0.06, "grad_norm": 0.8599410057067871, "learning_rate": 0.00019846615974225253, "loss": 1.6698, "step": 741 }, { "epoch": 0.06, "grad_norm": 1.8865697383880615, "learning_rate": 0.00019846197111639906, "loss": 1.9613, "step": 742 }, { "epoch": 0.06, "grad_norm": 1.2576864957809448, "learning_rate": 0.0001984577768234927, "loss": 1.9853, "step": 743 }, { "epoch": 0.06, "grad_norm": 1.4363685846328735, "learning_rate": 0.00019845357686377492, "loss": 2.603, "step": 744 }, { "epoch": 0.06, "grad_norm": 1.4784241914749146, "learning_rate": 0.0001984493712374874, "loss": 1.7035, "step": 745 }, { "epoch": 0.06, "grad_norm": 0.8207076787948608, "learning_rate": 0.00019844515994487224, "loss": 1.1133, "step": 746 }, { "epoch": 0.06, "grad_norm": 1.0803649425506592, "learning_rate": 0.00019844094298617176, "loss": 1.6038, "step": 747 }, { "epoch": 0.06, "grad_norm": 0.777904748916626, "learning_rate": 0.00019843672036162877, "loss": 1.3184, "step": 748 }, { "epoch": 0.06, "grad_norm": 0.8452432751655579, "learning_rate": 0.00019843249207148622, "loss": 1.3951, "step": 749 }, { "epoch": 0.06, "grad_norm": 0.8930739760398865, "learning_rate": 0.0001984282581159875, "loss": 1.5658, "step": 750 }, { "epoch": 0.06, "grad_norm": 0.9493566751480103, "learning_rate": 0.0001984240184953763, "loss": 1.1437, "step": 751 }, { "epoch": 0.06, "grad_norm": 0.7793578505516052, "learning_rate": 0.0001984197732098966, "loss": 2.1914, "step": 752 }, { "epoch": 0.06, "grad_norm": 1.085386037826538, "learning_rate": 0.0001984155222597928, "loss": 0.7999, "step": 753 }, { "epoch": 0.06, "grad_norm": 1.3589788675308228, "learning_rate": 0.00019841126564530955, "loss": 1.7454, "step": 754 }, { "epoch": 0.06, "grad_norm": 3.7367804050445557, "learning_rate": 0.00019840700336669183, "loss": 2.6895, 
"step": 755 }, { "epoch": 0.06, "grad_norm": 1.6514825820922852, "learning_rate": 0.00019840273542418496, "loss": 1.3467, "step": 756 }, { "epoch": 0.06, "grad_norm": 1.6227656602859497, "learning_rate": 0.00019839846181803457, "loss": 1.8262, "step": 757 }, { "epoch": 0.06, "grad_norm": 1.1671210527420044, "learning_rate": 0.00019839418254848665, "loss": 0.9199, "step": 758 }, { "epoch": 0.06, "grad_norm": 0.9040707945823669, "learning_rate": 0.0001983898976157875, "loss": 1.9345, "step": 759 }, { "epoch": 0.06, "grad_norm": 1.5280342102050781, "learning_rate": 0.00019838560702018374, "loss": 1.4981, "step": 760 }, { "epoch": 0.06, "grad_norm": 1.6879160404205322, "learning_rate": 0.0001983813107619223, "loss": 2.2665, "step": 761 }, { "epoch": 0.06, "grad_norm": 2.8497936725616455, "learning_rate": 0.00019837700884125047, "loss": 4.0824, "step": 762 }, { "epoch": 0.06, "grad_norm": 0.8952934741973877, "learning_rate": 0.00019837270125841588, "loss": 1.3764, "step": 763 }, { "epoch": 0.06, "grad_norm": 2.438488721847534, "learning_rate": 0.00019836838801366643, "loss": 1.9927, "step": 764 }, { "epoch": 0.06, "grad_norm": 1.022821307182312, "learning_rate": 0.0001983640691072503, "loss": 1.2075, "step": 765 }, { "epoch": 0.06, "grad_norm": 1.5572059154510498, "learning_rate": 0.0001983597445394162, "loss": 1.9705, "step": 766 }, { "epoch": 0.06, "grad_norm": 3.6128084659576416, "learning_rate": 0.00019835541431041293, "loss": 2.2422, "step": 767 }, { "epoch": 0.06, "grad_norm": 0.9772310853004456, "learning_rate": 0.0001983510784204898, "loss": 1.5019, "step": 768 }, { "epoch": 0.06, "grad_norm": 0.8061856031417847, "learning_rate": 0.00019834673686989628, "loss": 1.703, "step": 769 }, { "epoch": 0.06, "grad_norm": 1.208184838294983, "learning_rate": 0.00019834238965888232, "loss": 1.5258, "step": 770 }, { "epoch": 0.06, "grad_norm": 1.0176557302474976, "learning_rate": 0.0001983380367876981, "loss": 1.5322, "step": 771 }, { "epoch": 0.06, "grad_norm": 
1.3476306200027466, "learning_rate": 0.00019833367825659415, "loss": 1.8695, "step": 772 }, { "epoch": 0.06, "grad_norm": 0.8621996641159058, "learning_rate": 0.00019832931406582133, "loss": 1.235, "step": 773 }, { "epoch": 0.06, "grad_norm": 1.9286785125732422, "learning_rate": 0.00019832494421563083, "loss": 1.4206, "step": 774 }, { "epoch": 0.06, "grad_norm": 1.3935219049453735, "learning_rate": 0.00019832056870627417, "loss": 1.5301, "step": 775 }, { "epoch": 0.06, "grad_norm": 3.008159875869751, "learning_rate": 0.00019831618753800317, "loss": 1.2733, "step": 776 }, { "epoch": 0.06, "grad_norm": 0.954288899898529, "learning_rate": 0.00019831180071107, "loss": 1.785, "step": 777 }, { "epoch": 0.06, "grad_norm": 0.8085010051727295, "learning_rate": 0.00019830740822572712, "loss": 1.4032, "step": 778 }, { "epoch": 0.06, "grad_norm": 0.8297780752182007, "learning_rate": 0.00019830301008222739, "loss": 1.6486, "step": 779 }, { "epoch": 0.06, "grad_norm": 2.552234649658203, "learning_rate": 0.0001982986062808239, "loss": 2.3755, "step": 780 }, { "epoch": 0.06, "grad_norm": 1.1092941761016846, "learning_rate": 0.00019829419682177015, "loss": 1.8612, "step": 781 }, { "epoch": 0.06, "grad_norm": 0.8971766233444214, "learning_rate": 0.0001982897817053199, "loss": 1.8354, "step": 782 }, { "epoch": 0.06, "grad_norm": 1.032141923904419, "learning_rate": 0.00019828536093172733, "loss": 1.5376, "step": 783 }, { "epoch": 0.06, "grad_norm": 1.4096882343292236, "learning_rate": 0.0001982809345012468, "loss": 1.2976, "step": 784 }, { "epoch": 0.06, "grad_norm": 1.9756548404693604, "learning_rate": 0.00019827650241413308, "loss": 2.3844, "step": 785 }, { "epoch": 0.06, "grad_norm": 1.880224347114563, "learning_rate": 0.00019827206467064133, "loss": 1.5598, "step": 786 }, { "epoch": 0.06, "grad_norm": 1.0146435499191284, "learning_rate": 0.00019826762127102694, "loss": 1.9215, "step": 787 }, { "epoch": 0.06, "grad_norm": 2.963212490081787, "learning_rate": 0.00019826317221554563, 
"loss": 1.3629, "step": 788 }, { "epoch": 0.06, "grad_norm": 0.9533901214599609, "learning_rate": 0.00019825871750445346, "loss": 1.991, "step": 789 }, { "epoch": 0.06, "grad_norm": 0.9453014731407166, "learning_rate": 0.00019825425713800688, "loss": 1.9249, "step": 790 }, { "epoch": 0.06, "grad_norm": 1.2192968130111694, "learning_rate": 0.00019824979111646255, "loss": 1.8378, "step": 791 }, { "epoch": 0.06, "grad_norm": 3.578550100326538, "learning_rate": 0.00019824531944007755, "loss": 1.3522, "step": 792 }, { "epoch": 0.06, "grad_norm": 1.1090641021728516, "learning_rate": 0.00019824084210910925, "loss": 1.7239, "step": 793 }, { "epoch": 0.06, "grad_norm": 0.8000882267951965, "learning_rate": 0.00019823635912381534, "loss": 1.6905, "step": 794 }, { "epoch": 0.06, "grad_norm": 1.1289677619934082, "learning_rate": 0.00019823187048445383, "loss": 1.7226, "step": 795 }, { "epoch": 0.06, "grad_norm": 1.8893176317214966, "learning_rate": 0.00019822737619128308, "loss": 2.6904, "step": 796 }, { "epoch": 0.06, "grad_norm": 1.5457514524459839, "learning_rate": 0.00019822287624456176, "loss": 1.9627, "step": 797 }, { "epoch": 0.06, "grad_norm": 1.3621814250946045, "learning_rate": 0.00019821837064454886, "loss": 2.1129, "step": 798 }, { "epoch": 0.06, "grad_norm": 1.3644802570343018, "learning_rate": 0.0001982138593915037, "loss": 1.1875, "step": 799 }, { "epoch": 0.06, "grad_norm": 1.8447915315628052, "learning_rate": 0.00019820934248568594, "loss": 2.0341, "step": 800 }, { "epoch": 0.06, "grad_norm": 1.1529351472854614, "learning_rate": 0.00019820481992735559, "loss": 0.7825, "step": 801 }, { "epoch": 0.06, "grad_norm": 0.9138345718383789, "learning_rate": 0.00019820029171677286, "loss": 1.8896, "step": 802 }, { "epoch": 0.06, "grad_norm": 1.189429521560669, "learning_rate": 0.00019819575785419847, "loss": 1.8492, "step": 803 }, { "epoch": 0.06, "grad_norm": 0.6896252036094666, "learning_rate": 0.0001981912183398933, "loss": 1.6005, "step": 804 }, { "epoch": 0.06, 
"grad_norm": 1.156744122505188, "learning_rate": 0.00019818667317411865, "loss": 1.3106, "step": 805 }, { "epoch": 0.06, "grad_norm": 1.044086217880249, "learning_rate": 0.00019818212235713612, "loss": 1.766, "step": 806 }, { "epoch": 0.06, "grad_norm": 1.1279077529907227, "learning_rate": 0.00019817756588920766, "loss": 1.7128, "step": 807 }, { "epoch": 0.06, "grad_norm": 0.686887264251709, "learning_rate": 0.0001981730037705955, "loss": 1.1927, "step": 808 }, { "epoch": 0.06, "grad_norm": 1.1014527082443237, "learning_rate": 0.0001981684360015622, "loss": 1.6894, "step": 809 }, { "epoch": 0.06, "grad_norm": 1.2806557416915894, "learning_rate": 0.00019816386258237066, "loss": 1.4937, "step": 810 }, { "epoch": 0.06, "grad_norm": 1.4080140590667725, "learning_rate": 0.00019815928351328414, "loss": 1.9559, "step": 811 }, { "epoch": 0.06, "grad_norm": 0.994717538356781, "learning_rate": 0.00019815469879456618, "loss": 2.4986, "step": 812 }, { "epoch": 0.06, "grad_norm": 1.4262185096740723, "learning_rate": 0.00019815010842648065, "loss": 1.7881, "step": 813 }, { "epoch": 0.06, "grad_norm": 1.7757145166397095, "learning_rate": 0.00019814551240929172, "loss": 1.8508, "step": 814 }, { "epoch": 0.06, "grad_norm": 2.769596576690674, "learning_rate": 0.000198140910743264, "loss": 2.1312, "step": 815 }, { "epoch": 0.06, "grad_norm": 1.4213255643844604, "learning_rate": 0.00019813630342866228, "loss": 2.3698, "step": 816 }, { "epoch": 0.06, "grad_norm": 0.777652382850647, "learning_rate": 0.00019813169046575176, "loss": 1.5842, "step": 817 }, { "epoch": 0.06, "grad_norm": 1.0768821239471436, "learning_rate": 0.0001981270718547979, "loss": 1.402, "step": 818 }, { "epoch": 0.06, "grad_norm": 1.312333345413208, "learning_rate": 0.00019812244759606659, "loss": 2.1668, "step": 819 }, { "epoch": 0.06, "grad_norm": 1.2690857648849487, "learning_rate": 0.0001981178176898239, "loss": 1.8717, "step": 820 }, { "epoch": 0.06, "grad_norm": 1.7452908754348755, "learning_rate": 
0.0001981131821363364, "loss": 2.1401, "step": 821 }, { "epoch": 0.06, "grad_norm": 0.9147129654884338, "learning_rate": 0.00019810854093587087, "loss": 2.1968, "step": 822 }, { "epoch": 0.06, "grad_norm": 2.3848068714141846, "learning_rate": 0.0001981038940886944, "loss": 2.0437, "step": 823 }, { "epoch": 0.06, "grad_norm": 1.9372680187225342, "learning_rate": 0.00019809924159507446, "loss": 1.4519, "step": 824 }, { "epoch": 0.06, "grad_norm": 1.7993059158325195, "learning_rate": 0.00019809458345527886, "loss": 1.8872, "step": 825 }, { "epoch": 0.06, "grad_norm": 3.116987943649292, "learning_rate": 0.00019808991966957566, "loss": 1.8619, "step": 826 }, { "epoch": 0.06, "grad_norm": 1.1875494718551636, "learning_rate": 0.0001980852502382333, "loss": 1.0535, "step": 827 }, { "epoch": 0.06, "grad_norm": 1.0127897262573242, "learning_rate": 0.00019808057516152052, "loss": 1.9093, "step": 828 }, { "epoch": 0.06, "grad_norm": 2.7488455772399902, "learning_rate": 0.0001980758944397064, "loss": 1.8316, "step": 829 }, { "epoch": 0.06, "grad_norm": 1.0216301679611206, "learning_rate": 0.0001980712080730604, "loss": 1.1104, "step": 830 }, { "epoch": 0.06, "grad_norm": 1.8441044092178345, "learning_rate": 0.00019806651606185218, "loss": 1.8908, "step": 831 }, { "epoch": 0.06, "grad_norm": 0.8813297152519226, "learning_rate": 0.00019806181840635183, "loss": 1.2759, "step": 832 }, { "epoch": 0.06, "grad_norm": 1.2068791389465332, "learning_rate": 0.00019805711510682969, "loss": 1.6314, "step": 833 }, { "epoch": 0.06, "grad_norm": 0.7098000645637512, "learning_rate": 0.0001980524061635565, "loss": 1.8347, "step": 834 }, { "epoch": 0.06, "grad_norm": 1.109838604927063, "learning_rate": 0.00019804769157680328, "loss": 1.9628, "step": 835 }, { "epoch": 0.06, "grad_norm": 1.7259066104888916, "learning_rate": 0.00019804297134684134, "loss": 1.9623, "step": 836 }, { "epoch": 0.06, "grad_norm": 1.0328179597854614, "learning_rate": 0.0001980382454739424, "loss": 2.0444, "step": 837 }, { 
"epoch": 0.06, "grad_norm": 1.104698896408081, "learning_rate": 0.00019803351395837848, "loss": 2.4005, "step": 838 }, { "epoch": 0.06, "grad_norm": 5.021828651428223, "learning_rate": 0.00019802877680042183, "loss": 2.6938, "step": 839 }, { "epoch": 0.06, "grad_norm": 1.4651466608047485, "learning_rate": 0.00019802403400034517, "loss": 2.1022, "step": 840 }, { "epoch": 0.06, "grad_norm": 2.044652223587036, "learning_rate": 0.00019801928555842143, "loss": 1.0458, "step": 841 }, { "epoch": 0.06, "grad_norm": 1.2299339771270752, "learning_rate": 0.00019801453147492396, "loss": 2.4403, "step": 842 }, { "epoch": 0.06, "grad_norm": 1.0112160444259644, "learning_rate": 0.00019800977175012636, "loss": 1.7015, "step": 843 }, { "epoch": 0.06, "grad_norm": 0.7485311627388, "learning_rate": 0.00019800500638430258, "loss": 1.1169, "step": 844 }, { "epoch": 0.06, "grad_norm": 0.7663161754608154, "learning_rate": 0.00019800023537772687, "loss": 1.2213, "step": 845 }, { "epoch": 0.06, "grad_norm": 0.9574807286262512, "learning_rate": 0.00019799545873067385, "loss": 1.6043, "step": 846 }, { "epoch": 0.06, "grad_norm": 0.9403781294822693, "learning_rate": 0.00019799067644341844, "loss": 1.578, "step": 847 }, { "epoch": 0.06, "grad_norm": 0.952653169631958, "learning_rate": 0.0001979858885162359, "loss": 1.8872, "step": 848 }, { "epoch": 0.06, "grad_norm": 0.957654595375061, "learning_rate": 0.0001979810949494018, "loss": 1.8076, "step": 849 }, { "epoch": 0.06, "grad_norm": 3.046320915222168, "learning_rate": 0.000197976295743192, "loss": 1.8006, "step": 850 }, { "epoch": 0.06, "grad_norm": 0.8641126155853271, "learning_rate": 0.00019797149089788277, "loss": 1.5211, "step": 851 }, { "epoch": 0.07, "grad_norm": 3.110306978225708, "learning_rate": 0.00019796668041375067, "loss": 2.4593, "step": 852 }, { "epoch": 0.07, "grad_norm": 1.117106318473816, "learning_rate": 0.00019796186429107252, "loss": 2.321, "step": 853 }, { "epoch": 0.07, "grad_norm": 1.4969090223312378, "learning_rate": 
0.00019795704253012551, "loss": 1.4304, "step": 854 }, { "epoch": 0.07, "grad_norm": 1.0167938470840454, "learning_rate": 0.00019795221513118722, "loss": 1.352, "step": 855 }, { "epoch": 0.07, "grad_norm": 1.021862506866455, "learning_rate": 0.00019794738209453545, "loss": 1.2703, "step": 856 }, { "epoch": 0.07, "grad_norm": 0.6879276633262634, "learning_rate": 0.0001979425434204484, "loss": 1.24, "step": 857 }, { "epoch": 0.07, "grad_norm": 1.9491322040557861, "learning_rate": 0.00019793769910920453, "loss": 2.5223, "step": 858 }, { "epoch": 0.07, "grad_norm": 1.3366868495941162, "learning_rate": 0.00019793284916108265, "loss": 2.096, "step": 859 }, { "epoch": 0.07, "grad_norm": 1.056262731552124, "learning_rate": 0.00019792799357636192, "loss": 1.7624, "step": 860 }, { "epoch": 0.07, "grad_norm": 3.5525050163269043, "learning_rate": 0.00019792313235532185, "loss": 2.3484, "step": 861 }, { "epoch": 0.07, "grad_norm": 1.5184611082077026, "learning_rate": 0.00019791826549824215, "loss": 1.0957, "step": 862 }, { "epoch": 0.07, "grad_norm": 0.9048483371734619, "learning_rate": 0.00019791339300540302, "loss": 2.2778, "step": 863 }, { "epoch": 0.07, "grad_norm": 1.1752495765686035, "learning_rate": 0.00019790851487708482, "loss": 2.2643, "step": 864 }, { "epoch": 0.07, "grad_norm": 2.361531972885132, "learning_rate": 0.00019790363111356837, "loss": 2.0264, "step": 865 }, { "epoch": 0.07, "grad_norm": 0.8604225516319275, "learning_rate": 0.00019789874171513474, "loss": 2.143, "step": 866 }, { "epoch": 0.07, "grad_norm": 0.7787919044494629, "learning_rate": 0.00019789384668206534, "loss": 1.6303, "step": 867 }, { "epoch": 0.07, "grad_norm": 2.318676710128784, "learning_rate": 0.0001978889460146419, "loss": 2.7063, "step": 868 }, { "epoch": 0.07, "grad_norm": 0.9227376580238342, "learning_rate": 0.0001978840397131465, "loss": 2.0466, "step": 869 }, { "epoch": 0.07, "grad_norm": 0.9621989130973816, "learning_rate": 0.00019787912777786154, "loss": 1.6595, "step": 870 }, { 
"epoch": 0.07, "grad_norm": 1.7538005113601685, "learning_rate": 0.0001978742102090697, "loss": 1.7553, "step": 871 }, { "epoch": 0.07, "grad_norm": 1.2514182329177856, "learning_rate": 0.00019786928700705404, "loss": 1.9572, "step": 872 }, { "epoch": 0.07, "grad_norm": 0.7843230366706848, "learning_rate": 0.00019786435817209784, "loss": 1.8525, "step": 873 }, { "epoch": 0.07, "grad_norm": 1.3243868350982666, "learning_rate": 0.0001978594237044849, "loss": 1.5701, "step": 874 }, { "epoch": 0.07, "grad_norm": 0.9091726541519165, "learning_rate": 0.00019785448360449913, "loss": 1.6092, "step": 875 }, { "epoch": 0.07, "grad_norm": 2.253502130508423, "learning_rate": 0.00019784953787242495, "loss": 2.462, "step": 876 }, { "epoch": 0.07, "grad_norm": 1.672528862953186, "learning_rate": 0.00019784458650854694, "loss": 1.9586, "step": 877 }, { "epoch": 0.07, "grad_norm": 1.42753005027771, "learning_rate": 0.0001978396295131501, "loss": 1.8259, "step": 878 }, { "epoch": 0.07, "grad_norm": 5.9024834632873535, "learning_rate": 0.00019783466688651975, "loss": 2.0192, "step": 879 }, { "epoch": 0.07, "grad_norm": 1.4918692111968994, "learning_rate": 0.0001978296986289415, "loss": 1.0846, "step": 880 }, { "epoch": 0.07, "grad_norm": 2.3415610790252686, "learning_rate": 0.00019782472474070133, "loss": 0.9789, "step": 881 }, { "epoch": 0.07, "grad_norm": 0.9864740371704102, "learning_rate": 0.0001978197452220855, "loss": 1.6093, "step": 882 }, { "epoch": 0.07, "grad_norm": 2.04390811920166, "learning_rate": 0.00019781476007338058, "loss": 0.9898, "step": 883 }, { "epoch": 0.07, "grad_norm": 1.7151050567626953, "learning_rate": 0.00019780976929487353, "loss": 2.308, "step": 884 }, { "epoch": 0.07, "grad_norm": 2.459247350692749, "learning_rate": 0.0001978047728868516, "loss": 1.5573, "step": 885 }, { "epoch": 0.07, "grad_norm": 1.1675251722335815, "learning_rate": 0.00019779977084960232, "loss": 2.1981, "step": 886 }, { "epoch": 0.07, "grad_norm": 0.8504819869995117, 
"learning_rate": 0.00019779476318341365, "loss": 2.0112, "step": 887 }, { "epoch": 0.07, "grad_norm": 1.9052510261535645, "learning_rate": 0.00019778974988857376, "loss": 1.5079, "step": 888 }, { "epoch": 0.07, "grad_norm": 0.7742902636528015, "learning_rate": 0.00019778473096537121, "loss": 1.8655, "step": 889 }, { "epoch": 0.07, "grad_norm": 1.395048975944519, "learning_rate": 0.00019777970641409487, "loss": 2.1335, "step": 890 }, { "epoch": 0.07, "grad_norm": 1.164719820022583, "learning_rate": 0.0001977746762350339, "loss": 1.5476, "step": 891 }, { "epoch": 0.07, "grad_norm": 1.1120685338974, "learning_rate": 0.0001977696404284779, "loss": 1.5321, "step": 892 }, { "epoch": 0.07, "grad_norm": 1.0337257385253906, "learning_rate": 0.00019776459899471663, "loss": 2.2715, "step": 893 }, { "epoch": 0.07, "grad_norm": 1.4613476991653442, "learning_rate": 0.0001977595519340403, "loss": 1.5414, "step": 894 }, { "epoch": 0.07, "grad_norm": 1.0848426818847656, "learning_rate": 0.00019775449924673934, "loss": 1.4465, "step": 895 }, { "epoch": 0.07, "grad_norm": 1.0440407991409302, "learning_rate": 0.0001977494409331046, "loss": 1.2991, "step": 896 }, { "epoch": 0.07, "grad_norm": 0.9436949491500854, "learning_rate": 0.00019774437699342725, "loss": 2.0004, "step": 897 }, { "epoch": 0.07, "grad_norm": 0.8376148343086243, "learning_rate": 0.0001977393074279987, "loss": 1.3815, "step": 898 }, { "epoch": 0.07, "grad_norm": 1.1853795051574707, "learning_rate": 0.00019773423223711073, "loss": 1.7578, "step": 899 }, { "epoch": 0.07, "grad_norm": 1.3023202419281006, "learning_rate": 0.00019772915142105544, "loss": 1.8618, "step": 900 }, { "epoch": 0.07, "grad_norm": 0.6943494081497192, "learning_rate": 0.0001977240649801253, "loss": 1.2885, "step": 901 }, { "epoch": 0.07, "grad_norm": 1.1801667213439941, "learning_rate": 0.00019771897291461304, "loss": 2.1808, "step": 902 }, { "epoch": 0.07, "grad_norm": 0.9751922488212585, "learning_rate": 0.00019771387522481178, "loss": 1.624, 
"step": 903 }, { "epoch": 0.07, "grad_norm": 1.0974544286727905, "learning_rate": 0.00019770877191101484, "loss": 1.6757, "step": 904 }, { "epoch": 0.07, "grad_norm": 1.0093085765838623, "learning_rate": 0.000197703662973516, "loss": 2.0584, "step": 905 }, { "epoch": 0.07, "grad_norm": 1.3770114183425903, "learning_rate": 0.0001976985484126093, "loss": 1.6034, "step": 906 }, { "epoch": 0.07, "grad_norm": 0.9668818116188049, "learning_rate": 0.00019769342822858912, "loss": 1.4713, "step": 907 }, { "epoch": 0.07, "grad_norm": 1.7558382749557495, "learning_rate": 0.00019768830242175014, "loss": 1.2633, "step": 908 }, { "epoch": 0.07, "grad_norm": 0.7413291335105896, "learning_rate": 0.0001976831709923874, "loss": 1.5048, "step": 909 }, { "epoch": 0.07, "grad_norm": 1.0780562162399292, "learning_rate": 0.00019767803394079615, "loss": 2.0013, "step": 910 }, { "epoch": 0.07, "grad_norm": 0.9353559017181396, "learning_rate": 0.00019767289126727223, "loss": 1.8763, "step": 911 }, { "epoch": 0.07, "grad_norm": 0.8285757303237915, "learning_rate": 0.0001976677429721115, "loss": 1.8688, "step": 912 }, { "epoch": 0.07, "grad_norm": 1.8770428895950317, "learning_rate": 0.0001976625890556103, "loss": 1.4826, "step": 913 }, { "epoch": 0.07, "grad_norm": 0.757835865020752, "learning_rate": 0.00019765742951806527, "loss": 0.9859, "step": 914 }, { "epoch": 0.07, "grad_norm": 2.178598642349243, "learning_rate": 0.00019765226435977342, "loss": 1.5254, "step": 915 }, { "epoch": 0.07, "grad_norm": 1.0232371091842651, "learning_rate": 0.00019764709358103196, "loss": 1.9086, "step": 916 }, { "epoch": 0.07, "grad_norm": 1.1875590085983276, "learning_rate": 0.00019764191718213856, "loss": 1.7148, "step": 917 }, { "epoch": 0.07, "grad_norm": 0.9897807836532593, "learning_rate": 0.0001976367351633911, "loss": 1.7499, "step": 918 }, { "epoch": 0.07, "grad_norm": 0.9599157571792603, "learning_rate": 0.00019763154752508786, "loss": 1.8216, "step": 919 }, { "epoch": 0.07, "grad_norm": 
1.4207431077957153, "learning_rate": 0.0001976263542675274, "loss": 1.8926, "step": 920 }, { "epoch": 0.07, "grad_norm": 2.328608512878418, "learning_rate": 0.00019762115539100867, "loss": 2.5248, "step": 921 }, { "epoch": 0.07, "grad_norm": 1.1183234453201294, "learning_rate": 0.00019761595089583085, "loss": 2.1223, "step": 922 }, { "epoch": 0.07, "grad_norm": 4.158046722412109, "learning_rate": 0.0001976107407822935, "loss": 2.6758, "step": 923 }, { "epoch": 0.07, "grad_norm": 2.970327615737915, "learning_rate": 0.00019760552505069653, "loss": 2.2783, "step": 924 }, { "epoch": 0.07, "grad_norm": 1.3675352334976196, "learning_rate": 0.00019760030370134004, "loss": 2.2058, "step": 925 }, { "epoch": 0.07, "grad_norm": 0.7089325189590454, "learning_rate": 0.0001975950767345246, "loss": 1.1028, "step": 926 }, { "epoch": 0.07, "grad_norm": 1.0211254358291626, "learning_rate": 0.00019758984415055113, "loss": 1.9132, "step": 927 }, { "epoch": 0.07, "grad_norm": 1.7714298963546753, "learning_rate": 0.00019758460594972068, "loss": 1.2883, "step": 928 }, { "epoch": 0.07, "grad_norm": 2.116025924682617, "learning_rate": 0.00019757936213233478, "loss": 1.901, "step": 929 }, { "epoch": 0.07, "grad_norm": 1.1210893392562866, "learning_rate": 0.00019757411269869527, "loss": 1.1067, "step": 930 }, { "epoch": 0.07, "grad_norm": 1.8600870370864868, "learning_rate": 0.00019756885764910423, "loss": 1.1103, "step": 931 }, { "epoch": 0.07, "grad_norm": 1.19758141040802, "learning_rate": 0.0001975635969838642, "loss": 1.4363, "step": 932 }, { "epoch": 0.07, "grad_norm": 1.9861646890640259, "learning_rate": 0.00019755833070327788, "loss": 1.7652, "step": 933 }, { "epoch": 0.07, "grad_norm": 0.8907842040061951, "learning_rate": 0.00019755305880764842, "loss": 0.9553, "step": 934 }, { "epoch": 0.07, "grad_norm": 1.02144193649292, "learning_rate": 0.0001975477812972792, "loss": 1.7182, "step": 935 }, { "epoch": 0.07, "grad_norm": 1.3516772985458374, "learning_rate": 0.00019754249817247402, 
"loss": 1.2366, "step": 936 }, { "epoch": 0.07, "grad_norm": 0.7376115918159485, "learning_rate": 0.00019753720943353696, "loss": 1.9712, "step": 937 }, { "epoch": 0.07, "grad_norm": 1.3184542655944824, "learning_rate": 0.00019753191508077236, "loss": 1.8809, "step": 938 }, { "epoch": 0.07, "grad_norm": 1.8736258745193481, "learning_rate": 0.00019752661511448502, "loss": 1.556, "step": 939 }, { "epoch": 0.07, "grad_norm": 0.8025434017181396, "learning_rate": 0.0001975213095349799, "loss": 1.501, "step": 940 }, { "epoch": 0.07, "grad_norm": 0.7753646373748779, "learning_rate": 0.00019751599834256242, "loss": 1.7595, "step": 941 }, { "epoch": 0.07, "grad_norm": 0.7645821571350098, "learning_rate": 0.00019751068153753826, "loss": 1.7582, "step": 942 }, { "epoch": 0.07, "grad_norm": 2.1334235668182373, "learning_rate": 0.00019750535912021344, "loss": 1.4682, "step": 943 }, { "epoch": 0.07, "grad_norm": 2.143097162246704, "learning_rate": 0.00019750003109089427, "loss": 2.0503, "step": 944 }, { "epoch": 0.07, "grad_norm": 1.2060073614120483, "learning_rate": 0.0001974946974498874, "loss": 2.0707, "step": 945 }, { "epoch": 0.07, "grad_norm": 0.9279448986053467, "learning_rate": 0.00019748935819749987, "loss": 1.3786, "step": 946 }, { "epoch": 0.07, "grad_norm": 0.7886576056480408, "learning_rate": 0.00019748401333403895, "loss": 1.6546, "step": 947 }, { "epoch": 0.07, "grad_norm": 0.9880440831184387, "learning_rate": 0.00019747866285981224, "loss": 1.7795, "step": 948 }, { "epoch": 0.07, "grad_norm": 0.7780110836029053, "learning_rate": 0.00019747330677512777, "loss": 1.8818, "step": 949 }, { "epoch": 0.07, "grad_norm": 0.9402052760124207, "learning_rate": 0.00019746794508029373, "loss": 1.7002, "step": 950 }, { "epoch": 0.07, "grad_norm": 0.9718322157859802, "learning_rate": 0.00019746257777561872, "loss": 1.5353, "step": 951 }, { "epoch": 0.07, "grad_norm": 0.8860544562339783, "learning_rate": 0.00019745720486141172, "loss": 1.6715, "step": 952 }, { "epoch": 0.07, 
"grad_norm": 3.0250792503356934, "learning_rate": 0.00019745182633798194, "loss": 1.7727, "step": 953 }, { "epoch": 0.07, "grad_norm": 2.448195457458496, "learning_rate": 0.00019744644220563893, "loss": 1.7615, "step": 954 }, { "epoch": 0.07, "grad_norm": 0.8781421780586243, "learning_rate": 0.00019744105246469263, "loss": 1.5432, "step": 955 }, { "epoch": 0.07, "grad_norm": 1.8149746656417847, "learning_rate": 0.00019743565711545315, "loss": 1.589, "step": 956 }, { "epoch": 0.07, "grad_norm": 1.407163381576538, "learning_rate": 0.00019743025615823114, "loss": 1.5566, "step": 957 }, { "epoch": 0.07, "grad_norm": 1.34011709690094, "learning_rate": 0.00019742484959333739, "loss": 1.5415, "step": 958 }, { "epoch": 0.07, "grad_norm": 0.8413359522819519, "learning_rate": 0.00019741943742108305, "loss": 1.7095, "step": 959 }, { "epoch": 0.07, "grad_norm": 0.6450982689857483, "learning_rate": 0.00019741401964177972, "loss": 1.6596, "step": 960 }, { "epoch": 0.07, "grad_norm": 1.6175624132156372, "learning_rate": 0.00019740859625573916, "loss": 1.8091, "step": 961 }, { "epoch": 0.07, "grad_norm": 1.2629435062408447, "learning_rate": 0.0001974031672632735, "loss": 1.3899, "step": 962 }, { "epoch": 0.07, "grad_norm": 1.0034892559051514, "learning_rate": 0.00019739773266469522, "loss": 1.8755, "step": 963 }, { "epoch": 0.07, "grad_norm": 1.4857910871505737, "learning_rate": 0.00019739229246031718, "loss": 2.2288, "step": 964 }, { "epoch": 0.07, "grad_norm": 1.159600853919983, "learning_rate": 0.0001973868466504524, "loss": 2.0031, "step": 965 }, { "epoch": 0.07, "grad_norm": 0.8885807394981384, "learning_rate": 0.00019738139523541436, "loss": 2.3508, "step": 966 }, { "epoch": 0.07, "grad_norm": 1.3226993083953857, "learning_rate": 0.00019737593821551682, "loss": 2.276, "step": 967 }, { "epoch": 0.07, "grad_norm": 1.4353457689285278, "learning_rate": 0.00019737047559107392, "loss": 1.5113, "step": 968 }, { "epoch": 0.07, "grad_norm": 1.126673936843872, "learning_rate": 
0.00019736500736239993, "loss": 1.5343, "step": 969 }, { "epoch": 0.07, "grad_norm": 4.30476713180542, "learning_rate": 0.0001973595335298097, "loss": 2.4502, "step": 970 }, { "epoch": 0.07, "grad_norm": 1.2559471130371094, "learning_rate": 0.0001973540540936182, "loss": 1.7322, "step": 971 }, { "epoch": 0.07, "grad_norm": 0.8944337964057922, "learning_rate": 0.00019734856905414088, "loss": 1.3027, "step": 972 }, { "epoch": 0.07, "grad_norm": 1.5116291046142578, "learning_rate": 0.00019734307841169337, "loss": 1.8703, "step": 973 }, { "epoch": 0.07, "grad_norm": 1.5329183340072632, "learning_rate": 0.00019733758216659172, "loss": 1.9372, "step": 974 }, { "epoch": 0.07, "grad_norm": 1.8626140356063843, "learning_rate": 0.00019733208031915228, "loss": 1.6755, "step": 975 }, { "epoch": 0.07, "grad_norm": 0.9986488223075867, "learning_rate": 0.00019732657286969168, "loss": 1.9316, "step": 976 }, { "epoch": 0.07, "grad_norm": 2.6704440116882324, "learning_rate": 0.00019732105981852693, "loss": 1.9724, "step": 977 }, { "epoch": 0.07, "grad_norm": 2.016601324081421, "learning_rate": 0.00019731554116597533, "loss": 1.6954, "step": 978 }, { "epoch": 0.07, "grad_norm": 1.264439582824707, "learning_rate": 0.00019731001691235449, "loss": 1.8768, "step": 979 }, { "epoch": 0.07, "grad_norm": 0.8804512023925781, "learning_rate": 0.00019730448705798239, "loss": 1.6091, "step": 980 }, { "epoch": 0.07, "grad_norm": 0.9653787612915039, "learning_rate": 0.0001972989516031773, "loss": 1.6749, "step": 981 }, { "epoch": 0.07, "grad_norm": 1.4916048049926758, "learning_rate": 0.00019729341054825782, "loss": 2.0026, "step": 982 }, { "epoch": 0.08, "grad_norm": 1.5243887901306152, "learning_rate": 0.00019728786389354287, "loss": 1.5465, "step": 983 }, { "epoch": 0.08, "grad_norm": 1.5369163751602173, "learning_rate": 0.00019728231163935168, "loss": 1.7124, "step": 984 }, { "epoch": 0.08, "grad_norm": 4.517394065856934, "learning_rate": 0.0001972767537860038, "loss": 1.7731, "step": 985 }, { 
"epoch": 0.08, "grad_norm": 0.8525301814079285, "learning_rate": 0.00019727119033381918, "loss": 1.8226, "step": 986 }, { "epoch": 0.08, "grad_norm": 3.1923301219940186, "learning_rate": 0.00019726562128311796, "loss": 2.3212, "step": 987 }, { "epoch": 0.08, "grad_norm": 1.5405166149139404, "learning_rate": 0.0001972600466342207, "loss": 1.5542, "step": 988 }, { "epoch": 0.08, "grad_norm": 1.1624740362167358, "learning_rate": 0.00019725446638744828, "loss": 1.821, "step": 989 }, { "epoch": 0.08, "grad_norm": 0.903938353061676, "learning_rate": 0.00019724888054312178, "loss": 1.9075, "step": 990 }, { "epoch": 0.08, "grad_norm": 1.1348859071731567, "learning_rate": 0.0001972432891015628, "loss": 1.6234, "step": 991 }, { "epoch": 0.08, "grad_norm": 0.820160984992981, "learning_rate": 0.00019723769206309312, "loss": 1.8184, "step": 992 }, { "epoch": 0.08, "grad_norm": 1.1184098720550537, "learning_rate": 0.0001972320894280349, "loss": 1.8487, "step": 993 }, { "epoch": 0.08, "grad_norm": 0.8746883869171143, "learning_rate": 0.00019722648119671057, "loss": 1.7167, "step": 994 }, { "epoch": 0.08, "grad_norm": 1.7073476314544678, "learning_rate": 0.0001972208673694429, "loss": 2.0847, "step": 995 }, { "epoch": 0.08, "grad_norm": 1.7132796049118042, "learning_rate": 0.0001972152479465551, "loss": 2.6406, "step": 996 }, { "epoch": 0.08, "grad_norm": 1.4833531379699707, "learning_rate": 0.00019720962292837048, "loss": 1.4243, "step": 997 }, { "epoch": 0.08, "grad_norm": 0.9135710000991821, "learning_rate": 0.00019720399231521287, "loss": 2.1265, "step": 998 }, { "epoch": 0.08, "grad_norm": 1.474632978439331, "learning_rate": 0.0001971983561074063, "loss": 1.726, "step": 999 }, { "epoch": 0.08, "grad_norm": 1.6065622568130493, "learning_rate": 0.0001971927143052752, "loss": 1.88, "step": 1000 }, { "epoch": 0.08, "grad_norm": 3.0712621212005615, "learning_rate": 0.00019718706690914428, "loss": 1.61, "step": 1001 }, { "epoch": 0.08, "grad_norm": 0.9234281778335571, 
"learning_rate": 0.00019718141391933855, "loss": 1.2567, "step": 1002 }, { "epoch": 0.08, "grad_norm": 1.4607878923416138, "learning_rate": 0.0001971757553361834, "loss": 1.8561, "step": 1003 }, { "epoch": 0.08, "grad_norm": 0.9247038960456848, "learning_rate": 0.00019717009116000452, "loss": 1.4694, "step": 1004 }, { "epoch": 0.08, "grad_norm": 1.2830522060394287, "learning_rate": 0.0001971644213911279, "loss": 1.5489, "step": 1005 }, { "epoch": 0.08, "grad_norm": 1.733462929725647, "learning_rate": 0.00019715874602987987, "loss": 1.8224, "step": 1006 }, { "epoch": 0.08, "grad_norm": 0.9097613096237183, "learning_rate": 0.00019715306507658706, "loss": 1.2101, "step": 1007 }, { "epoch": 0.08, "grad_norm": 2.183960437774658, "learning_rate": 0.0001971473785315765, "loss": 1.8153, "step": 1008 }, { "epoch": 0.08, "grad_norm": 1.4547712802886963, "learning_rate": 0.00019714168639517544, "loss": 1.7184, "step": 1009 }, { "epoch": 0.08, "grad_norm": 1.085234522819519, "learning_rate": 0.0001971359886677115, "loss": 1.3811, "step": 1010 }, { "epoch": 0.08, "grad_norm": 1.2937395572662354, "learning_rate": 0.0001971302853495126, "loss": 2.4155, "step": 1011 }, { "epoch": 0.08, "grad_norm": 1.540176510810852, "learning_rate": 0.00019712457644090704, "loss": 1.3699, "step": 1012 }, { "epoch": 0.08, "grad_norm": 1.3980220556259155, "learning_rate": 0.00019711886194222338, "loss": 2.3483, "step": 1013 }, { "epoch": 0.08, "grad_norm": 0.797552227973938, "learning_rate": 0.0001971131418537905, "loss": 1.4503, "step": 1014 }, { "epoch": 0.08, "grad_norm": 1.1276936531066895, "learning_rate": 0.00019710741617593767, "loss": 1.4863, "step": 1015 }, { "epoch": 0.08, "grad_norm": 1.9931398630142212, "learning_rate": 0.00019710168490899438, "loss": 1.8027, "step": 1016 }, { "epoch": 0.08, "grad_norm": 1.3622077703475952, "learning_rate": 0.00019709594805329057, "loss": 1.9488, "step": 1017 }, { "epoch": 0.08, "grad_norm": 1.0623769760131836, "learning_rate": 0.00019709020560915635, 
"loss": 1.7218, "step": 1018 }, { "epoch": 0.08, "grad_norm": 0.7983946204185486, "learning_rate": 0.0001970844575769223, "loss": 1.5256, "step": 1019 }, { "epoch": 0.08, "grad_norm": 0.8897712826728821, "learning_rate": 0.00019707870395691918, "loss": 3.0461, "step": 1020 }, { "epoch": 0.08, "grad_norm": 1.3808753490447998, "learning_rate": 0.0001970729447494782, "loss": 1.4816, "step": 1021 }, { "epoch": 0.08, "grad_norm": 1.8726938962936401, "learning_rate": 0.00019706717995493086, "loss": 2.087, "step": 1022 }, { "epoch": 0.08, "grad_norm": 1.0694204568862915, "learning_rate": 0.00019706140957360886, "loss": 1.7647, "step": 1023 }, { "epoch": 0.08, "grad_norm": 0.8208509087562561, "learning_rate": 0.0001970556336058444, "loss": 2.0781, "step": 1024 }, { "epoch": 0.08, "grad_norm": 1.029175043106079, "learning_rate": 0.0001970498520519699, "loss": 2.0924, "step": 1025 }, { "epoch": 0.08, "grad_norm": 1.1001254320144653, "learning_rate": 0.0001970440649123181, "loss": 1.8238, "step": 1026 }, { "epoch": 0.08, "grad_norm": 1.11354660987854, "learning_rate": 0.0001970382721872221, "loss": 1.2547, "step": 1027 }, { "epoch": 0.08, "grad_norm": 1.180396318435669, "learning_rate": 0.00019703247387701533, "loss": 2.3469, "step": 1028 }, { "epoch": 0.08, "grad_norm": 0.7945447564125061, "learning_rate": 0.00019702666998203146, "loss": 1.4318, "step": 1029 }, { "epoch": 0.08, "grad_norm": 1.1523860692977905, "learning_rate": 0.00019702086050260456, "loss": 2.0279, "step": 1030 }, { "epoch": 0.08, "grad_norm": 1.0859284400939941, "learning_rate": 0.00019701504543906905, "loss": 1.331, "step": 1031 }, { "epoch": 0.08, "grad_norm": 0.8142756223678589, "learning_rate": 0.00019700922479175956, "loss": 1.8202, "step": 1032 }, { "epoch": 0.08, "grad_norm": 0.9446786642074585, "learning_rate": 0.00019700339856101114, "loss": 1.1092, "step": 1033 }, { "epoch": 0.08, "grad_norm": 2.3510711193084717, "learning_rate": 0.00019699756674715908, "loss": 1.5508, "step": 1034 }, { "epoch": 
0.08, "grad_norm": 1.0520813465118408, "learning_rate": 0.00019699172935053904, "loss": 1.7838, "step": 1035 }, { "epoch": 0.08, "grad_norm": 1.9613474607467651, "learning_rate": 0.00019698588637148703, "loss": 2.146, "step": 1036 }, { "epoch": 0.08, "grad_norm": 1.8260915279388428, "learning_rate": 0.00019698003781033933, "loss": 0.8514, "step": 1037 }, { "epoch": 0.08, "grad_norm": 1.2634867429733276, "learning_rate": 0.0001969741836674326, "loss": 1.7116, "step": 1038 }, { "epoch": 0.08, "grad_norm": 1.1844093799591064, "learning_rate": 0.00019696832394310368, "loss": 1.9418, "step": 1039 }, { "epoch": 0.08, "grad_norm": 1.0950504541397095, "learning_rate": 0.0001969624586376899, "loss": 1.8885, "step": 1040 }, { "epoch": 0.08, "grad_norm": 0.8122687935829163, "learning_rate": 0.00019695658775152882, "loss": 1.8131, "step": 1041 }, { "epoch": 0.08, "grad_norm": 3.437365770339966, "learning_rate": 0.00019695071128495839, "loss": 2.0631, "step": 1042 }, { "epoch": 0.08, "grad_norm": 1.811484456062317, "learning_rate": 0.0001969448292383168, "loss": 1.2319, "step": 1043 }, { "epoch": 0.08, "grad_norm": 0.8990453481674194, "learning_rate": 0.00019693894161194254, "loss": 1.4183, "step": 1044 }, { "epoch": 0.08, "grad_norm": 0.8370931148529053, "learning_rate": 0.00019693304840617457, "loss": 1.7258, "step": 1045 }, { "epoch": 0.08, "grad_norm": 1.1424763202667236, "learning_rate": 0.00019692714962135204, "loss": 1.912, "step": 1046 }, { "epoch": 0.08, "grad_norm": 2.9868323802948, "learning_rate": 0.00019692124525781446, "loss": 1.5971, "step": 1047 }, { "epoch": 0.08, "grad_norm": 1.386628270149231, "learning_rate": 0.00019691533531590163, "loss": 1.5047, "step": 1048 }, { "epoch": 0.08, "grad_norm": 1.5201491117477417, "learning_rate": 0.00019690941979595374, "loss": 1.1868, "step": 1049 }, { "epoch": 0.08, "grad_norm": 1.3586597442626953, "learning_rate": 0.00019690349869831128, "loss": 1.6431, "step": 1050 }, { "epoch": 0.08, "grad_norm": 2.4572842121124268, 
"learning_rate": 0.000196897572023315, "loss": 2.2401, "step": 1051 }, { "epoch": 0.08, "grad_norm": 4.93280029296875, "learning_rate": 0.000196891639771306, "loss": 2.5237, "step": 1052 }, { "epoch": 0.08, "grad_norm": 0.6911733150482178, "learning_rate": 0.0001968857019426258, "loss": 1.4952, "step": 1053 }, { "epoch": 0.08, "grad_norm": 1.192158818244934, "learning_rate": 0.00019687975853761606, "loss": 1.6679, "step": 1054 }, { "epoch": 0.08, "grad_norm": 0.7423650622367859, "learning_rate": 0.0001968738095566189, "loss": 1.8052, "step": 1055 }, { "epoch": 0.08, "grad_norm": 0.7916703224182129, "learning_rate": 0.0001968678549999767, "loss": 1.7223, "step": 1056 }, { "epoch": 0.08, "grad_norm": 1.0062793493270874, "learning_rate": 0.0001968618948680322, "loss": 1.3274, "step": 1057 }, { "epoch": 0.08, "grad_norm": 2.805494546890259, "learning_rate": 0.00019685592916112847, "loss": 0.9674, "step": 1058 }, { "epoch": 0.08, "grad_norm": 1.2609490156173706, "learning_rate": 0.00019684995787960883, "loss": 1.8828, "step": 1059 }, { "epoch": 0.08, "grad_norm": 1.0806233882904053, "learning_rate": 0.00019684398102381694, "loss": 1.1057, "step": 1060 }, { "epoch": 0.08, "grad_norm": 0.9028769731521606, "learning_rate": 0.00019683799859409684, "loss": 2.1279, "step": 1061 }, { "epoch": 0.08, "grad_norm": 1.1279006004333496, "learning_rate": 0.00019683201059079287, "loss": 1.5322, "step": 1062 }, { "epoch": 0.08, "grad_norm": 2.2182273864746094, "learning_rate": 0.0001968260170142496, "loss": 1.9263, "step": 1063 }, { "epoch": 0.08, "grad_norm": 0.8948494791984558, "learning_rate": 0.00019682001786481204, "loss": 1.4784, "step": 1064 }, { "epoch": 0.08, "grad_norm": 1.6708674430847168, "learning_rate": 0.0001968140131428255, "loss": 2.0941, "step": 1065 }, { "epoch": 0.08, "grad_norm": 1.2462337017059326, "learning_rate": 0.00019680800284863556, "loss": 1.66, "step": 1066 }, { "epoch": 0.08, "grad_norm": 1.183008074760437, "learning_rate": 0.00019680198698258816, "loss": 
1.6316, "step": 1067 }, { "epoch": 0.08, "grad_norm": 1.1155831813812256, "learning_rate": 0.00019679596554502952, "loss": 2.1803, "step": 1068 }, { "epoch": 0.08, "grad_norm": 1.0008281469345093, "learning_rate": 0.00019678993853630624, "loss": 1.3901, "step": 1069 }, { "epoch": 0.08, "grad_norm": 4.078868389129639, "learning_rate": 0.0001967839059567652, "loss": 2.4179, "step": 1070 }, { "epoch": 0.08, "grad_norm": 1.2592445611953735, "learning_rate": 0.00019677786780675354, "loss": 1.0168, "step": 1071 }, { "epoch": 0.08, "grad_norm": 0.9781190156936646, "learning_rate": 0.00019677182408661895, "loss": 2.0043, "step": 1072 }, { "epoch": 0.08, "grad_norm": 1.0124030113220215, "learning_rate": 0.00019676577479670914, "loss": 1.3854, "step": 1073 }, { "epoch": 0.08, "grad_norm": 0.8727971315383911, "learning_rate": 0.00019675971993737232, "loss": 1.6763, "step": 1074 }, { "epoch": 0.08, "grad_norm": 1.4953126907348633, "learning_rate": 0.000196753659508957, "loss": 2.1067, "step": 1075 }, { "epoch": 0.08, "grad_norm": 1.20857834815979, "learning_rate": 0.00019674759351181198, "loss": 1.8816, "step": 1076 }, { "epoch": 0.08, "grad_norm": 2.1168086528778076, "learning_rate": 0.00019674152194628638, "loss": 1.5446, "step": 1077 }, { "epoch": 0.08, "grad_norm": 1.0908111333847046, "learning_rate": 0.0001967354448127297, "loss": 1.9952, "step": 1078 }, { "epoch": 0.08, "grad_norm": 1.2420053482055664, "learning_rate": 0.00019672936211149168, "loss": 1.7036, "step": 1079 }, { "epoch": 0.08, "grad_norm": 1.4361591339111328, "learning_rate": 0.0001967232738429224, "loss": 2.3847, "step": 1080 }, { "epoch": 0.08, "grad_norm": 1.297052025794983, "learning_rate": 0.0001967171800073723, "loss": 1.4943, "step": 1081 }, { "epoch": 0.08, "grad_norm": 1.0163664817810059, "learning_rate": 0.0001967110806051921, "loss": 2.1457, "step": 1082 }, { "epoch": 0.08, "grad_norm": 1.0676305294036865, "learning_rate": 0.00019670497563673288, "loss": 2.0478, "step": 1083 }, { "epoch": 0.08, 
"grad_norm": 0.9745421409606934, "learning_rate": 0.00019669886510234597, "loss": 1.8333, "step": 1084 }, { "epoch": 0.08, "grad_norm": 0.9407121539115906, "learning_rate": 0.0001966927490023831, "loss": 1.7045, "step": 1085 }, { "epoch": 0.08, "grad_norm": 1.7390350103378296, "learning_rate": 0.0001966866273371963, "loss": 1.6253, "step": 1086 }, { "epoch": 0.08, "grad_norm": 1.2957210540771484, "learning_rate": 0.00019668050010713788, "loss": 1.3933, "step": 1087 }, { "epoch": 0.08, "grad_norm": 0.8012015223503113, "learning_rate": 0.0001966743673125605, "loss": 1.8768, "step": 1088 }, { "epoch": 0.08, "grad_norm": 1.7582695484161377, "learning_rate": 0.00019666822895381716, "loss": 1.6203, "step": 1089 }, { "epoch": 0.08, "grad_norm": 0.9301621317863464, "learning_rate": 0.00019666208503126112, "loss": 1.3859, "step": 1090 }, { "epoch": 0.08, "grad_norm": 2.431381940841675, "learning_rate": 0.00019665593554524604, "loss": 2.852, "step": 1091 }, { "epoch": 0.08, "grad_norm": 0.872393786907196, "learning_rate": 0.00019664978049612584, "loss": 1.3405, "step": 1092 }, { "epoch": 0.08, "grad_norm": 0.9922826290130615, "learning_rate": 0.00019664361988425477, "loss": 1.8971, "step": 1093 }, { "epoch": 0.08, "grad_norm": 0.8852066993713379, "learning_rate": 0.0001966374537099874, "loss": 1.7549, "step": 1094 }, { "epoch": 0.08, "grad_norm": 1.054330825805664, "learning_rate": 0.00019663128197367865, "loss": 2.302, "step": 1095 }, { "epoch": 0.08, "grad_norm": 0.7639527916908264, "learning_rate": 0.00019662510467568372, "loss": 1.443, "step": 1096 }, { "epoch": 0.08, "grad_norm": 1.177476167678833, "learning_rate": 0.00019661892181635818, "loss": 1.4903, "step": 1097 }, { "epoch": 0.08, "grad_norm": 1.2513220310211182, "learning_rate": 0.0001966127333960579, "loss": 1.679, "step": 1098 }, { "epoch": 0.08, "grad_norm": 1.5642998218536377, "learning_rate": 0.00019660653941513902, "loss": 1.7594, "step": 1099 }, { "epoch": 0.08, "grad_norm": 0.7794055342674255, 
"learning_rate": 0.00019660033987395803, "loss": 2.1983, "step": 1100 }, { "epoch": 0.08, "grad_norm": 1.2909926176071167, "learning_rate": 0.00019659413477287173, "loss": 1.7492, "step": 1101 }, { "epoch": 0.08, "grad_norm": 1.2489957809448242, "learning_rate": 0.00019658792411223736, "loss": 1.6061, "step": 1102 }, { "epoch": 0.08, "grad_norm": 1.3933746814727783, "learning_rate": 0.0001965817078924123, "loss": 1.2208, "step": 1103 }, { "epoch": 0.08, "grad_norm": 1.7769758701324463, "learning_rate": 0.00019657548611375436, "loss": 1.5385, "step": 1104 }, { "epoch": 0.08, "grad_norm": 1.0747740268707275, "learning_rate": 0.0001965692587766216, "loss": 1.9158, "step": 1105 }, { "epoch": 0.08, "grad_norm": 0.9821624755859375, "learning_rate": 0.00019656302588137248, "loss": 1.9688, "step": 1106 }, { "epoch": 0.08, "grad_norm": 0.8094685673713684, "learning_rate": 0.00019655678742836572, "loss": 1.6268, "step": 1107 }, { "epoch": 0.08, "grad_norm": 1.3690413236618042, "learning_rate": 0.00019655054341796038, "loss": 2.1188, "step": 1108 }, { "epoch": 0.08, "grad_norm": 0.9380735754966736, "learning_rate": 0.00019654429385051583, "loss": 1.9619, "step": 1109 }, { "epoch": 0.08, "grad_norm": 2.946852445602417, "learning_rate": 0.00019653803872639178, "loss": 2.6392, "step": 1110 }, { "epoch": 0.08, "grad_norm": 1.0638010501861572, "learning_rate": 0.00019653177804594826, "loss": 2.3132, "step": 1111 }, { "epoch": 0.08, "grad_norm": 1.0649924278259277, "learning_rate": 0.0001965255118095456, "loss": 1.8542, "step": 1112 }, { "epoch": 0.08, "grad_norm": 0.7608064413070679, "learning_rate": 0.00019651924001754445, "loss": 1.1908, "step": 1113 }, { "epoch": 0.09, "grad_norm": 0.867633044719696, "learning_rate": 0.00019651296267030578, "loss": 1.7645, "step": 1114 }, { "epoch": 0.09, "grad_norm": 1.2243244647979736, "learning_rate": 0.00019650667976819092, "loss": 2.1253, "step": 1115 }, { "epoch": 0.09, "grad_norm": 1.5399110317230225, "learning_rate": 
0.00019650039131156145, "loss": 1.86, "step": 1116 }, { "epoch": 0.09, "grad_norm": 1.1012349128723145, "learning_rate": 0.00019649409730077935, "loss": 1.8717, "step": 1117 }, { "epoch": 0.09, "grad_norm": 0.856971025466919, "learning_rate": 0.00019648779773620684, "loss": 1.4006, "step": 1118 }, { "epoch": 0.09, "grad_norm": 1.269486904144287, "learning_rate": 0.00019648149261820649, "loss": 1.5644, "step": 1119 }, { "epoch": 0.09, "grad_norm": 1.4340267181396484, "learning_rate": 0.00019647518194714124, "loss": 2.3453, "step": 1120 }, { "epoch": 0.09, "grad_norm": 3.9380342960357666, "learning_rate": 0.00019646886572337424, "loss": 3.0371, "step": 1121 }, { "epoch": 0.09, "grad_norm": 1.0270600318908691, "learning_rate": 0.00019646254394726907, "loss": 1.6306, "step": 1122 }, { "epoch": 0.09, "grad_norm": 1.5616741180419922, "learning_rate": 0.0001964562166191896, "loss": 1.482, "step": 1123 }, { "epoch": 0.09, "grad_norm": 1.6575204133987427, "learning_rate": 0.00019644988373949994, "loss": 2.4243, "step": 1124 }, { "epoch": 0.09, "grad_norm": 1.4440926313400269, "learning_rate": 0.0001964435453085647, "loss": 1.8835, "step": 1125 }, { "epoch": 0.09, "grad_norm": 1.322563886642456, "learning_rate": 0.00019643720132674856, "loss": 2.1507, "step": 1126 }, { "epoch": 0.09, "grad_norm": 0.836170494556427, "learning_rate": 0.00019643085179441673, "loss": 2.1566, "step": 1127 }, { "epoch": 0.09, "grad_norm": 1.3648782968521118, "learning_rate": 0.0001964244967119346, "loss": 1.5391, "step": 1128 }, { "epoch": 0.09, "grad_norm": 2.5141761302948, "learning_rate": 0.00019641813607966803, "loss": 3.3147, "step": 1129 }, { "epoch": 0.09, "grad_norm": 1.3985037803649902, "learning_rate": 0.00019641176989798305, "loss": 1.8514, "step": 1130 }, { "epoch": 0.09, "grad_norm": 1.102832555770874, "learning_rate": 0.0001964053981672461, "loss": 1.6705, "step": 1131 }, { "epoch": 0.09, "grad_norm": 1.0584547519683838, "learning_rate": 0.00019639902088782387, "loss": 1.6527, 
"step": 1132 }, { "epoch": 0.09, "grad_norm": 1.0327388048171997, "learning_rate": 0.0001963926380600835, "loss": 1.2578, "step": 1133 }, { "epoch": 0.09, "grad_norm": 1.9700140953063965, "learning_rate": 0.00019638624968439222, "loss": 1.8837, "step": 1134 }, { "epoch": 0.09, "grad_norm": 2.0261149406433105, "learning_rate": 0.00019637985576111778, "loss": 1.7109, "step": 1135 }, { "epoch": 0.09, "grad_norm": 0.8239877223968506, "learning_rate": 0.00019637345629062826, "loss": 1.7915, "step": 1136 }, { "epoch": 0.09, "grad_norm": 1.2542163133621216, "learning_rate": 0.0001963670512732919, "loss": 1.8776, "step": 1137 }, { "epoch": 0.09, "grad_norm": 1.0960030555725098, "learning_rate": 0.00019636064070947736, "loss": 1.7195, "step": 1138 }, { "epoch": 0.09, "grad_norm": 1.4278427362442017, "learning_rate": 0.00019635422459955365, "loss": 2.4473, "step": 1139 }, { "epoch": 0.09, "grad_norm": 0.8783361911773682, "learning_rate": 0.00019634780294388996, "loss": 1.4403, "step": 1140 }, { "epoch": 0.09, "grad_norm": 1.4415777921676636, "learning_rate": 0.000196341375742856, "loss": 1.4324, "step": 1141 }, { "epoch": 0.09, "grad_norm": 1.841482400894165, "learning_rate": 0.00019633494299682163, "loss": 1.4126, "step": 1142 }, { "epoch": 0.09, "grad_norm": 2.3746585845947266, "learning_rate": 0.00019632850470615706, "loss": 2.4526, "step": 1143 }, { "epoch": 0.09, "grad_norm": 1.7990822792053223, "learning_rate": 0.00019632206087123296, "loss": 1.8986, "step": 1144 }, { "epoch": 0.09, "grad_norm": 0.8228871822357178, "learning_rate": 0.00019631561149242011, "loss": 1.1934, "step": 1145 }, { "epoch": 0.09, "grad_norm": 1.1393336057662964, "learning_rate": 0.00019630915657008977, "loss": 1.3339, "step": 1146 }, { "epoch": 0.09, "grad_norm": 1.0545927286148071, "learning_rate": 0.00019630269610461344, "loss": 2.2701, "step": 1147 }, { "epoch": 0.09, "grad_norm": 1.1239838600158691, "learning_rate": 0.00019629623009636294, "loss": 1.3016, "step": 1148 }, { "epoch": 0.09, 
"grad_norm": 1.3616957664489746, "learning_rate": 0.00019628975854571042, "loss": 1.5606, "step": 1149 }, { "epoch": 0.09, "grad_norm": 0.8961825966835022, "learning_rate": 0.00019628328145302837, "loss": 2.0135, "step": 1150 }, { "epoch": 0.09, "grad_norm": 1.3841980695724487, "learning_rate": 0.00019627679881868958, "loss": 1.6453, "step": 1151 }, { "epoch": 0.09, "grad_norm": 0.9988549947738647, "learning_rate": 0.0001962703106430672, "loss": 1.2755, "step": 1152 }, { "epoch": 0.09, "grad_norm": 0.7383997440338135, "learning_rate": 0.00019626381692653463, "loss": 1.8526, "step": 1153 }, { "epoch": 0.09, "grad_norm": 0.9171084761619568, "learning_rate": 0.00019625731766946558, "loss": 1.9721, "step": 1154 }, { "epoch": 0.09, "grad_norm": 1.2907060384750366, "learning_rate": 0.0001962508128722342, "loss": 1.1064, "step": 1155 }, { "epoch": 0.09, "grad_norm": 1.1835438013076782, "learning_rate": 0.0001962443025352148, "loss": 1.652, "step": 1156 }, { "epoch": 0.09, "grad_norm": 1.2583519220352173, "learning_rate": 0.00019623778665878215, "loss": 2.0974, "step": 1157 }, { "epoch": 0.09, "grad_norm": 1.5483134984970093, "learning_rate": 0.00019623126524331125, "loss": 1.4107, "step": 1158 }, { "epoch": 0.09, "grad_norm": 0.7472237348556519, "learning_rate": 0.00019622473828917746, "loss": 1.4777, "step": 1159 }, { "epoch": 0.09, "grad_norm": 0.8828908205032349, "learning_rate": 0.0001962182057967564, "loss": 2.1029, "step": 1160 }, { "epoch": 0.09, "grad_norm": 1.5681816339492798, "learning_rate": 0.0001962116677664241, "loss": 2.5508, "step": 1161 }, { "epoch": 0.09, "grad_norm": 1.0022519826889038, "learning_rate": 0.00019620512419855684, "loss": 1.6866, "step": 1162 }, { "epoch": 0.09, "grad_norm": 1.1082496643066406, "learning_rate": 0.00019619857509353125, "loss": 1.4806, "step": 1163 }, { "epoch": 0.09, "grad_norm": 1.190071702003479, "learning_rate": 0.00019619202045172428, "loss": 1.921, "step": 1164 }, { "epoch": 0.09, "grad_norm": 2.5783767700195312, 
"learning_rate": 0.00019618546027351316, "loss": 2.1725, "step": 1165 }, { "epoch": 0.09, "grad_norm": 1.3556395769119263, "learning_rate": 0.00019617889455927546, "loss": 1.503, "step": 1166 }, { "epoch": 0.09, "grad_norm": 0.9035778641700745, "learning_rate": 0.0001961723233093891, "loss": 1.801, "step": 1167 }, { "epoch": 0.09, "grad_norm": 1.189590573310852, "learning_rate": 0.00019616574652423229, "loss": 1.8425, "step": 1168 }, { "epoch": 0.09, "grad_norm": 0.8131442070007324, "learning_rate": 0.00019615916420418354, "loss": 2.1052, "step": 1169 }, { "epoch": 0.09, "grad_norm": 4.744234085083008, "learning_rate": 0.00019615257634962173, "loss": 2.7376, "step": 1170 }, { "epoch": 0.09, "grad_norm": 5.219825267791748, "learning_rate": 0.000196145982960926, "loss": 2.4899, "step": 1171 }, { "epoch": 0.09, "grad_norm": 1.047334909439087, "learning_rate": 0.00019613938403847587, "loss": 1.6815, "step": 1172 }, { "epoch": 0.09, "grad_norm": 0.9472983479499817, "learning_rate": 0.00019613277958265113, "loss": 1.953, "step": 1173 }, { "epoch": 0.09, "grad_norm": 0.8085423707962036, "learning_rate": 0.0001961261695938319, "loss": 1.7147, "step": 1174 }, { "epoch": 0.09, "grad_norm": 0.8676409125328064, "learning_rate": 0.0001961195540723986, "loss": 1.8084, "step": 1175 }, { "epoch": 0.09, "grad_norm": 0.8678042888641357, "learning_rate": 0.00019611293301873207, "loss": 1.832, "step": 1176 }, { "epoch": 0.09, "grad_norm": 1.4674674272537231, "learning_rate": 0.0001961063064332133, "loss": 1.1114, "step": 1177 }, { "epoch": 0.09, "grad_norm": 0.8783338665962219, "learning_rate": 0.00019609967431622374, "loss": 1.3892, "step": 1178 }, { "epoch": 0.09, "grad_norm": 1.1932621002197266, "learning_rate": 0.0001960930366681451, "loss": 1.9694, "step": 1179 }, { "epoch": 0.09, "grad_norm": 1.8598819971084595, "learning_rate": 0.0001960863934893594, "loss": 2.0212, "step": 1180 }, { "epoch": 0.09, "grad_norm": 1.021093487739563, "learning_rate": 0.00019607974478024897, "loss": 
2.2, "step": 1181 }, { "epoch": 0.09, "grad_norm": 1.7676541805267334, "learning_rate": 0.00019607309054119653, "loss": 1.5804, "step": 1182 }, { "epoch": 0.09, "grad_norm": 1.7492783069610596, "learning_rate": 0.00019606643077258506, "loss": 1.5126, "step": 1183 }, { "epoch": 0.09, "grad_norm": 1.7587288618087769, "learning_rate": 0.00019605976547479786, "loss": 2.1042, "step": 1184 }, { "epoch": 0.09, "grad_norm": 0.9091061353683472, "learning_rate": 0.00019605309464821856, "loss": 1.8163, "step": 1185 }, { "epoch": 0.09, "grad_norm": 1.2144639492034912, "learning_rate": 0.00019604641829323107, "loss": 1.9921, "step": 1186 }, { "epoch": 0.09, "grad_norm": 0.9933616518974304, "learning_rate": 0.00019603973641021968, "loss": 1.9844, "step": 1187 }, { "epoch": 0.09, "grad_norm": 1.271286964416504, "learning_rate": 0.00019603304899956902, "loss": 1.4809, "step": 1188 }, { "epoch": 0.09, "grad_norm": 1.1706914901733398, "learning_rate": 0.0001960263560616639, "loss": 1.8563, "step": 1189 }, { "epoch": 0.09, "grad_norm": 0.8069616556167603, "learning_rate": 0.00019601965759688962, "loss": 1.4395, "step": 1190 }, { "epoch": 0.09, "grad_norm": 2.33027720451355, "learning_rate": 0.00019601295360563163, "loss": 2.0825, "step": 1191 }, { "epoch": 0.09, "grad_norm": 1.548858880996704, "learning_rate": 0.00019600624408827586, "loss": 1.2956, "step": 1192 }, { "epoch": 0.09, "grad_norm": 0.9299944639205933, "learning_rate": 0.00019599952904520844, "loss": 1.6608, "step": 1193 }, { "epoch": 0.09, "grad_norm": 1.2038675546646118, "learning_rate": 0.00019599280847681585, "loss": 1.9264, "step": 1194 }, { "epoch": 0.09, "grad_norm": 1.4442802667617798, "learning_rate": 0.00019598608238348493, "loss": 1.9584, "step": 1195 }, { "epoch": 0.09, "grad_norm": 1.186059832572937, "learning_rate": 0.00019597935076560278, "loss": 2.0564, "step": 1196 }, { "epoch": 0.09, "grad_norm": 1.1937203407287598, "learning_rate": 0.00019597261362355687, "loss": 1.4995, "step": 1197 }, { "epoch": 0.09, 
"grad_norm": 2.4357693195343018, "learning_rate": 0.00019596587095773495, "loss": 2.3336, "step": 1198 }, { "epoch": 0.09, "grad_norm": 1.2524091005325317, "learning_rate": 0.00019595912276852508, "loss": 0.868, "step": 1199 }, { "epoch": 0.09, "grad_norm": 1.0768048763275146, "learning_rate": 0.0001959523690563157, "loss": 2.071, "step": 1200 }, { "epoch": 0.09, "grad_norm": 3.3540523052215576, "learning_rate": 0.00019594560982149546, "loss": 1.9702, "step": 1201 }, { "epoch": 0.09, "grad_norm": 0.9751978516578674, "learning_rate": 0.00019593884506445344, "loss": 1.7036, "step": 1202 }, { "epoch": 0.09, "grad_norm": 1.488834023475647, "learning_rate": 0.00019593207478557897, "loss": 1.8981, "step": 1203 }, { "epoch": 0.09, "grad_norm": 1.064865231513977, "learning_rate": 0.00019592529898526174, "loss": 1.4776, "step": 1204 }, { "epoch": 0.09, "grad_norm": 0.9290183782577515, "learning_rate": 0.00019591851766389176, "loss": 1.3928, "step": 1205 }, { "epoch": 0.09, "grad_norm": 0.9312964677810669, "learning_rate": 0.00019591173082185925, "loss": 1.5984, "step": 1206 }, { "epoch": 0.09, "grad_norm": 0.9303932189941406, "learning_rate": 0.0001959049384595549, "loss": 2.0009, "step": 1207 }, { "epoch": 0.09, "grad_norm": 1.2921702861785889, "learning_rate": 0.00019589814057736966, "loss": 1.5282, "step": 1208 }, { "epoch": 0.09, "grad_norm": 0.9750175476074219, "learning_rate": 0.0001958913371756947, "loss": 1.6234, "step": 1209 }, { "epoch": 0.09, "grad_norm": 0.9658408761024475, "learning_rate": 0.0001958845282549217, "loss": 1.9492, "step": 1210 }, { "epoch": 0.09, "grad_norm": 0.9806130528450012, "learning_rate": 0.0001958777138154425, "loss": 1.6596, "step": 1211 }, { "epoch": 0.09, "grad_norm": 0.9232090711593628, "learning_rate": 0.00019587089385764928, "loss": 1.523, "step": 1212 }, { "epoch": 0.09, "grad_norm": 1.1587127447128296, "learning_rate": 0.00019586406838193463, "loss": 0.4136, "step": 1213 }, { "epoch": 0.09, "grad_norm": 1.3431246280670166, 
"learning_rate": 0.00019585723738869138, "loss": 1.4558, "step": 1214 }, { "epoch": 0.09, "grad_norm": 0.9483203887939453, "learning_rate": 0.00019585040087831266, "loss": 1.4839, "step": 1215 }, { "epoch": 0.09, "grad_norm": 1.2051782608032227, "learning_rate": 0.00019584355885119196, "loss": 1.1478, "step": 1216 }, { "epoch": 0.09, "grad_norm": 1.3048979043960571, "learning_rate": 0.00019583671130772313, "loss": 0.8855, "step": 1217 }, { "epoch": 0.09, "grad_norm": 1.5217148065567017, "learning_rate": 0.0001958298582483002, "loss": 1.0479, "step": 1218 }, { "epoch": 0.09, "grad_norm": 1.3903223276138306, "learning_rate": 0.0001958229996733177, "loss": 2.2544, "step": 1219 }, { "epoch": 0.09, "grad_norm": 1.4408791065216064, "learning_rate": 0.00019581613558317028, "loss": 1.5714, "step": 1220 }, { "epoch": 0.09, "grad_norm": 0.7406507134437561, "learning_rate": 0.0001958092659782531, "loss": 0.7879, "step": 1221 }, { "epoch": 0.09, "grad_norm": 5.482503414154053, "learning_rate": 0.00019580239085896145, "loss": 1.7153, "step": 1222 }, { "epoch": 0.09, "grad_norm": 1.1183463335037231, "learning_rate": 0.00019579551022569114, "loss": 1.0938, "step": 1223 }, { "epoch": 0.09, "grad_norm": 0.9453603029251099, "learning_rate": 0.0001957886240788381, "loss": 1.7547, "step": 1224 }, { "epoch": 0.09, "grad_norm": 1.2231721878051758, "learning_rate": 0.00019578173241879872, "loss": 1.7444, "step": 1225 }, { "epoch": 0.09, "grad_norm": 1.1475460529327393, "learning_rate": 0.00019577483524596962, "loss": 1.7622, "step": 1226 }, { "epoch": 0.09, "grad_norm": 0.9593950510025024, "learning_rate": 0.00019576793256074782, "loss": 2.0344, "step": 1227 }, { "epoch": 0.09, "grad_norm": 0.8735187649726868, "learning_rate": 0.00019576102436353056, "loss": 1.7438, "step": 1228 }, { "epoch": 0.09, "grad_norm": 0.9181464314460754, "learning_rate": 0.00019575411065471545, "loss": 2.1135, "step": 1229 }, { "epoch": 0.09, "grad_norm": 2.2152228355407715, "learning_rate": 
0.00019574719143470044, "loss": 2.0153, "step": 1230 }, { "epoch": 0.09, "grad_norm": 1.9991458654403687, "learning_rate": 0.00019574026670388377, "loss": 2.3328, "step": 1231 }, { "epoch": 0.09, "grad_norm": 1.1463395357131958, "learning_rate": 0.00019573333646266398, "loss": 1.6533, "step": 1232 }, { "epoch": 0.09, "grad_norm": 0.7124354839324951, "learning_rate": 0.00019572640071143993, "loss": 1.2717, "step": 1233 }, { "epoch": 0.09, "grad_norm": 1.0641324520111084, "learning_rate": 0.00019571945945061087, "loss": 1.5866, "step": 1234 }, { "epoch": 0.09, "grad_norm": 0.9903287291526794, "learning_rate": 0.00019571251268057625, "loss": 1.4312, "step": 1235 }, { "epoch": 0.09, "grad_norm": 1.5280494689941406, "learning_rate": 0.00019570556040173593, "loss": 2.0478, "step": 1236 }, { "epoch": 0.09, "grad_norm": 1.9444797039031982, "learning_rate": 0.00019569860261449006, "loss": 1.7682, "step": 1237 }, { "epoch": 0.09, "grad_norm": 1.066878318786621, "learning_rate": 0.00019569163931923908, "loss": 1.2333, "step": 1238 }, { "epoch": 0.09, "grad_norm": 1.0864533185958862, "learning_rate": 0.00019568467051638377, "loss": 1.6645, "step": 1239 }, { "epoch": 0.09, "grad_norm": 1.0207042694091797, "learning_rate": 0.0001956776962063252, "loss": 1.4602, "step": 1240 }, { "epoch": 0.09, "grad_norm": 1.1605559587478638, "learning_rate": 0.00019567071638946484, "loss": 1.6327, "step": 1241 }, { "epoch": 0.09, "grad_norm": 1.23871910572052, "learning_rate": 0.0001956637310662044, "loss": 1.7244, "step": 1242 }, { "epoch": 0.09, "grad_norm": 1.7135943174362183, "learning_rate": 0.0001956567402369459, "loss": 2.1606, "step": 1243 }, { "epoch": 0.09, "grad_norm": 0.9043253660202026, "learning_rate": 0.00019564974390209167, "loss": 1.5898, "step": 1244 }, { "epoch": 0.09, "grad_norm": 3.3362457752227783, "learning_rate": 0.00019564274206204448, "loss": 2.0407, "step": 1245 }, { "epoch": 0.1, "grad_norm": 1.1575307846069336, "learning_rate": 0.00019563573471720731, "loss": 1.669, 
"step": 1246 }, { "epoch": 0.1, "grad_norm": 1.3641501665115356, "learning_rate": 0.0001956287218679834, "loss": 1.5124, "step": 1247 }, { "epoch": 0.1, "grad_norm": 1.5798510313034058, "learning_rate": 0.00019562170351477645, "loss": 1.7143, "step": 1248 }, { "epoch": 0.1, "grad_norm": 0.9801362156867981, "learning_rate": 0.00019561467965799033, "loss": 2.0344, "step": 1249 }, { "epoch": 0.1, "grad_norm": 1.3144769668579102, "learning_rate": 0.00019560765029802944, "loss": 1.3396, "step": 1250 }, { "epoch": 0.1, "grad_norm": 1.2117725610733032, "learning_rate": 0.0001956006154352982, "loss": 1.2211, "step": 1251 }, { "epoch": 0.1, "grad_norm": 2.5486810207366943, "learning_rate": 0.00019559357507020162, "loss": 1.7472, "step": 1252 }, { "epoch": 0.1, "grad_norm": 1.0527880191802979, "learning_rate": 0.00019558652920314485, "loss": 1.5637, "step": 1253 }, { "epoch": 0.1, "grad_norm": 1.2266149520874023, "learning_rate": 0.00019557947783453345, "loss": 1.474, "step": 1254 }, { "epoch": 0.1, "grad_norm": 1.5136995315551758, "learning_rate": 0.00019557242096477327, "loss": 1.7884, "step": 1255 }, { "epoch": 0.1, "grad_norm": 1.59847891330719, "learning_rate": 0.0001955653585942705, "loss": 1.7147, "step": 1256 }, { "epoch": 0.1, "grad_norm": 0.8138285279273987, "learning_rate": 0.00019555829072343155, "loss": 1.8806, "step": 1257 }, { "epoch": 0.1, "grad_norm": 1.150029182434082, "learning_rate": 0.00019555121735266324, "loss": 2.1249, "step": 1258 }, { "epoch": 0.1, "grad_norm": 1.127193570137024, "learning_rate": 0.00019554413848237273, "loss": 2.0231, "step": 1259 }, { "epoch": 0.1, "grad_norm": 0.9578621983528137, "learning_rate": 0.00019553705411296736, "loss": 1.8647, "step": 1260 }, { "epoch": 0.1, "grad_norm": 1.1921977996826172, "learning_rate": 0.00019552996424485497, "loss": 1.781, "step": 1261 }, { "epoch": 0.1, "grad_norm": 2.8497257232666016, "learning_rate": 0.0001955228688784436, "loss": 2.1364, "step": 1262 }, { "epoch": 0.1, "grad_norm": 
1.7117955684661865, "learning_rate": 0.00019551576801414157, "loss": 1.5342, "step": 1263 }, { "epoch": 0.1, "grad_norm": 1.048967719078064, "learning_rate": 0.00019550866165235763, "loss": 1.5834, "step": 1264 }, { "epoch": 0.1, "grad_norm": 1.4468271732330322, "learning_rate": 0.0001955015497935008, "loss": 2.506, "step": 1265 }, { "epoch": 0.1, "grad_norm": 1.107609748840332, "learning_rate": 0.00019549443243798039, "loss": 1.957, "step": 1266 }, { "epoch": 0.1, "grad_norm": 2.048208236694336, "learning_rate": 0.00019548730958620603, "loss": 1.9324, "step": 1267 }, { "epoch": 0.1, "grad_norm": 7.830131530761719, "learning_rate": 0.00019548018123858771, "loss": 1.3569, "step": 1268 }, { "epoch": 0.1, "grad_norm": 1.1167163848876953, "learning_rate": 0.00019547304739553567, "loss": 2.0284, "step": 1269 }, { "epoch": 0.1, "grad_norm": 1.3018049001693726, "learning_rate": 0.00019546590805746052, "loss": 1.6098, "step": 1270 }, { "epoch": 0.1, "grad_norm": 6.003747940063477, "learning_rate": 0.0001954587632247732, "loss": 2.3594, "step": 1271 }, { "epoch": 0.1, "grad_norm": 2.764214277267456, "learning_rate": 0.0001954516128978849, "loss": 2.4898, "step": 1272 }, { "epoch": 0.1, "grad_norm": 0.9182487726211548, "learning_rate": 0.00019544445707720719, "loss": 1.6574, "step": 1273 }, { "epoch": 0.1, "grad_norm": 0.9551866054534912, "learning_rate": 0.0001954372957631519, "loss": 1.3597, "step": 1274 }, { "epoch": 0.1, "grad_norm": 0.9553442001342773, "learning_rate": 0.00019543012895613122, "loss": 2.1677, "step": 1275 }, { "epoch": 0.1, "grad_norm": 1.3059945106506348, "learning_rate": 0.00019542295665655766, "loss": 1.5546, "step": 1276 }, { "epoch": 0.1, "grad_norm": 0.9357663989067078, "learning_rate": 0.00019541577886484396, "loss": 1.6502, "step": 1277 }, { "epoch": 0.1, "grad_norm": 1.1167773008346558, "learning_rate": 0.0001954085955814033, "loss": 1.559, "step": 1278 }, { "epoch": 0.1, "grad_norm": 1.0889025926589966, "learning_rate": 0.00019540140680664913, 
"loss": 1.5203, "step": 1279 }, { "epoch": 0.1, "grad_norm": 1.404433012008667, "learning_rate": 0.00019539421254099519, "loss": 2.1068, "step": 1280 }, { "epoch": 0.1, "grad_norm": 1.1421009302139282, "learning_rate": 0.00019538701278485552, "loss": 1.117, "step": 1281 }, { "epoch": 0.1, "grad_norm": 1.038179874420166, "learning_rate": 0.00019537980753864454, "loss": 1.7502, "step": 1282 }, { "epoch": 0.1, "grad_norm": 0.8593443036079407, "learning_rate": 0.00019537259680277696, "loss": 1.808, "step": 1283 }, { "epoch": 0.1, "grad_norm": 1.459715723991394, "learning_rate": 0.00019536538057766777, "loss": 1.4737, "step": 1284 }, { "epoch": 0.1, "grad_norm": 1.3936309814453125, "learning_rate": 0.00019535815886373233, "loss": 1.4824, "step": 1285 }, { "epoch": 0.1, "grad_norm": 1.4168773889541626, "learning_rate": 0.00019535093166138628, "loss": 1.7562, "step": 1286 }, { "epoch": 0.1, "grad_norm": 0.9704478979110718, "learning_rate": 0.0001953436989710456, "loss": 1.3686, "step": 1287 }, { "epoch": 0.1, "grad_norm": 1.4351533651351929, "learning_rate": 0.00019533646079312656, "loss": 1.3489, "step": 1288 }, { "epoch": 0.1, "grad_norm": 1.4359346628189087, "learning_rate": 0.00019532921712804574, "loss": 2.0487, "step": 1289 }, { "epoch": 0.1, "grad_norm": 1.1898577213287354, "learning_rate": 0.00019532196797622008, "loss": 2.0751, "step": 1290 }, { "epoch": 0.1, "grad_norm": 1.8338361978530884, "learning_rate": 0.00019531471333806682, "loss": 1.9622, "step": 1291 }, { "epoch": 0.1, "grad_norm": 0.9917848706245422, "learning_rate": 0.00019530745321400347, "loss": 1.3789, "step": 1292 }, { "epoch": 0.1, "grad_norm": 1.8258562088012695, "learning_rate": 0.00019530018760444793, "loss": 1.9858, "step": 1293 }, { "epoch": 0.1, "grad_norm": 1.0829968452453613, "learning_rate": 0.00019529291650981836, "loss": 1.8292, "step": 1294 }, { "epoch": 0.1, "grad_norm": 1.205344319343567, "learning_rate": 0.00019528563993053327, "loss": 1.458, "step": 1295 }, { "epoch": 0.1, 
"grad_norm": 0.8985131978988647, "learning_rate": 0.00019527835786701142, "loss": 1.3783, "step": 1296 }, { "epoch": 0.1, "grad_norm": 1.4858843088150024, "learning_rate": 0.00019527107031967197, "loss": 1.3568, "step": 1297 }, { "epoch": 0.1, "grad_norm": 2.107722282409668, "learning_rate": 0.00019526377728893438, "loss": 1.6922, "step": 1298 }, { "epoch": 0.1, "grad_norm": 2.1761279106140137, "learning_rate": 0.00019525647877521838, "loss": 1.6455, "step": 1299 }, { "epoch": 0.1, "grad_norm": 1.055090308189392, "learning_rate": 0.00019524917477894404, "loss": 1.119, "step": 1300 }, { "epoch": 0.1, "grad_norm": 0.994053304195404, "learning_rate": 0.00019524186530053177, "loss": 1.413, "step": 1301 }, { "epoch": 0.1, "grad_norm": 0.8411820530891418, "learning_rate": 0.00019523455034040223, "loss": 1.7911, "step": 1302 }, { "epoch": 0.1, "grad_norm": 1.162759780883789, "learning_rate": 0.00019522722989897648, "loss": 1.7057, "step": 1303 }, { "epoch": 0.1, "grad_norm": 2.1745386123657227, "learning_rate": 0.00019521990397667582, "loss": 2.1144, "step": 1304 }, { "epoch": 0.1, "grad_norm": 1.059563159942627, "learning_rate": 0.00019521257257392192, "loss": 2.3236, "step": 1305 }, { "epoch": 0.1, "grad_norm": 0.8293344974517822, "learning_rate": 0.00019520523569113677, "loss": 1.4142, "step": 1306 }, { "epoch": 0.1, "grad_norm": 0.8569830060005188, "learning_rate": 0.00019519789332874258, "loss": 1.5531, "step": 1307 }, { "epoch": 0.1, "grad_norm": 1.7262959480285645, "learning_rate": 0.000195190545487162, "loss": 1.7012, "step": 1308 }, { "epoch": 0.1, "grad_norm": 0.9379153847694397, "learning_rate": 0.00019518319216681794, "loss": 1.9615, "step": 1309 }, { "epoch": 0.1, "grad_norm": 1.3392084836959839, "learning_rate": 0.0001951758333681336, "loss": 1.8141, "step": 1310 }, { "epoch": 0.1, "grad_norm": 0.9239846467971802, "learning_rate": 0.00019516846909153253, "loss": 1.8387, "step": 1311 }, { "epoch": 0.1, "grad_norm": 1.1860907077789307, "learning_rate": 
0.0001951610993374386, "loss": 1.8333, "step": 1312 }, { "epoch": 0.1, "grad_norm": 0.9654691815376282, "learning_rate": 0.00019515372410627596, "loss": 2.0417, "step": 1313 }, { "epoch": 0.1, "grad_norm": 2.198992967605591, "learning_rate": 0.00019514634339846913, "loss": 2.0746, "step": 1314 }, { "epoch": 0.1, "grad_norm": 1.1831985712051392, "learning_rate": 0.00019513895721444286, "loss": 1.7923, "step": 1315 }, { "epoch": 0.1, "grad_norm": 0.9720778465270996, "learning_rate": 0.0001951315655546223, "loss": 1.7479, "step": 1316 }, { "epoch": 0.1, "grad_norm": 1.6911141872406006, "learning_rate": 0.0001951241684194329, "loss": 2.0214, "step": 1317 }, { "epoch": 0.1, "grad_norm": 1.0111324787139893, "learning_rate": 0.0001951167658093004, "loss": 1.7849, "step": 1318 }, { "epoch": 0.1, "grad_norm": 1.8662893772125244, "learning_rate": 0.00019510935772465082, "loss": 1.16, "step": 1319 }, { "epoch": 0.1, "grad_norm": 0.9339865446090698, "learning_rate": 0.0001951019441659106, "loss": 1.2688, "step": 1320 }, { "epoch": 0.1, "grad_norm": 1.6083894968032837, "learning_rate": 0.00019509452513350635, "loss": 1.4259, "step": 1321 }, { "epoch": 0.1, "grad_norm": 1.0270313024520874, "learning_rate": 0.00019508710062786518, "loss": 1.5981, "step": 1322 }, { "epoch": 0.1, "grad_norm": 1.4059401750564575, "learning_rate": 0.00019507967064941433, "loss": 1.604, "step": 1323 }, { "epoch": 0.1, "grad_norm": 1.1923325061798096, "learning_rate": 0.00019507223519858147, "loss": 1.1418, "step": 1324 }, { "epoch": 0.1, "grad_norm": 0.9971358776092529, "learning_rate": 0.00019506479427579455, "loss": 1.5849, "step": 1325 }, { "epoch": 0.1, "grad_norm": 1.242719292640686, "learning_rate": 0.0001950573478814819, "loss": 1.4292, "step": 1326 }, { "epoch": 0.1, "grad_norm": 0.7983382940292358, "learning_rate": 0.000195049896016072, "loss": 1.8347, "step": 1327 }, { "epoch": 0.1, "grad_norm": 1.5022010803222656, "learning_rate": 0.00019504243867999379, "loss": 1.89, "step": 1328 }, { 
"epoch": 0.1, "grad_norm": 1.3982974290847778, "learning_rate": 0.00019503497587367648, "loss": 1.7217, "step": 1329 }, { "epoch": 0.1, "grad_norm": 1.2898939847946167, "learning_rate": 0.00019502750759754962, "loss": 1.8616, "step": 1330 }, { "epoch": 0.1, "grad_norm": 1.0442907810211182, "learning_rate": 0.000195020033852043, "loss": 1.7589, "step": 1331 }, { "epoch": 0.1, "grad_norm": 1.757554292678833, "learning_rate": 0.00019501255463758686, "loss": 1.7031, "step": 1332 }, { "epoch": 0.1, "grad_norm": 1.0517783164978027, "learning_rate": 0.0001950050699546116, "loss": 1.7166, "step": 1333 }, { "epoch": 0.1, "grad_norm": 0.8291583061218262, "learning_rate": 0.00019499757980354803, "loss": 1.6571, "step": 1334 }, { "epoch": 0.1, "grad_norm": 0.7491521835327148, "learning_rate": 0.00019499008418482724, "loss": 1.3837, "step": 1335 }, { "epoch": 0.1, "grad_norm": 0.7953094244003296, "learning_rate": 0.0001949825830988807, "loss": 1.6898, "step": 1336 }, { "epoch": 0.1, "grad_norm": 0.9205675721168518, "learning_rate": 0.00019497507654614004, "loss": 1.6408, "step": 1337 }, { "epoch": 0.1, "grad_norm": 1.3283450603485107, "learning_rate": 0.00019496756452703739, "loss": 1.9002, "step": 1338 }, { "epoch": 0.1, "grad_norm": 1.181356430053711, "learning_rate": 0.00019496004704200507, "loss": 1.7492, "step": 1339 }, { "epoch": 0.1, "grad_norm": 1.0915969610214233, "learning_rate": 0.00019495252409147578, "loss": 1.2494, "step": 1340 }, { "epoch": 0.1, "grad_norm": 1.1654757261276245, "learning_rate": 0.00019494499567588252, "loss": 2.1525, "step": 1341 }, { "epoch": 0.1, "grad_norm": 1.4351834058761597, "learning_rate": 0.00019493746179565852, "loss": 2.1778, "step": 1342 }, { "epoch": 0.1, "grad_norm": 4.083967208862305, "learning_rate": 0.00019492992245123748, "loss": 2.1235, "step": 1343 }, { "epoch": 0.1, "grad_norm": 1.658766508102417, "learning_rate": 0.0001949223776430533, "loss": 1.9108, "step": 1344 }, { "epoch": 0.1, "grad_norm": 1.8735857009887695, 
"learning_rate": 0.0001949148273715402, "loss": 1.4861, "step": 1345 }, { "epoch": 0.1, "grad_norm": 2.4835023880004883, "learning_rate": 0.0001949072716371328, "loss": 1.6883, "step": 1346 }, { "epoch": 0.1, "grad_norm": 1.9829670190811157, "learning_rate": 0.00019489971044026595, "loss": 1.9246, "step": 1347 }, { "epoch": 0.1, "grad_norm": 0.8032562732696533, "learning_rate": 0.00019489214378137483, "loss": 1.6289, "step": 1348 }, { "epoch": 0.1, "grad_norm": 0.9375246167182922, "learning_rate": 0.00019488457166089496, "loss": 1.4512, "step": 1349 }, { "epoch": 0.1, "grad_norm": 1.2677795886993408, "learning_rate": 0.00019487699407926213, "loss": 1.7825, "step": 1350 }, { "epoch": 0.1, "grad_norm": 1.8370715379714966, "learning_rate": 0.00019486941103691248, "loss": 1.9602, "step": 1351 }, { "epoch": 0.1, "grad_norm": 1.6197471618652344, "learning_rate": 0.0001948618225342825, "loss": 2.1399, "step": 1352 }, { "epoch": 0.1, "grad_norm": 2.1579813957214355, "learning_rate": 0.00019485422857180892, "loss": 1.7165, "step": 1353 }, { "epoch": 0.1, "grad_norm": 1.5100070238113403, "learning_rate": 0.0001948466291499288, "loss": 1.4544, "step": 1354 }, { "epoch": 0.1, "grad_norm": 1.3142808675765991, "learning_rate": 0.00019483902426907954, "loss": 1.6094, "step": 1355 }, { "epoch": 0.1, "grad_norm": 0.9376998543739319, "learning_rate": 0.0001948314139296989, "loss": 1.5501, "step": 1356 }, { "epoch": 0.1, "grad_norm": 0.8474510312080383, "learning_rate": 0.0001948237981322248, "loss": 1.1903, "step": 1357 }, { "epoch": 0.1, "grad_norm": 1.3804550170898438, "learning_rate": 0.00019481617687709565, "loss": 1.3133, "step": 1358 }, { "epoch": 0.1, "grad_norm": 1.3505043983459473, "learning_rate": 0.00019480855016475008, "loss": 1.2251, "step": 1359 }, { "epoch": 0.1, "grad_norm": 1.5591557025909424, "learning_rate": 0.00019480091799562704, "loss": 2.4732, "step": 1360 }, { "epoch": 0.1, "grad_norm": 0.9032280445098877, "learning_rate": 0.00019479328037016582, "loss": 
1.7587, "step": 1361 }, { "epoch": 0.1, "grad_norm": 1.0715867280960083, "learning_rate": 0.00019478563728880598, "loss": 1.9602, "step": 1362 }, { "epoch": 0.1, "grad_norm": 1.1609411239624023, "learning_rate": 0.00019477798875198747, "loss": 1.7177, "step": 1363 }, { "epoch": 0.1, "grad_norm": 1.0410027503967285, "learning_rate": 0.00019477033476015046, "loss": 1.765, "step": 1364 }, { "epoch": 0.1, "grad_norm": 0.9381449818611145, "learning_rate": 0.00019476267531373548, "loss": 1.149, "step": 1365 }, { "epoch": 0.1, "grad_norm": 1.2048155069351196, "learning_rate": 0.00019475501041318343, "loss": 1.9087, "step": 1366 }, { "epoch": 0.1, "grad_norm": 1.2797808647155762, "learning_rate": 0.00019474734005893544, "loss": 1.8634, "step": 1367 }, { "epoch": 0.1, "grad_norm": 2.9609274864196777, "learning_rate": 0.00019473966425143292, "loss": 1.6502, "step": 1368 }, { "epoch": 0.1, "grad_norm": 1.5187346935272217, "learning_rate": 0.0001947319829911178, "loss": 1.8742, "step": 1369 }, { "epoch": 0.1, "grad_norm": 2.315028667449951, "learning_rate": 0.00019472429627843203, "loss": 1.997, "step": 1370 }, { "epoch": 0.1, "grad_norm": 1.0750750303268433, "learning_rate": 0.0001947166041138181, "loss": 1.1455, "step": 1371 }, { "epoch": 0.1, "grad_norm": 0.8698201179504395, "learning_rate": 0.00019470890649771875, "loss": 1.2981, "step": 1372 }, { "epoch": 0.1, "grad_norm": 1.3796355724334717, "learning_rate": 0.000194701203430577, "loss": 1.5893, "step": 1373 }, { "epoch": 0.1, "grad_norm": 1.5501590967178345, "learning_rate": 0.00019469349491283618, "loss": 1.9268, "step": 1374 }, { "epoch": 0.1, "grad_norm": 0.918686032295227, "learning_rate": 0.00019468578094493998, "loss": 1.509, "step": 1375 }, { "epoch": 0.1, "grad_norm": 1.3189022541046143, "learning_rate": 0.00019467806152733243, "loss": 1.8025, "step": 1376 }, { "epoch": 0.11, "grad_norm": 2.469802141189575, "learning_rate": 0.00019467033666045776, "loss": 1.9495, "step": 1377 }, { "epoch": 0.11, "grad_norm": 
3.0081396102905273, "learning_rate": 0.0001946626063447606, "loss": 2.2181, "step": 1378 }, { "epoch": 0.11, "grad_norm": 0.7953611612319946, "learning_rate": 0.0001946548705806859, "loss": 1.6196, "step": 1379 }, { "epoch": 0.11, "grad_norm": 1.42487633228302, "learning_rate": 0.00019464712936867885, "loss": 1.4224, "step": 1380 }, { "epoch": 0.11, "grad_norm": 0.9798352122306824, "learning_rate": 0.00019463938270918507, "loss": 1.9211, "step": 1381 }, { "epoch": 0.11, "grad_norm": 0.835377037525177, "learning_rate": 0.00019463163060265038, "loss": 1.5969, "step": 1382 }, { "epoch": 0.11, "grad_norm": 1.0623130798339844, "learning_rate": 0.00019462387304952094, "loss": 1.7564, "step": 1383 }, { "epoch": 0.11, "grad_norm": 0.9405195116996765, "learning_rate": 0.00019461611005024327, "loss": 2.1411, "step": 1384 }, { "epoch": 0.11, "grad_norm": 2.5837690830230713, "learning_rate": 0.00019460834160526415, "loss": 1.4055, "step": 1385 }, { "epoch": 0.11, "grad_norm": 1.280291199684143, "learning_rate": 0.00019460056771503073, "loss": 1.2788, "step": 1386 }, { "epoch": 0.11, "grad_norm": 1.8776177167892456, "learning_rate": 0.00019459278837999046, "loss": 1.4709, "step": 1387 }, { "epoch": 0.11, "grad_norm": 1.4057576656341553, "learning_rate": 0.00019458500360059104, "loss": 1.6923, "step": 1388 }, { "epoch": 0.11, "grad_norm": 0.8499172925949097, "learning_rate": 0.00019457721337728053, "loss": 1.5279, "step": 1389 }, { "epoch": 0.11, "grad_norm": 1.5343542098999023, "learning_rate": 0.00019456941771050734, "loss": 1.7809, "step": 1390 }, { "epoch": 0.11, "grad_norm": 2.092466115951538, "learning_rate": 0.00019456161660072013, "loss": 1.9352, "step": 1391 }, { "epoch": 0.11, "grad_norm": 1.1035325527191162, "learning_rate": 0.00019455381004836788, "loss": 1.6252, "step": 1392 }, { "epoch": 0.11, "grad_norm": 1.7365010976791382, "learning_rate": 0.00019454599805389996, "loss": 1.7626, "step": 1393 }, { "epoch": 0.11, "grad_norm": 1.306161642074585, "learning_rate": 
0.00019453818061776593, "loss": 1.7232, "step": 1394 }, { "epoch": 0.11, "grad_norm": 0.659720778465271, "learning_rate": 0.00019453035774041574, "loss": 1.6235, "step": 1395 }, { "epoch": 0.11, "grad_norm": 2.0101542472839355, "learning_rate": 0.0001945225294222997, "loss": 2.1552, "step": 1396 }, { "epoch": 0.11, "grad_norm": 1.2410309314727783, "learning_rate": 0.0001945146956638683, "loss": 1.4917, "step": 1397 }, { "epoch": 0.11, "grad_norm": 1.5793052911758423, "learning_rate": 0.00019450685646557245, "loss": 1.8062, "step": 1398 }, { "epoch": 0.11, "grad_norm": 1.7429863214492798, "learning_rate": 0.0001944990118278634, "loss": 1.5096, "step": 1399 }, { "epoch": 0.11, "grad_norm": 2.474210739135742, "learning_rate": 0.00019449116175119252, "loss": 2.3329, "step": 1400 }, { "epoch": 0.11, "grad_norm": 1.5440247058868408, "learning_rate": 0.00019448330623601174, "loss": 1.1598, "step": 1401 }, { "epoch": 0.11, "grad_norm": 2.4211366176605225, "learning_rate": 0.00019447544528277316, "loss": 1.4875, "step": 1402 }, { "epoch": 0.11, "grad_norm": 0.8582671284675598, "learning_rate": 0.0001944675788919292, "loss": 1.2186, "step": 1403 }, { "epoch": 0.11, "grad_norm": 1.0649763345718384, "learning_rate": 0.00019445970706393265, "loss": 2.2134, "step": 1404 }, { "epoch": 0.11, "grad_norm": 1.0632630586624146, "learning_rate": 0.00019445182979923654, "loss": 1.2474, "step": 1405 }, { "epoch": 0.11, "grad_norm": 1.0944315195083618, "learning_rate": 0.00019444394709829428, "loss": 1.9668, "step": 1406 }, { "epoch": 0.11, "grad_norm": 1.1527769565582275, "learning_rate": 0.00019443605896155957, "loss": 1.7855, "step": 1407 }, { "epoch": 0.11, "grad_norm": 0.8630343675613403, "learning_rate": 0.00019442816538948638, "loss": 0.937, "step": 1408 }, { "epoch": 0.11, "grad_norm": 0.8085539937019348, "learning_rate": 0.00019442026638252907, "loss": 1.1175, "step": 1409 }, { "epoch": 0.11, "grad_norm": 1.85823392868042, "learning_rate": 0.00019441236194114227, "loss": 1.9581, 
"step": 1410 }, { "epoch": 0.11, "grad_norm": 0.8454265594482422, "learning_rate": 0.00019440445206578093, "loss": 1.8103, "step": 1411 }, { "epoch": 0.11, "grad_norm": 2.1626086235046387, "learning_rate": 0.00019439653675690027, "loss": 1.9422, "step": 1412 }, { "epoch": 0.11, "grad_norm": 1.4468669891357422, "learning_rate": 0.00019438861601495585, "loss": 2.0774, "step": 1413 }, { "epoch": 0.11, "grad_norm": 1.7841312885284424, "learning_rate": 0.00019438068984040365, "loss": 2.2526, "step": 1414 }, { "epoch": 0.11, "grad_norm": 0.9438977241516113, "learning_rate": 0.0001943727582336998, "loss": 1.7213, "step": 1415 }, { "epoch": 0.11, "grad_norm": 1.1270768642425537, "learning_rate": 0.0001943648211953008, "loss": 1.6845, "step": 1416 }, { "epoch": 0.11, "grad_norm": 1.0069315433502197, "learning_rate": 0.0001943568787256635, "loss": 1.4683, "step": 1417 }, { "epoch": 0.11, "grad_norm": 1.6032801866531372, "learning_rate": 0.00019434893082524498, "loss": 1.4781, "step": 1418 }, { "epoch": 0.11, "grad_norm": 0.9407336115837097, "learning_rate": 0.00019434097749450278, "loss": 2.2349, "step": 1419 }, { "epoch": 0.11, "grad_norm": 1.0026280879974365, "learning_rate": 0.00019433301873389463, "loss": 1.7292, "step": 1420 }, { "epoch": 0.11, "grad_norm": 1.3161996603012085, "learning_rate": 0.00019432505454387852, "loss": 1.7959, "step": 1421 }, { "epoch": 0.11, "grad_norm": 2.2550151348114014, "learning_rate": 0.00019431708492491294, "loss": 1.4928, "step": 1422 }, { "epoch": 0.11, "grad_norm": 1.1663708686828613, "learning_rate": 0.00019430910987745654, "loss": 1.2836, "step": 1423 }, { "epoch": 0.11, "grad_norm": 2.66930890083313, "learning_rate": 0.00019430112940196833, "loss": 1.8582, "step": 1424 }, { "epoch": 0.11, "grad_norm": 0.9286302328109741, "learning_rate": 0.00019429314349890764, "loss": 1.2175, "step": 1425 }, { "epoch": 0.11, "grad_norm": 1.4760113954544067, "learning_rate": 0.00019428515216873412, "loss": 1.3624, "step": 1426 }, { "epoch": 0.11, 
"grad_norm": 0.8642199635505676, "learning_rate": 0.00019427715541190765, "loss": 1.8705, "step": 1427 }, { "epoch": 0.11, "grad_norm": 0.9404789805412292, "learning_rate": 0.0001942691532288886, "loss": 1.4528, "step": 1428 }, { "epoch": 0.11, "grad_norm": 1.2646359205245972, "learning_rate": 0.00019426114562013744, "loss": 1.9438, "step": 1429 }, { "epoch": 0.11, "grad_norm": 1.5328923463821411, "learning_rate": 0.0001942531325861151, "loss": 1.3963, "step": 1430 }, { "epoch": 0.11, "grad_norm": 1.067549705505371, "learning_rate": 0.0001942451141272828, "loss": 1.4227, "step": 1431 }, { "epoch": 0.11, "grad_norm": 1.626806378364563, "learning_rate": 0.00019423709024410198, "loss": 1.9617, "step": 1432 }, { "epoch": 0.11, "grad_norm": 3.2757811546325684, "learning_rate": 0.00019422906093703453, "loss": 1.6698, "step": 1433 }, { "epoch": 0.11, "grad_norm": 0.9389740228652954, "learning_rate": 0.00019422102620654253, "loss": 1.459, "step": 1434 }, { "epoch": 0.11, "grad_norm": 0.9910578727722168, "learning_rate": 0.00019421298605308847, "loss": 1.9, "step": 1435 }, { "epoch": 0.11, "grad_norm": 0.8709587454795837, "learning_rate": 0.00019420494047713507, "loss": 2.0638, "step": 1436 }, { "epoch": 0.11, "grad_norm": 0.8640468716621399, "learning_rate": 0.00019419688947914542, "loss": 1.4823, "step": 1437 }, { "epoch": 0.11, "grad_norm": 2.0484447479248047, "learning_rate": 0.00019418883305958288, "loss": 1.9601, "step": 1438 }, { "epoch": 0.11, "grad_norm": 2.6391592025756836, "learning_rate": 0.00019418077121891118, "loss": 2.3939, "step": 1439 }, { "epoch": 0.11, "grad_norm": 1.7470917701721191, "learning_rate": 0.00019417270395759428, "loss": 1.3637, "step": 1440 }, { "epoch": 0.11, "grad_norm": 1.3418835401535034, "learning_rate": 0.00019416463127609656, "loss": 1.6271, "step": 1441 }, { "epoch": 0.11, "grad_norm": 0.7336476445198059, "learning_rate": 0.00019415655317488257, "loss": 1.5487, "step": 1442 }, { "epoch": 0.11, "grad_norm": 0.7894149422645569, 
"learning_rate": 0.0001941484696544173, "loss": 1.2155, "step": 1443 }, { "epoch": 0.11, "grad_norm": 1.624173879623413, "learning_rate": 0.00019414038071516603, "loss": 1.8755, "step": 1444 }, { "epoch": 0.11, "grad_norm": 0.9435840845108032, "learning_rate": 0.00019413228635759426, "loss": 1.3624, "step": 1445 }, { "epoch": 0.11, "grad_norm": 1.3198391199111938, "learning_rate": 0.0001941241865821679, "loss": 1.805, "step": 1446 }, { "epoch": 0.11, "grad_norm": 0.9424530863761902, "learning_rate": 0.00019411608138935314, "loss": 1.7044, "step": 1447 }, { "epoch": 0.11, "grad_norm": 1.1033570766448975, "learning_rate": 0.00019410797077961647, "loss": 1.4738, "step": 1448 }, { "epoch": 0.11, "grad_norm": 1.0688055753707886, "learning_rate": 0.00019409985475342468, "loss": 1.6652, "step": 1449 }, { "epoch": 0.11, "grad_norm": 1.3101948499679565, "learning_rate": 0.000194091733311245, "loss": 1.9527, "step": 1450 }, { "epoch": 0.11, "grad_norm": 1.8363736867904663, "learning_rate": 0.00019408360645354472, "loss": 1.6501, "step": 1451 }, { "epoch": 0.11, "grad_norm": 1.2176258563995361, "learning_rate": 0.00019407547418079165, "loss": 1.7894, "step": 1452 }, { "epoch": 0.11, "grad_norm": 1.0946321487426758, "learning_rate": 0.0001940673364934539, "loss": 1.9353, "step": 1453 }, { "epoch": 0.11, "grad_norm": 1.1485427618026733, "learning_rate": 0.0001940591933919998, "loss": 2.2176, "step": 1454 }, { "epoch": 0.11, "grad_norm": 2.7586565017700195, "learning_rate": 0.00019405104487689798, "loss": 1.7416, "step": 1455 }, { "epoch": 0.11, "grad_norm": 0.940780520439148, "learning_rate": 0.0001940428909486175, "loss": 0.9322, "step": 1456 }, { "epoch": 0.11, "grad_norm": 1.8997881412506104, "learning_rate": 0.00019403473160762764, "loss": 1.584, "step": 1457 }, { "epoch": 0.11, "grad_norm": 1.1087852716445923, "learning_rate": 0.00019402656685439803, "loss": 1.9492, "step": 1458 }, { "epoch": 0.11, "grad_norm": 0.8832969069480896, "learning_rate": 0.0001940183966893986, 
"loss": 1.0766, "step": 1459 }, { "epoch": 0.11, "grad_norm": 1.35310959815979, "learning_rate": 0.00019401022111309959, "loss": 0.832, "step": 1460 }, { "epoch": 0.11, "grad_norm": 1.2407026290893555, "learning_rate": 0.0001940020401259715, "loss": 2.229, "step": 1461 }, { "epoch": 0.11, "grad_norm": 1.6516765356063843, "learning_rate": 0.00019399385372848528, "loss": 2.8851, "step": 1462 }, { "epoch": 0.11, "grad_norm": 0.956274688243866, "learning_rate": 0.00019398566192111206, "loss": 1.7405, "step": 1463 }, { "epoch": 0.11, "grad_norm": 0.7775480151176453, "learning_rate": 0.00019397746470432332, "loss": 1.5797, "step": 1464 }, { "epoch": 0.11, "grad_norm": 1.8677294254302979, "learning_rate": 0.00019396926207859084, "loss": 1.9522, "step": 1465 }, { "epoch": 0.11, "grad_norm": 1.3529980182647705, "learning_rate": 0.00019396105404438679, "loss": 1.7792, "step": 1466 }, { "epoch": 0.11, "grad_norm": 1.3449444770812988, "learning_rate": 0.0001939528406021835, "loss": 2.2102, "step": 1467 }, { "epoch": 0.11, "grad_norm": 1.390345811843872, "learning_rate": 0.00019394462175245381, "loss": 0.8903, "step": 1468 }, { "epoch": 0.11, "grad_norm": 1.4766026735305786, "learning_rate": 0.00019393639749567068, "loss": 1.3381, "step": 1469 }, { "epoch": 0.11, "grad_norm": 1.3699370622634888, "learning_rate": 0.00019392816783230746, "loss": 2.1158, "step": 1470 }, { "epoch": 0.11, "grad_norm": 1.6378533840179443, "learning_rate": 0.00019391993276283786, "loss": 2.0108, "step": 1471 }, { "epoch": 0.11, "grad_norm": 1.0423630475997925, "learning_rate": 0.00019391169228773585, "loss": 2.1801, "step": 1472 }, { "epoch": 0.11, "grad_norm": 0.7793729305267334, "learning_rate": 0.0001939034464074757, "loss": 1.8727, "step": 1473 }, { "epoch": 0.11, "grad_norm": 1.4836974143981934, "learning_rate": 0.00019389519512253204, "loss": 1.0346, "step": 1474 }, { "epoch": 0.11, "grad_norm": 1.352034091949463, "learning_rate": 0.00019388693843337972, "loss": 2.0528, "step": 1475 }, { 
"epoch": 0.11, "grad_norm": 1.3093448877334595, "learning_rate": 0.00019387867634049398, "loss": 1.2455, "step": 1476 }, { "epoch": 0.11, "grad_norm": 1.218622088432312, "learning_rate": 0.00019387040884435037, "loss": 1.9707, "step": 1477 }, { "epoch": 0.11, "grad_norm": 2.28944993019104, "learning_rate": 0.00019386213594542473, "loss": 1.3627, "step": 1478 }, { "epoch": 0.11, "grad_norm": 1.1681249141693115, "learning_rate": 0.0001938538576441932, "loss": 2.0074, "step": 1479 }, { "epoch": 0.11, "grad_norm": 0.9339519143104553, "learning_rate": 0.00019384557394113228, "loss": 1.8085, "step": 1480 }, { "epoch": 0.11, "grad_norm": 1.1258500814437866, "learning_rate": 0.00019383728483671867, "loss": 1.8771, "step": 1481 }, { "epoch": 0.11, "grad_norm": 1.1575146913528442, "learning_rate": 0.00019382899033142952, "loss": 1.4456, "step": 1482 }, { "epoch": 0.11, "grad_norm": 1.008057951927185, "learning_rate": 0.00019382069042574222, "loss": 1.7147, "step": 1483 }, { "epoch": 0.11, "grad_norm": 0.8224812746047974, "learning_rate": 0.00019381238512013445, "loss": 1.7428, "step": 1484 }, { "epoch": 0.11, "grad_norm": 0.812252938747406, "learning_rate": 0.00019380407441508426, "loss": 1.4131, "step": 1485 }, { "epoch": 0.11, "grad_norm": 0.9904577732086182, "learning_rate": 0.00019379575831106994, "loss": 1.412, "step": 1486 }, { "epoch": 0.11, "grad_norm": 1.693717360496521, "learning_rate": 0.0001937874368085702, "loss": 2.2035, "step": 1487 }, { "epoch": 0.11, "grad_norm": 1.491120457649231, "learning_rate": 0.00019377910990806387, "loss": 1.1726, "step": 1488 }, { "epoch": 0.11, "grad_norm": 2.241468667984009, "learning_rate": 0.00019377077761003033, "loss": 1.8691, "step": 1489 }, { "epoch": 0.11, "grad_norm": 1.09627366065979, "learning_rate": 0.00019376243991494908, "loss": 2.1657, "step": 1490 }, { "epoch": 0.11, "grad_norm": 0.8766071796417236, "learning_rate": 0.00019375409682330003, "loss": 1.1038, "step": 1491 }, { "epoch": 0.11, "grad_norm": 
1.0472251176834106, "learning_rate": 0.0001937457483355634, "loss": 1.318, "step": 1492 }, { "epoch": 0.11, "grad_norm": 1.3397204875946045, "learning_rate": 0.00019373739445221965, "loss": 1.6523, "step": 1493 }, { "epoch": 0.11, "grad_norm": 1.28510582447052, "learning_rate": 0.0001937290351737496, "loss": 1.6666, "step": 1494 }, { "epoch": 0.11, "grad_norm": 1.251731038093567, "learning_rate": 0.00019372067050063438, "loss": 1.4432, "step": 1495 }, { "epoch": 0.11, "grad_norm": 1.2333152294158936, "learning_rate": 0.00019371230043335543, "loss": 1.3834, "step": 1496 }, { "epoch": 0.11, "grad_norm": 2.014230251312256, "learning_rate": 0.00019370392497239448, "loss": 1.9045, "step": 1497 }, { "epoch": 0.11, "grad_norm": 1.5149961709976196, "learning_rate": 0.00019369554411823362, "loss": 0.6285, "step": 1498 }, { "epoch": 0.11, "grad_norm": 0.9866610765457153, "learning_rate": 0.00019368715787135518, "loss": 1.5793, "step": 1499 }, { "epoch": 0.11, "grad_norm": 3.9323747158050537, "learning_rate": 0.00019367876623224185, "loss": 1.5359, "step": 1500 }, { "epoch": 0.11, "grad_norm": 1.1184386014938354, "learning_rate": 0.00019367036920137665, "loss": 1.9068, "step": 1501 }, { "epoch": 0.11, "grad_norm": 1.843031644821167, "learning_rate": 0.0001936619667792428, "loss": 1.4176, "step": 1502 }, { "epoch": 0.11, "grad_norm": 1.2129732370376587, "learning_rate": 0.00019365355896632398, "loss": 1.9684, "step": 1503 }, { "epoch": 0.11, "grad_norm": 0.7817725539207458, "learning_rate": 0.00019364514576310408, "loss": 1.3887, "step": 1504 }, { "epoch": 0.11, "grad_norm": 2.7935729026794434, "learning_rate": 0.00019363672717006734, "loss": 1.3299, "step": 1505 }, { "epoch": 0.11, "grad_norm": 0.9651843309402466, "learning_rate": 0.00019362830318769828, "loss": 1.9299, "step": 1506 }, { "epoch": 0.11, "grad_norm": 1.1934435367584229, "learning_rate": 0.00019361987381648176, "loss": 1.4039, "step": 1507 }, { "epoch": 0.12, "grad_norm": 1.5846279859542847, "learning_rate": 
0.00019361143905690292, "loss": 1.5976, "step": 1508 }, { "epoch": 0.12, "grad_norm": 1.607005000114441, "learning_rate": 0.00019360299890944726, "loss": 2.0151, "step": 1509 }, { "epoch": 0.12, "grad_norm": 1.165794849395752, "learning_rate": 0.00019359455337460054, "loss": 1.7268, "step": 1510 }, { "epoch": 0.12, "grad_norm": 0.9599846005439758, "learning_rate": 0.00019358610245284882, "loss": 2.2693, "step": 1511 }, { "epoch": 0.12, "grad_norm": 2.359123468399048, "learning_rate": 0.0001935776461446786, "loss": 1.7195, "step": 1512 }, { "epoch": 0.12, "grad_norm": 1.4806674718856812, "learning_rate": 0.0001935691844505765, "loss": 1.571, "step": 1513 }, { "epoch": 0.12, "grad_norm": 0.9351876974105835, "learning_rate": 0.00019356071737102953, "loss": 1.502, "step": 1514 }, { "epoch": 0.12, "grad_norm": 4.120791435241699, "learning_rate": 0.00019355224490652507, "loss": 1.3745, "step": 1515 }, { "epoch": 0.12, "grad_norm": 1.1413536071777344, "learning_rate": 0.00019354376705755074, "loss": 1.526, "step": 1516 }, { "epoch": 0.12, "grad_norm": 1.2305312156677246, "learning_rate": 0.00019353528382459447, "loss": 1.0598, "step": 1517 }, { "epoch": 0.12, "grad_norm": 1.3722152709960938, "learning_rate": 0.00019352679520814455, "loss": 1.435, "step": 1518 }, { "epoch": 0.12, "grad_norm": 2.498274326324463, "learning_rate": 0.00019351830120868955, "loss": 1.6196, "step": 1519 }, { "epoch": 0.12, "grad_norm": 0.9533841609954834, "learning_rate": 0.0001935098018267183, "loss": 1.4528, "step": 1520 }, { "epoch": 0.12, "grad_norm": 0.9807798266410828, "learning_rate": 0.00019350129706272005, "loss": 0.9166, "step": 1521 }, { "epoch": 0.12, "grad_norm": 0.7444753050804138, "learning_rate": 0.00019349278691718427, "loss": 1.1007, "step": 1522 }, { "epoch": 0.12, "grad_norm": 1.9343287944793701, "learning_rate": 0.00019348427139060075, "loss": 1.6041, "step": 1523 }, { "epoch": 0.12, "grad_norm": 1.0358009338378906, "learning_rate": 0.00019347575048345964, "loss": 1.2339, 
"step": 1524 }, { "epoch": 0.12, "grad_norm": 1.015976905822754, "learning_rate": 0.00019346722419625136, "loss": 1.6699, "step": 1525 }, { "epoch": 0.12, "grad_norm": 1.9950501918792725, "learning_rate": 0.0001934586925294666, "loss": 2.6922, "step": 1526 }, { "epoch": 0.12, "grad_norm": 1.5833463668823242, "learning_rate": 0.0001934501554835965, "loss": 1.1929, "step": 1527 }, { "epoch": 0.12, "grad_norm": 1.0742344856262207, "learning_rate": 0.00019344161305913233, "loss": 1.1922, "step": 1528 }, { "epoch": 0.12, "grad_norm": 1.384032964706421, "learning_rate": 0.00019343306525656582, "loss": 1.5877, "step": 1529 }, { "epoch": 0.12, "grad_norm": 0.8424710631370544, "learning_rate": 0.0001934245120763889, "loss": 1.1356, "step": 1530 }, { "epoch": 0.12, "grad_norm": 1.8016704320907593, "learning_rate": 0.00019341595351909385, "loss": 1.2167, "step": 1531 }, { "epoch": 0.12, "grad_norm": 2.434333324432373, "learning_rate": 0.00019340738958517327, "loss": 1.8155, "step": 1532 }, { "epoch": 0.12, "grad_norm": 1.3954439163208008, "learning_rate": 0.0001933988202751201, "loss": 1.6698, "step": 1533 }, { "epoch": 0.12, "grad_norm": 0.8653147220611572, "learning_rate": 0.00019339024558942753, "loss": 0.8191, "step": 1534 }, { "epoch": 0.12, "grad_norm": 0.8512495160102844, "learning_rate": 0.00019338166552858906, "loss": 1.8591, "step": 1535 }, { "epoch": 0.12, "grad_norm": 0.8389886021614075, "learning_rate": 0.00019337308009309857, "loss": 1.9337, "step": 1536 }, { "epoch": 0.12, "grad_norm": 1.2540959119796753, "learning_rate": 0.00019336448928345017, "loss": 1.9792, "step": 1537 }, { "epoch": 0.12, "grad_norm": 1.205256700515747, "learning_rate": 0.00019335589310013834, "loss": 2.0324, "step": 1538 }, { "epoch": 0.12, "grad_norm": 4.39993953704834, "learning_rate": 0.00019334729154365778, "loss": 1.688, "step": 1539 }, { "epoch": 0.12, "grad_norm": 1.1167454719543457, "learning_rate": 0.0001933386846145036, "loss": 1.6068, "step": 1540 }, { "epoch": 0.12, 
"grad_norm": 1.1787317991256714, "learning_rate": 0.00019333007231317115, "loss": 2.0852, "step": 1541 }, { "epoch": 0.12, "grad_norm": 1.271950602531433, "learning_rate": 0.00019332145464015618, "loss": 2.144, "step": 1542 }, { "epoch": 0.12, "grad_norm": 1.1104363203048706, "learning_rate": 0.00019331283159595462, "loss": 1.3724, "step": 1543 }, { "epoch": 0.12, "grad_norm": 1.2752066850662231, "learning_rate": 0.0001933042031810628, "loss": 1.9264, "step": 1544 }, { "epoch": 0.12, "grad_norm": 1.0068695545196533, "learning_rate": 0.00019329556939597735, "loss": 2.0197, "step": 1545 }, { "epoch": 0.12, "grad_norm": 1.0867624282836914, "learning_rate": 0.00019328693024119517, "loss": 1.7639, "step": 1546 }, { "epoch": 0.12, "grad_norm": 1.7773481607437134, "learning_rate": 0.00019327828571721355, "loss": 2.3399, "step": 1547 }, { "epoch": 0.12, "grad_norm": 1.194528579711914, "learning_rate": 0.00019326963582452992, "loss": 1.6003, "step": 1548 }, { "epoch": 0.12, "grad_norm": 2.8787789344787598, "learning_rate": 0.00019326098056364222, "loss": 1.8745, "step": 1549 }, { "epoch": 0.12, "grad_norm": 1.0670562982559204, "learning_rate": 0.00019325231993504863, "loss": 1.9008, "step": 1550 }, { "epoch": 0.12, "grad_norm": 1.4004738330841064, "learning_rate": 0.00019324365393924754, "loss": 1.2207, "step": 1551 }, { "epoch": 0.12, "grad_norm": 4.233516216278076, "learning_rate": 0.00019323498257673775, "loss": 2.2359, "step": 1552 }, { "epoch": 0.12, "grad_norm": 0.8923971652984619, "learning_rate": 0.00019322630584801836, "loss": 1.7147, "step": 1553 }, { "epoch": 0.12, "grad_norm": 1.051430344581604, "learning_rate": 0.0001932176237535888, "loss": 2.0495, "step": 1554 }, { "epoch": 0.12, "grad_norm": 0.9379070401191711, "learning_rate": 0.00019320893629394873, "loss": 1.7622, "step": 1555 }, { "epoch": 0.12, "grad_norm": 1.4284720420837402, "learning_rate": 0.00019320024346959817, "loss": 1.6372, "step": 1556 }, { "epoch": 0.12, "grad_norm": 1.0462442636489868, 
"learning_rate": 0.00019319154528103742, "loss": 1.4169, "step": 1557 }, { "epoch": 0.12, "grad_norm": 1.071358561515808, "learning_rate": 0.0001931828417287672, "loss": 1.5909, "step": 1558 }, { "epoch": 0.12, "grad_norm": 1.9028267860412598, "learning_rate": 0.00019317413281328834, "loss": 2.4232, "step": 1559 }, { "epoch": 0.12, "grad_norm": 0.8344634175300598, "learning_rate": 0.00019316541853510214, "loss": 2.0648, "step": 1560 }, { "epoch": 0.12, "grad_norm": 1.4759057760238647, "learning_rate": 0.00019315669889471016, "loss": 1.4637, "step": 1561 }, { "epoch": 0.12, "grad_norm": 1.3690211772918701, "learning_rate": 0.00019314797389261424, "loss": 1.9913, "step": 1562 }, { "epoch": 0.12, "grad_norm": 1.094166874885559, "learning_rate": 0.00019313924352931658, "loss": 1.6987, "step": 1563 }, { "epoch": 0.12, "grad_norm": 1.0847915410995483, "learning_rate": 0.00019313050780531967, "loss": 1.7874, "step": 1564 }, { "epoch": 0.12, "grad_norm": 0.8497753739356995, "learning_rate": 0.00019312176672112627, "loss": 1.4339, "step": 1565 }, { "epoch": 0.12, "grad_norm": 1.0798360109329224, "learning_rate": 0.0001931130202772395, "loss": 1.2326, "step": 1566 }, { "epoch": 0.12, "grad_norm": 3.415950059890747, "learning_rate": 0.00019310426847416275, "loss": 2.0314, "step": 1567 }, { "epoch": 0.12, "grad_norm": 1.125968337059021, "learning_rate": 0.0001930955113123998, "loss": 1.8079, "step": 1568 }, { "epoch": 0.12, "grad_norm": 1.0883255004882812, "learning_rate": 0.00019308674879245456, "loss": 1.8856, "step": 1569 }, { "epoch": 0.12, "grad_norm": 4.006594657897949, "learning_rate": 0.00019307798091483145, "loss": 2.1235, "step": 1570 }, { "epoch": 0.12, "grad_norm": 1.1066962480545044, "learning_rate": 0.00019306920768003512, "loss": 1.4777, "step": 1571 }, { "epoch": 0.12, "grad_norm": 2.295605182647705, "learning_rate": 0.00019306042908857045, "loss": 1.7655, "step": 1572 }, { "epoch": 0.12, "grad_norm": 1.0115796327590942, "learning_rate": 0.00019305164514094275, 
"loss": 1.5438, "step": 1573 }, { "epoch": 0.12, "grad_norm": 3.2199039459228516, "learning_rate": 0.00019304285583765756, "loss": 2.2468, "step": 1574 }, { "epoch": 0.12, "grad_norm": 1.4633476734161377, "learning_rate": 0.00019303406117922084, "loss": 1.8806, "step": 1575 }, { "epoch": 0.12, "grad_norm": 1.6315168142318726, "learning_rate": 0.00019302526116613864, "loss": 0.9431, "step": 1576 }, { "epoch": 0.12, "grad_norm": 1.5712140798568726, "learning_rate": 0.00019301645579891753, "loss": 2.3879, "step": 1577 }, { "epoch": 0.12, "grad_norm": 0.9906436204910278, "learning_rate": 0.0001930076450780643, "loss": 2.0956, "step": 1578 }, { "epoch": 0.12, "grad_norm": 1.1711962223052979, "learning_rate": 0.00019299882900408602, "loss": 1.1273, "step": 1579 }, { "epoch": 0.12, "grad_norm": 2.4912948608398438, "learning_rate": 0.00019299000757749016, "loss": 1.5509, "step": 1580 }, { "epoch": 0.12, "grad_norm": 1.636802315711975, "learning_rate": 0.00019298118079878442, "loss": 2.0571, "step": 1581 }, { "epoch": 0.12, "grad_norm": 1.1507538557052612, "learning_rate": 0.00019297234866847684, "loss": 2.0549, "step": 1582 }, { "epoch": 0.12, "grad_norm": 1.1402382850646973, "learning_rate": 0.00019296351118707573, "loss": 1.6574, "step": 1583 }, { "epoch": 0.12, "grad_norm": 1.0281628370285034, "learning_rate": 0.0001929546683550898, "loss": 1.7253, "step": 1584 }, { "epoch": 0.12, "grad_norm": 1.0624717473983765, "learning_rate": 0.00019294582017302797, "loss": 1.9978, "step": 1585 }, { "epoch": 0.12, "grad_norm": 0.8075515031814575, "learning_rate": 0.00019293696664139946, "loss": 1.7314, "step": 1586 }, { "epoch": 0.12, "grad_norm": 1.7600430250167847, "learning_rate": 0.0001929281077607139, "loss": 1.9126, "step": 1587 }, { "epoch": 0.12, "grad_norm": 1.1603269577026367, "learning_rate": 0.00019291924353148114, "loss": 2.1241, "step": 1588 }, { "epoch": 0.12, "grad_norm": 2.92140793800354, "learning_rate": 0.00019291037395421142, "loss": 1.5123, "step": 1589 }, { 
"epoch": 0.12, "grad_norm": 0.9078354239463806, "learning_rate": 0.00019290149902941516, "loss": 1.2261, "step": 1590 }, { "epoch": 0.12, "grad_norm": 1.3665488958358765, "learning_rate": 0.00019289261875760322, "loss": 1.6858, "step": 1591 }, { "epoch": 0.12, "grad_norm": 3.331386089324951, "learning_rate": 0.00019288373313928664, "loss": 1.7699, "step": 1592 }, { "epoch": 0.12, "grad_norm": 1.0197392702102661, "learning_rate": 0.00019287484217497694, "loss": 1.5262, "step": 1593 }, { "epoch": 0.12, "grad_norm": 0.9346784353256226, "learning_rate": 0.00019286594586518575, "loss": 1.7003, "step": 1594 }, { "epoch": 0.12, "grad_norm": 1.0842286348342896, "learning_rate": 0.0001928570442104252, "loss": 1.852, "step": 1595 }, { "epoch": 0.12, "grad_norm": 1.9001630544662476, "learning_rate": 0.00019284813721120753, "loss": 1.5974, "step": 1596 }, { "epoch": 0.12, "grad_norm": 0.8670546412467957, "learning_rate": 0.00019283922486804548, "loss": 1.5046, "step": 1597 }, { "epoch": 0.12, "grad_norm": 1.0009156465530396, "learning_rate": 0.00019283030718145194, "loss": 1.5492, "step": 1598 }, { "epoch": 0.12, "grad_norm": 1.817733883857727, "learning_rate": 0.0001928213841519402, "loss": 1.7567, "step": 1599 }, { "epoch": 0.12, "grad_norm": 1.2776738405227661, "learning_rate": 0.0001928124557800238, "loss": 1.9972, "step": 1600 }, { "epoch": 0.12, "grad_norm": 2.4913837909698486, "learning_rate": 0.0001928035220662167, "loss": 1.7367, "step": 1601 }, { "epoch": 0.12, "grad_norm": 1.2515606880187988, "learning_rate": 0.00019279458301103302, "loss": 2.0982, "step": 1602 }, { "epoch": 0.12, "grad_norm": 2.361823558807373, "learning_rate": 0.00019278563861498723, "loss": 2.0632, "step": 1603 }, { "epoch": 0.12, "grad_norm": 1.2379587888717651, "learning_rate": 0.00019277668887859422, "loss": 1.6522, "step": 1604 }, { "epoch": 0.12, "grad_norm": 1.9833910465240479, "learning_rate": 0.00019276773380236904, "loss": 1.7046, "step": 1605 }, { "epoch": 0.12, "grad_norm": 
3.135169267654419, "learning_rate": 0.00019275877338682711, "loss": 1.7274, "step": 1606 }, { "epoch": 0.12, "grad_norm": 1.1936278343200684, "learning_rate": 0.00019274980763248414, "loss": 1.7096, "step": 1607 }, { "epoch": 0.12, "grad_norm": 1.4030346870422363, "learning_rate": 0.00019274083653985619, "loss": 2.047, "step": 1608 }, { "epoch": 0.12, "grad_norm": 1.2344117164611816, "learning_rate": 0.00019273186010945962, "loss": 2.6159, "step": 1609 }, { "epoch": 0.12, "grad_norm": 0.8459299206733704, "learning_rate": 0.00019272287834181103, "loss": 1.6875, "step": 1610 }, { "epoch": 0.12, "grad_norm": 1.0195869207382202, "learning_rate": 0.00019271389123742735, "loss": 1.6393, "step": 1611 }, { "epoch": 0.12, "grad_norm": 1.2385432720184326, "learning_rate": 0.00019270489879682592, "loss": 1.3879, "step": 1612 }, { "epoch": 0.12, "grad_norm": 0.9653452634811401, "learning_rate": 0.00019269590102052425, "loss": 1.5201, "step": 1613 }, { "epoch": 0.12, "grad_norm": 1.3570867776870728, "learning_rate": 0.0001926868979090402, "loss": 1.8881, "step": 1614 }, { "epoch": 0.12, "grad_norm": 1.093590259552002, "learning_rate": 0.000192677889462892, "loss": 1.619, "step": 1615 }, { "epoch": 0.12, "grad_norm": 2.6901845932006836, "learning_rate": 0.0001926688756825981, "loss": 1.6519, "step": 1616 }, { "epoch": 0.12, "grad_norm": 1.1118518114089966, "learning_rate": 0.00019265985656867734, "loss": 1.5388, "step": 1617 }, { "epoch": 0.12, "grad_norm": 1.0313609838485718, "learning_rate": 0.0001926508321216488, "loss": 1.0906, "step": 1618 }, { "epoch": 0.12, "grad_norm": 1.1729061603546143, "learning_rate": 0.00019264180234203183, "loss": 1.3273, "step": 1619 }, { "epoch": 0.12, "grad_norm": 0.9112288951873779, "learning_rate": 0.0001926327672303462, "loss": 1.4913, "step": 1620 }, { "epoch": 0.12, "grad_norm": 1.542903184890747, "learning_rate": 0.00019262372678711197, "loss": 1.3012, "step": 1621 }, { "epoch": 0.12, "grad_norm": 1.777953028678894, "learning_rate": 
0.00019261468101284938, "loss": 2.1103, "step": 1622 }, { "epoch": 0.12, "grad_norm": 1.177708625793457, "learning_rate": 0.00019260562990807916, "loss": 2.024, "step": 1623 }, { "epoch": 0.12, "grad_norm": 1.5476278066635132, "learning_rate": 0.00019259657347332217, "loss": 2.1967, "step": 1624 }, { "epoch": 0.12, "grad_norm": 1.8168445825576782, "learning_rate": 0.0001925875117090997, "loss": 1.5898, "step": 1625 }, { "epoch": 0.12, "grad_norm": 4.982977867126465, "learning_rate": 0.00019257844461593332, "loss": 1.2759, "step": 1626 }, { "epoch": 0.12, "grad_norm": 5.104249000549316, "learning_rate": 0.0001925693721943449, "loss": 1.5513, "step": 1627 }, { "epoch": 0.12, "grad_norm": 1.3763936758041382, "learning_rate": 0.00019256029444485656, "loss": 1.3103, "step": 1628 }, { "epoch": 0.12, "grad_norm": 0.9642529487609863, "learning_rate": 0.00019255121136799083, "loss": 1.264, "step": 1629 }, { "epoch": 0.12, "grad_norm": 4.0201640129089355, "learning_rate": 0.00019254212296427044, "loss": 2.5998, "step": 1630 }, { "epoch": 0.12, "grad_norm": 1.155240774154663, "learning_rate": 0.0001925330292342185, "loss": 1.3266, "step": 1631 }, { "epoch": 0.12, "grad_norm": 1.4154568910598755, "learning_rate": 0.00019252393017835844, "loss": 2.2166, "step": 1632 }, { "epoch": 0.12, "grad_norm": 1.379042625427246, "learning_rate": 0.00019251482579721392, "loss": 1.8271, "step": 1633 }, { "epoch": 0.12, "grad_norm": 1.7236731052398682, "learning_rate": 0.00019250571609130896, "loss": 1.4492, "step": 1634 }, { "epoch": 0.12, "grad_norm": 0.8852227330207825, "learning_rate": 0.00019249660106116791, "loss": 1.3301, "step": 1635 }, { "epoch": 0.12, "grad_norm": 1.0978435277938843, "learning_rate": 0.00019248748070731532, "loss": 2.1933, "step": 1636 }, { "epoch": 0.12, "grad_norm": 0.9995425939559937, "learning_rate": 0.00019247835503027622, "loss": 1.4035, "step": 1637 }, { "epoch": 0.12, "grad_norm": 1.8327672481536865, "learning_rate": 0.00019246922403057574, "loss": 1.7974, 
"step": 1638 }, { "epoch": 0.13, "grad_norm": 1.2793017625808716, "learning_rate": 0.0001924600877087395, "loss": 1.789, "step": 1639 }, { "epoch": 0.13, "grad_norm": 1.2933963537216187, "learning_rate": 0.00019245094606529331, "loss": 1.0149, "step": 1640 }, { "epoch": 0.13, "grad_norm": 1.4999351501464844, "learning_rate": 0.00019244179910076335, "loss": 1.8445, "step": 1641 }, { "epoch": 0.13, "grad_norm": 1.3204137086868286, "learning_rate": 0.00019243264681567603, "loss": 2.0827, "step": 1642 }, { "epoch": 0.13, "grad_norm": 2.252161741256714, "learning_rate": 0.00019242348921055818, "loss": 1.4514, "step": 1643 }, { "epoch": 0.13, "grad_norm": 0.9288880825042725, "learning_rate": 0.00019241432628593682, "loss": 1.9415, "step": 1644 }, { "epoch": 0.13, "grad_norm": 1.1604593992233276, "learning_rate": 0.00019240515804233937, "loss": 1.2971, "step": 1645 }, { "epoch": 0.13, "grad_norm": 0.9947024583816528, "learning_rate": 0.00019239598448029347, "loss": 0.9571, "step": 1646 }, { "epoch": 0.13, "grad_norm": 1.1507415771484375, "learning_rate": 0.00019238680560032718, "loss": 1.6328, "step": 1647 }, { "epoch": 0.13, "grad_norm": 2.8098084926605225, "learning_rate": 0.00019237762140296875, "loss": 2.2372, "step": 1648 }, { "epoch": 0.13, "grad_norm": 1.7258552312850952, "learning_rate": 0.00019236843188874674, "loss": 1.9552, "step": 1649 }, { "epoch": 0.13, "grad_norm": 0.8006431460380554, "learning_rate": 0.00019235923705819015, "loss": 1.1858, "step": 1650 }, { "epoch": 0.13, "grad_norm": 3.138533353805542, "learning_rate": 0.00019235003691182813, "loss": 1.7925, "step": 1651 }, { "epoch": 0.13, "grad_norm": 1.3208112716674805, "learning_rate": 0.00019234083145019024, "loss": 1.855, "step": 1652 }, { "epoch": 0.13, "grad_norm": 1.0775055885314941, "learning_rate": 0.0001923316206738063, "loss": 1.6983, "step": 1653 }, { "epoch": 0.13, "grad_norm": 0.7688762545585632, "learning_rate": 0.00019232240458320643, "loss": 0.9557, "step": 1654 }, { "epoch": 0.13, 
"grad_norm": 1.097804069519043, "learning_rate": 0.00019231318317892106, "loss": 1.6316, "step": 1655 }, { "epoch": 0.13, "grad_norm": 1.3940991163253784, "learning_rate": 0.00019230395646148096, "loss": 1.2524, "step": 1656 }, { "epoch": 0.13, "grad_norm": 1.3134924173355103, "learning_rate": 0.0001922947244314172, "loss": 1.7486, "step": 1657 }, { "epoch": 0.13, "grad_norm": 1.0014907121658325, "learning_rate": 0.00019228548708926104, "loss": 1.6459, "step": 1658 }, { "epoch": 0.13, "grad_norm": 1.219536304473877, "learning_rate": 0.00019227624443554425, "loss": 1.4076, "step": 1659 }, { "epoch": 0.13, "grad_norm": 1.5552051067352295, "learning_rate": 0.00019226699647079874, "loss": 1.8153, "step": 1660 }, { "epoch": 0.13, "grad_norm": 0.800730288028717, "learning_rate": 0.00019225774319555684, "loss": 1.6047, "step": 1661 }, { "epoch": 0.13, "grad_norm": 1.5569366216659546, "learning_rate": 0.00019224848461035104, "loss": 2.081, "step": 1662 }, { "epoch": 0.13, "grad_norm": 1.1458699703216553, "learning_rate": 0.0001922392207157143, "loss": 2.0005, "step": 1663 }, { "epoch": 0.13, "grad_norm": 0.9474000930786133, "learning_rate": 0.00019222995151217976, "loss": 2.157, "step": 1664 }, { "epoch": 0.13, "grad_norm": 1.5861680507659912, "learning_rate": 0.00019222067700028098, "loss": 1.7014, "step": 1665 }, { "epoch": 0.13, "grad_norm": 0.7790770530700684, "learning_rate": 0.0001922113971805517, "loss": 1.3885, "step": 1666 }, { "epoch": 0.13, "grad_norm": 1.0510035753250122, "learning_rate": 0.00019220211205352603, "loss": 1.4353, "step": 1667 }, { "epoch": 0.13, "grad_norm": 1.396669864654541, "learning_rate": 0.00019219282161973844, "loss": 1.8268, "step": 1668 }, { "epoch": 0.13, "grad_norm": 1.3031939268112183, "learning_rate": 0.0001921835258797236, "loss": 1.7266, "step": 1669 }, { "epoch": 0.13, "grad_norm": 3.0023000240325928, "learning_rate": 0.0001921742248340165, "loss": 2.2128, "step": 1670 }, { "epoch": 0.13, "grad_norm": 1.1263421773910522, 
"learning_rate": 0.00019216491848315256, "loss": 0.9568, "step": 1671 }, { "epoch": 0.13, "grad_norm": 1.1694365739822388, "learning_rate": 0.00019215560682766736, "loss": 1.2027, "step": 1672 }, { "epoch": 0.13, "grad_norm": 1.2933335304260254, "learning_rate": 0.0001921462898680968, "loss": 1.4741, "step": 1673 }, { "epoch": 0.13, "grad_norm": 1.3168654441833496, "learning_rate": 0.0001921369676049772, "loss": 1.8232, "step": 1674 }, { "epoch": 0.13, "grad_norm": 1.3570576906204224, "learning_rate": 0.0001921276400388451, "loss": 2.1316, "step": 1675 }, { "epoch": 0.13, "grad_norm": 1.610437035560608, "learning_rate": 0.00019211830717023732, "loss": 1.7613, "step": 1676 }, { "epoch": 0.13, "grad_norm": 0.9002325534820557, "learning_rate": 0.00019210896899969103, "loss": 1.817, "step": 1677 }, { "epoch": 0.13, "grad_norm": 0.8880118727684021, "learning_rate": 0.00019209962552774372, "loss": 1.7212, "step": 1678 }, { "epoch": 0.13, "grad_norm": 1.7079182863235474, "learning_rate": 0.0001920902767549331, "loss": 1.7694, "step": 1679 }, { "epoch": 0.13, "grad_norm": 3.5424506664276123, "learning_rate": 0.0001920809226817973, "loss": 1.8319, "step": 1680 }, { "epoch": 0.13, "grad_norm": 2.3167874813079834, "learning_rate": 0.00019207156330887474, "loss": 2.0918, "step": 1681 }, { "epoch": 0.13, "grad_norm": 1.361312747001648, "learning_rate": 0.000192062198636704, "loss": 1.1717, "step": 1682 }, { "epoch": 0.13, "grad_norm": 1.1239681243896484, "learning_rate": 0.00019205282866582412, "loss": 1.7777, "step": 1683 }, { "epoch": 0.13, "grad_norm": 2.445301055908203, "learning_rate": 0.00019204345339677442, "loss": 1.8606, "step": 1684 }, { "epoch": 0.13, "grad_norm": 0.9184565544128418, "learning_rate": 0.00019203407283009444, "loss": 1.2957, "step": 1685 }, { "epoch": 0.13, "grad_norm": 2.2013652324676514, "learning_rate": 0.00019202468696632416, "loss": 1.9815, "step": 1686 }, { "epoch": 0.13, "grad_norm": 1.1460682153701782, "learning_rate": 0.0001920152958060038, 
"loss": 1.4429, "step": 1687 }, { "epoch": 0.13, "grad_norm": 1.4190993309020996, "learning_rate": 0.00019200589934967376, "loss": 1.7652, "step": 1688 }, { "epoch": 0.13, "grad_norm": 1.426649808883667, "learning_rate": 0.00019199649759787496, "loss": 1.9172, "step": 1689 }, { "epoch": 0.13, "grad_norm": 1.0495685338974, "learning_rate": 0.0001919870905511485, "loss": 1.6914, "step": 1690 }, { "epoch": 0.13, "grad_norm": 0.9887834787368774, "learning_rate": 0.0001919776782100358, "loss": 1.6269, "step": 1691 }, { "epoch": 0.13, "grad_norm": 0.845858633518219, "learning_rate": 0.0001919682605750786, "loss": 1.3193, "step": 1692 }, { "epoch": 0.13, "grad_norm": 1.2695320844650269, "learning_rate": 0.00019195883764681893, "loss": 1.0679, "step": 1693 }, { "epoch": 0.13, "grad_norm": 1.3975179195404053, "learning_rate": 0.00019194940942579916, "loss": 1.8068, "step": 1694 }, { "epoch": 0.13, "grad_norm": 1.5853328704833984, "learning_rate": 0.0001919399759125619, "loss": 1.7801, "step": 1695 }, { "epoch": 0.13, "grad_norm": 1.2269155979156494, "learning_rate": 0.00019193053710765012, "loss": 2.0705, "step": 1696 }, { "epoch": 0.13, "grad_norm": 1.2942819595336914, "learning_rate": 0.0001919210930116071, "loss": 1.4555, "step": 1697 }, { "epoch": 0.13, "grad_norm": 1.5656911134719849, "learning_rate": 0.0001919116436249764, "loss": 1.9065, "step": 1698 }, { "epoch": 0.13, "grad_norm": 1.1716471910476685, "learning_rate": 0.00019190218894830186, "loss": 1.1178, "step": 1699 }, { "epoch": 0.13, "grad_norm": 1.9305847883224487, "learning_rate": 0.00019189272898212766, "loss": 1.5908, "step": 1700 }, { "epoch": 0.13, "grad_norm": 1.4021083116531372, "learning_rate": 0.00019188326372699825, "loss": 1.6574, "step": 1701 }, { "epoch": 0.13, "grad_norm": 3.15850830078125, "learning_rate": 0.00019187379318345846, "loss": 1.3833, "step": 1702 }, { "epoch": 0.13, "grad_norm": 0.6836242079734802, "learning_rate": 0.00019186431735205336, "loss": 1.3144, "step": 1703 }, { "epoch": 
0.13, "grad_norm": 1.1249021291732788, "learning_rate": 0.0001918548362333283, "loss": 1.7508, "step": 1704 }, { "epoch": 0.13, "grad_norm": 1.8735843896865845, "learning_rate": 0.00019184534982782904, "loss": 1.6723, "step": 1705 }, { "epoch": 0.13, "grad_norm": 1.1225956678390503, "learning_rate": 0.00019183585813610152, "loss": 0.957, "step": 1706 }, { "epoch": 0.13, "grad_norm": 1.934698224067688, "learning_rate": 0.00019182636115869207, "loss": 1.36, "step": 1707 }, { "epoch": 0.13, "grad_norm": 1.7530231475830078, "learning_rate": 0.00019181685889614727, "loss": 2.1734, "step": 1708 }, { "epoch": 0.13, "grad_norm": 1.023768663406372, "learning_rate": 0.00019180735134901407, "loss": 2.3581, "step": 1709 }, { "epoch": 0.13, "grad_norm": 1.103955626487732, "learning_rate": 0.00019179783851783967, "loss": 1.9341, "step": 1710 }, { "epoch": 0.13, "grad_norm": 3.863527774810791, "learning_rate": 0.00019178832040317155, "loss": 2.4809, "step": 1711 }, { "epoch": 0.13, "grad_norm": 0.9039313197135925, "learning_rate": 0.00019177879700555756, "loss": 1.7504, "step": 1712 }, { "epoch": 0.13, "grad_norm": 1.3123403787612915, "learning_rate": 0.00019176926832554584, "loss": 1.7569, "step": 1713 }, { "epoch": 0.13, "grad_norm": 0.9392445087432861, "learning_rate": 0.0001917597343636848, "loss": 1.529, "step": 1714 }, { "epoch": 0.13, "grad_norm": 1.4459996223449707, "learning_rate": 0.00019175019512052317, "loss": 2.105, "step": 1715 }, { "epoch": 0.13, "grad_norm": 1.2494263648986816, "learning_rate": 0.00019174065059661004, "loss": 1.818, "step": 1716 }, { "epoch": 0.13, "grad_norm": 1.2566654682159424, "learning_rate": 0.00019173110079249468, "loss": 1.6748, "step": 1717 }, { "epoch": 0.13, "grad_norm": 1.6426937580108643, "learning_rate": 0.00019172154570872677, "loss": 1.3084, "step": 1718 }, { "epoch": 0.13, "grad_norm": 1.5476444959640503, "learning_rate": 0.0001917119853458563, "loss": 1.6668, "step": 1719 }, { "epoch": 0.13, "grad_norm": 1.122986078262329, 
"learning_rate": 0.00019170241970443343, "loss": 1.3596, "step": 1720 }, { "epoch": 0.13, "grad_norm": 0.8274943232536316, "learning_rate": 0.00019169284878500878, "loss": 1.6019, "step": 1721 }, { "epoch": 0.13, "grad_norm": 1.227142572402954, "learning_rate": 0.00019168327258813325, "loss": 1.0847, "step": 1722 }, { "epoch": 0.13, "grad_norm": 0.9684174060821533, "learning_rate": 0.00019167369111435791, "loss": 1.8718, "step": 1723 }, { "epoch": 0.13, "grad_norm": 1.9800183773040771, "learning_rate": 0.0001916641043642343, "loss": 1.9856, "step": 1724 }, { "epoch": 0.13, "grad_norm": 2.559999942779541, "learning_rate": 0.00019165451233831414, "loss": 2.1476, "step": 1725 }, { "epoch": 0.13, "grad_norm": 1.3987042903900146, "learning_rate": 0.00019164491503714958, "loss": 1.5954, "step": 1726 }, { "epoch": 0.13, "grad_norm": 1.1709119081497192, "learning_rate": 0.00019163531246129293, "loss": 1.2795, "step": 1727 }, { "epoch": 0.13, "grad_norm": 1.0861318111419678, "learning_rate": 0.00019162570461129686, "loss": 1.1572, "step": 1728 }, { "epoch": 0.13, "grad_norm": 1.417551040649414, "learning_rate": 0.00019161609148771443, "loss": 1.8499, "step": 1729 }, { "epoch": 0.13, "grad_norm": 1.7129623889923096, "learning_rate": 0.0001916064730910989, "loss": 1.955, "step": 1730 }, { "epoch": 0.13, "grad_norm": 1.0231813192367554, "learning_rate": 0.00019159684942200389, "loss": 1.8056, "step": 1731 }, { "epoch": 0.13, "grad_norm": 1.6152998208999634, "learning_rate": 0.00019158722048098322, "loss": 1.4005, "step": 1732 }, { "epoch": 0.13, "grad_norm": 1.6666253805160522, "learning_rate": 0.00019157758626859117, "loss": 1.9287, "step": 1733 }, { "epoch": 0.13, "grad_norm": 1.1857613325119019, "learning_rate": 0.00019156794678538218, "loss": 1.6696, "step": 1734 }, { "epoch": 0.13, "grad_norm": 1.22178053855896, "learning_rate": 0.00019155830203191113, "loss": 2.0788, "step": 1735 }, { "epoch": 0.13, "grad_norm": 0.8950700163841248, "learning_rate": 
0.00019154865200873307, "loss": 1.9415, "step": 1736 }, { "epoch": 0.13, "grad_norm": 1.3596826791763306, "learning_rate": 0.00019153899671640344, "loss": 1.4958, "step": 1737 }, { "epoch": 0.13, "grad_norm": 1.8823072910308838, "learning_rate": 0.00019152933615547798, "loss": 1.3359, "step": 1738 }, { "epoch": 0.13, "grad_norm": 1.9493248462677002, "learning_rate": 0.00019151967032651263, "loss": 1.8648, "step": 1739 }, { "epoch": 0.13, "grad_norm": 1.470755696296692, "learning_rate": 0.0001915099992300638, "loss": 1.7084, "step": 1740 }, { "epoch": 0.13, "grad_norm": 3.914301633834839, "learning_rate": 0.00019150032286668808, "loss": 2.5535, "step": 1741 }, { "epoch": 0.13, "grad_norm": 1.1712515354156494, "learning_rate": 0.0001914906412369424, "loss": 1.3964, "step": 1742 }, { "epoch": 0.13, "grad_norm": 1.417860746383667, "learning_rate": 0.00019148095434138402, "loss": 0.9681, "step": 1743 }, { "epoch": 0.13, "grad_norm": 1.3116376399993896, "learning_rate": 0.00019147126218057046, "loss": 1.171, "step": 1744 }, { "epoch": 0.13, "grad_norm": 1.4100656509399414, "learning_rate": 0.00019146156475505953, "loss": 2.3492, "step": 1745 }, { "epoch": 0.13, "grad_norm": 1.7083419561386108, "learning_rate": 0.0001914518620654094, "loss": 0.9709, "step": 1746 }, { "epoch": 0.13, "grad_norm": 2.4354631900787354, "learning_rate": 0.0001914421541121785, "loss": 2.6655, "step": 1747 }, { "epoch": 0.13, "grad_norm": 8.765703201293945, "learning_rate": 0.00019143244089592565, "loss": 4.518, "step": 1748 }, { "epoch": 0.13, "grad_norm": 1.3764034509658813, "learning_rate": 0.00019142272241720983, "loss": 1.8962, "step": 1749 }, { "epoch": 0.13, "grad_norm": 1.0626006126403809, "learning_rate": 0.00019141299867659036, "loss": 1.6102, "step": 1750 }, { "epoch": 0.13, "grad_norm": 4.371476650238037, "learning_rate": 0.00019140326967462699, "loss": 2.454, "step": 1751 }, { "epoch": 0.13, "grad_norm": 2.237757682800293, "learning_rate": 0.00019139353541187962, "loss": 2.5196, 
"step": 1752 }, { "epoch": 0.13, "grad_norm": 2.5865955352783203, "learning_rate": 0.0001913837958889085, "loss": 1.9969, "step": 1753 }, { "epoch": 0.13, "grad_norm": 0.8906533718109131, "learning_rate": 0.00019137405110627426, "loss": 1.4165, "step": 1754 }, { "epoch": 0.13, "grad_norm": 1.512681484222412, "learning_rate": 0.00019136430106453777, "loss": 2.0024, "step": 1755 }, { "epoch": 0.13, "grad_norm": 1.3901095390319824, "learning_rate": 0.0001913545457642601, "loss": 1.3045, "step": 1756 }, { "epoch": 0.13, "grad_norm": 1.5089560747146606, "learning_rate": 0.00019134478520600281, "loss": 0.6133, "step": 1757 }, { "epoch": 0.13, "grad_norm": 1.9649224281311035, "learning_rate": 0.00019133501939032766, "loss": 1.9724, "step": 1758 }, { "epoch": 0.13, "grad_norm": 1.8871979713439941, "learning_rate": 0.0001913252483177967, "loss": 2.1681, "step": 1759 }, { "epoch": 0.13, "grad_norm": 1.099989652633667, "learning_rate": 0.00019131547198897235, "loss": 1.67, "step": 1760 }, { "epoch": 0.13, "grad_norm": 1.7803196907043457, "learning_rate": 0.00019130569040441727, "loss": 1.4998, "step": 1761 }, { "epoch": 0.13, "grad_norm": 1.193321943283081, "learning_rate": 0.00019129590356469446, "loss": 1.2642, "step": 1762 }, { "epoch": 0.13, "grad_norm": 1.1727501153945923, "learning_rate": 0.0001912861114703672, "loss": 2.0254, "step": 1763 }, { "epoch": 0.13, "grad_norm": 1.1773103475570679, "learning_rate": 0.0001912763141219991, "loss": 1.8422, "step": 1764 }, { "epoch": 0.13, "grad_norm": 0.7886079549789429, "learning_rate": 0.00019126651152015403, "loss": 1.5749, "step": 1765 }, { "epoch": 0.13, "grad_norm": 1.7147310972213745, "learning_rate": 0.0001912567036653962, "loss": 1.758, "step": 1766 }, { "epoch": 0.13, "grad_norm": 1.139351487159729, "learning_rate": 0.0001912468905582901, "loss": 1.9412, "step": 1767 }, { "epoch": 0.13, "grad_norm": 1.2984460592269897, "learning_rate": 0.0001912370721994005, "loss": 1.8174, "step": 1768 }, { "epoch": 0.13, "grad_norm": 
3.747037410736084, "learning_rate": 0.00019122724858929257, "loss": 2.0312, "step": 1769 }, { "epoch": 0.14, "grad_norm": 1.0246505737304688, "learning_rate": 0.0001912174197285317, "loss": 1.4383, "step": 1770 }, { "epoch": 0.14, "grad_norm": 1.317384123802185, "learning_rate": 0.00019120758561768354, "loss": 2.3675, "step": 1771 }, { "epoch": 0.14, "grad_norm": 1.3326644897460938, "learning_rate": 0.00019119774625731416, "loss": 1.6489, "step": 1772 }, { "epoch": 0.14, "grad_norm": 1.0685300827026367, "learning_rate": 0.00019118790164798983, "loss": 1.5699, "step": 1773 }, { "epoch": 0.14, "grad_norm": 0.9181631803512573, "learning_rate": 0.00019117805179027722, "loss": 1.6733, "step": 1774 }, { "epoch": 0.14, "grad_norm": 1.3178566694259644, "learning_rate": 0.00019116819668474317, "loss": 1.6271, "step": 1775 }, { "epoch": 0.14, "grad_norm": 1.2199188470840454, "learning_rate": 0.00019115833633195494, "loss": 1.5192, "step": 1776 }, { "epoch": 0.14, "grad_norm": 4.171012878417969, "learning_rate": 0.00019114847073248008, "loss": 2.3557, "step": 1777 }, { "epoch": 0.14, "grad_norm": 1.75469970703125, "learning_rate": 0.00019113859988688638, "loss": 1.3748, "step": 1778 }, { "epoch": 0.14, "grad_norm": 1.5910640954971313, "learning_rate": 0.00019112872379574195, "loss": 1.8142, "step": 1779 }, { "epoch": 0.14, "grad_norm": 2.0522754192352295, "learning_rate": 0.00019111884245961522, "loss": 1.9745, "step": 1780 }, { "epoch": 0.14, "grad_norm": 1.058826208114624, "learning_rate": 0.00019110895587907495, "loss": 1.5581, "step": 1781 }, { "epoch": 0.14, "grad_norm": 0.9991682171821594, "learning_rate": 0.00019109906405469015, "loss": 1.6433, "step": 1782 }, { "epoch": 0.14, "grad_norm": 2.4166696071624756, "learning_rate": 0.00019108916698703013, "loss": 0.7991, "step": 1783 }, { "epoch": 0.14, "grad_norm": 1.5396959781646729, "learning_rate": 0.00019107926467666454, "loss": 1.6115, "step": 1784 }, { "epoch": 0.14, "grad_norm": 1.087815523147583, "learning_rate": 
0.00019106935712416334, "loss": 1.0554, "step": 1785 }, { "epoch": 0.14, "grad_norm": 1.4981147050857544, "learning_rate": 0.00019105944433009674, "loss": 1.3108, "step": 1786 }, { "epoch": 0.14, "grad_norm": 1.3164747953414917, "learning_rate": 0.00019104952629503527, "loss": 2.0168, "step": 1787 }, { "epoch": 0.14, "grad_norm": 1.9456087350845337, "learning_rate": 0.0001910396030195498, "loss": 2.0765, "step": 1788 }, { "epoch": 0.14, "grad_norm": 1.2241421937942505, "learning_rate": 0.00019102967450421147, "loss": 1.7729, "step": 1789 }, { "epoch": 0.14, "grad_norm": 1.3662259578704834, "learning_rate": 0.00019101974074959167, "loss": 2.5734, "step": 1790 }, { "epoch": 0.14, "grad_norm": 1.649641752243042, "learning_rate": 0.00019100980175626217, "loss": 2.2434, "step": 1791 }, { "epoch": 0.14, "grad_norm": 1.3130284547805786, "learning_rate": 0.00019099985752479506, "loss": 1.8981, "step": 1792 }, { "epoch": 0.14, "grad_norm": 0.6794470548629761, "learning_rate": 0.00019098990805576263, "loss": 0.7842, "step": 1793 }, { "epoch": 0.14, "grad_norm": 1.0814820528030396, "learning_rate": 0.00019097995334973756, "loss": 1.0963, "step": 1794 }, { "epoch": 0.14, "grad_norm": 1.111374020576477, "learning_rate": 0.00019096999340729282, "loss": 1.6679, "step": 1795 }, { "epoch": 0.14, "grad_norm": 1.2098294496536255, "learning_rate": 0.00019096002822900163, "loss": 1.8817, "step": 1796 }, { "epoch": 0.14, "grad_norm": 1.2255605459213257, "learning_rate": 0.00019095005781543756, "loss": 1.8133, "step": 1797 }, { "epoch": 0.14, "grad_norm": 0.9196917414665222, "learning_rate": 0.00019094008216717442, "loss": 1.3671, "step": 1798 }, { "epoch": 0.14, "grad_norm": 0.957831084728241, "learning_rate": 0.00019093010128478643, "loss": 1.8624, "step": 1799 }, { "epoch": 0.14, "grad_norm": 1.4498006105422974, "learning_rate": 0.000190920115168848, "loss": 1.9821, "step": 1800 }, { "epoch": 0.14, "grad_norm": 0.7889032959938049, "learning_rate": 0.0001909101238199339, "loss": 
1.7953, "step": 1801 }, { "epoch": 0.14, "grad_norm": 0.9546078443527222, "learning_rate": 0.00019090012723861922, "loss": 1.643, "step": 1802 }, { "epoch": 0.14, "grad_norm": 1.154266119003296, "learning_rate": 0.0001908901254254793, "loss": 1.3495, "step": 1803 }, { "epoch": 0.14, "grad_norm": 1.001150131225586, "learning_rate": 0.0001908801183810898, "loss": 1.8323, "step": 1804 }, { "epoch": 0.14, "grad_norm": 2.3745360374450684, "learning_rate": 0.00019087010610602668, "loss": 2.5606, "step": 1805 }, { "epoch": 0.14, "grad_norm": 1.2616046667099, "learning_rate": 0.0001908600886008662, "loss": 1.4106, "step": 1806 }, { "epoch": 0.14, "grad_norm": 1.8200360536575317, "learning_rate": 0.00019085006586618493, "loss": 2.0929, "step": 1807 }, { "epoch": 0.14, "grad_norm": 1.3981008529663086, "learning_rate": 0.00019084003790255976, "loss": 2.1557, "step": 1808 }, { "epoch": 0.14, "grad_norm": 6.94113302230835, "learning_rate": 0.00019083000471056778, "loss": 2.2878, "step": 1809 }, { "epoch": 0.14, "grad_norm": 1.1292493343353271, "learning_rate": 0.00019081996629078657, "loss": 1.4168, "step": 1810 }, { "epoch": 0.14, "grad_norm": 1.7216403484344482, "learning_rate": 0.0001908099226437938, "loss": 1.2128, "step": 1811 }, { "epoch": 0.14, "grad_norm": 1.9316129684448242, "learning_rate": 0.0001907998737701676, "loss": 2.2984, "step": 1812 }, { "epoch": 0.14, "grad_norm": 1.1486129760742188, "learning_rate": 0.00019078981967048632, "loss": 1.6172, "step": 1813 }, { "epoch": 0.14, "grad_norm": 1.0818634033203125, "learning_rate": 0.00019077976034532866, "loss": 1.4214, "step": 1814 }, { "epoch": 0.14, "grad_norm": 2.9153473377227783, "learning_rate": 0.0001907696957952735, "loss": 1.6161, "step": 1815 }, { "epoch": 0.14, "grad_norm": 1.2064279317855835, "learning_rate": 0.00019075962602090022, "loss": 1.8273, "step": 1816 }, { "epoch": 0.14, "grad_norm": 2.0441787242889404, "learning_rate": 0.00019074955102278836, "loss": 1.5801, "step": 1817 }, { "epoch": 0.14, 
"grad_norm": 0.9097084403038025, "learning_rate": 0.00019073947080151777, "loss": 1.9892, "step": 1818 }, { "epoch": 0.14, "grad_norm": 1.1876296997070312, "learning_rate": 0.00019072938535766865, "loss": 1.5308, "step": 1819 }, { "epoch": 0.14, "grad_norm": 1.1521166563034058, "learning_rate": 0.00019071929469182143, "loss": 1.9111, "step": 1820 }, { "epoch": 0.14, "grad_norm": 1.0224876403808594, "learning_rate": 0.00019070919880455694, "loss": 1.2398, "step": 1821 }, { "epoch": 0.14, "grad_norm": 1.744231104850769, "learning_rate": 0.00019069909769645627, "loss": 2.072, "step": 1822 }, { "epoch": 0.14, "grad_norm": 0.9077439308166504, "learning_rate": 0.00019068899136810073, "loss": 1.4976, "step": 1823 }, { "epoch": 0.14, "grad_norm": 1.5515754222869873, "learning_rate": 0.00019067887982007206, "loss": 1.4858, "step": 1824 }, { "epoch": 0.14, "grad_norm": 1.604141116142273, "learning_rate": 0.00019066876305295216, "loss": 1.814, "step": 1825 }, { "epoch": 0.14, "grad_norm": 1.5260870456695557, "learning_rate": 0.00019065864106732337, "loss": 1.4626, "step": 1826 }, { "epoch": 0.14, "grad_norm": 1.2452468872070312, "learning_rate": 0.00019064851386376826, "loss": 1.3903, "step": 1827 }, { "epoch": 0.14, "grad_norm": 1.5552839040756226, "learning_rate": 0.00019063838144286975, "loss": 1.994, "step": 1828 }, { "epoch": 0.14, "grad_norm": 1.2469031810760498, "learning_rate": 0.00019062824380521094, "loss": 1.2168, "step": 1829 }, { "epoch": 0.14, "grad_norm": 1.0451000928878784, "learning_rate": 0.00019061810095137533, "loss": 1.5933, "step": 1830 }, { "epoch": 0.14, "grad_norm": 2.2428553104400635, "learning_rate": 0.00019060795288194671, "loss": 2.7297, "step": 1831 }, { "epoch": 0.14, "grad_norm": 1.1112931966781616, "learning_rate": 0.0001905977995975092, "loss": 1.2562, "step": 1832 }, { "epoch": 0.14, "grad_norm": 2.431990385055542, "learning_rate": 0.00019058764109864713, "loss": 1.893, "step": 1833 }, { "epoch": 0.14, "grad_norm": 1.3334721326828003, 
"learning_rate": 0.0001905774773859452, "loss": 1.3043, "step": 1834 }, { "epoch": 0.14, "grad_norm": 0.9043070673942566, "learning_rate": 0.00019056730845998834, "loss": 1.8072, "step": 1835 }, { "epoch": 0.14, "grad_norm": 2.547874689102173, "learning_rate": 0.00019055713432136192, "loss": 1.5206, "step": 1836 }, { "epoch": 0.14, "grad_norm": 1.4288489818572998, "learning_rate": 0.00019054695497065143, "loss": 1.3057, "step": 1837 }, { "epoch": 0.14, "grad_norm": 0.9321503043174744, "learning_rate": 0.00019053677040844282, "loss": 2.0075, "step": 1838 }, { "epoch": 0.14, "grad_norm": 1.212302803993225, "learning_rate": 0.00019052658063532224, "loss": 1.0138, "step": 1839 }, { "epoch": 0.14, "grad_norm": 0.8567163348197937, "learning_rate": 0.00019051638565187616, "loss": 1.7718, "step": 1840 }, { "epoch": 0.14, "grad_norm": 1.3942313194274902, "learning_rate": 0.00019050618545869138, "loss": 1.1429, "step": 1841 }, { "epoch": 0.14, "grad_norm": 2.243147611618042, "learning_rate": 0.00019049598005635497, "loss": 1.5667, "step": 1842 }, { "epoch": 0.14, "grad_norm": 1.091018795967102, "learning_rate": 0.00019048576944545432, "loss": 0.9231, "step": 1843 }, { "epoch": 0.14, "grad_norm": 1.093033790588379, "learning_rate": 0.00019047555362657713, "loss": 1.3997, "step": 1844 }, { "epoch": 0.14, "grad_norm": 3.5629420280456543, "learning_rate": 0.00019046533260031132, "loss": 1.775, "step": 1845 }, { "epoch": 0.14, "grad_norm": 1.2814898490905762, "learning_rate": 0.0001904551063672452, "loss": 1.1812, "step": 1846 }, { "epoch": 0.14, "grad_norm": 0.829736053943634, "learning_rate": 0.00019044487492796735, "loss": 1.3443, "step": 1847 }, { "epoch": 0.14, "grad_norm": 2.9250993728637695, "learning_rate": 0.00019043463828306666, "loss": 2.2681, "step": 1848 }, { "epoch": 0.14, "grad_norm": 1.608565330505371, "learning_rate": 0.00019042439643313227, "loss": 0.931, "step": 1849 }, { "epoch": 0.14, "grad_norm": 1.9403356313705444, "learning_rate": 0.0001904141493787537, 
"loss": 1.9366, "step": 1850 }, { "epoch": 0.14, "grad_norm": 3.275115966796875, "learning_rate": 0.0001904038971205207, "loss": 2.0724, "step": 1851 }, { "epoch": 0.14, "grad_norm": 1.6276860237121582, "learning_rate": 0.00019039363965902336, "loss": 1.6479, "step": 1852 }, { "epoch": 0.14, "grad_norm": 1.213547945022583, "learning_rate": 0.00019038337699485208, "loss": 1.5713, "step": 1853 }, { "epoch": 0.14, "grad_norm": 2.755878210067749, "learning_rate": 0.00019037310912859744, "loss": 1.4738, "step": 1854 }, { "epoch": 0.14, "grad_norm": 1.87269926071167, "learning_rate": 0.00019036283606085053, "loss": 2.006, "step": 1855 }, { "epoch": 0.14, "grad_norm": 1.0669002532958984, "learning_rate": 0.0001903525577922026, "loss": 1.5716, "step": 1856 }, { "epoch": 0.14, "grad_norm": 4.305464744567871, "learning_rate": 0.00019034227432324516, "loss": 2.5001, "step": 1857 }, { "epoch": 0.14, "grad_norm": 1.133016586303711, "learning_rate": 0.00019033198565457013, "loss": 1.4654, "step": 1858 }, { "epoch": 0.14, "grad_norm": 1.0379645824432373, "learning_rate": 0.00019032169178676966, "loss": 1.6269, "step": 1859 }, { "epoch": 0.14, "grad_norm": 0.8892016410827637, "learning_rate": 0.00019031139272043625, "loss": 1.7661, "step": 1860 }, { "epoch": 0.14, "grad_norm": 1.8388110399246216, "learning_rate": 0.00019030108845616265, "loss": 2.4699, "step": 1861 }, { "epoch": 0.14, "grad_norm": 1.1200343370437622, "learning_rate": 0.00019029077899454194, "loss": 1.7745, "step": 1862 }, { "epoch": 0.14, "grad_norm": 1.5102815628051758, "learning_rate": 0.0001902804643361675, "loss": 1.9091, "step": 1863 }, { "epoch": 0.14, "grad_norm": 1.1856437921524048, "learning_rate": 0.00019027014448163296, "loss": 2.0454, "step": 1864 }, { "epoch": 0.14, "grad_norm": 2.4782395362854004, "learning_rate": 0.0001902598194315323, "loss": 2.1064, "step": 1865 }, { "epoch": 0.14, "grad_norm": 1.3251301050186157, "learning_rate": 0.00019024948918645981, "loss": 1.9988, "step": 1866 }, { "epoch": 
0.14, "grad_norm": 0.993800163269043, "learning_rate": 0.00019023915374701003, "loss": 0.9968, "step": 1867 }, { "epoch": 0.14, "grad_norm": 1.424069881439209, "learning_rate": 0.00019022881311377786, "loss": 1.4555, "step": 1868 }, { "epoch": 0.14, "grad_norm": 0.7688577771186829, "learning_rate": 0.0001902184672873584, "loss": 1.578, "step": 1869 }, { "epoch": 0.14, "grad_norm": 2.3305647373199463, "learning_rate": 0.0001902081162683472, "loss": 1.9777, "step": 1870 }, { "epoch": 0.14, "grad_norm": 1.0257076025009155, "learning_rate": 0.00019019776005733992, "loss": 1.6609, "step": 1871 }, { "epoch": 0.14, "grad_norm": 1.2889742851257324, "learning_rate": 0.0001901873986549327, "loss": 1.6413, "step": 1872 }, { "epoch": 0.14, "grad_norm": 2.090398073196411, "learning_rate": 0.00019017703206172185, "loss": 1.7216, "step": 1873 }, { "epoch": 0.14, "grad_norm": 1.8075790405273438, "learning_rate": 0.00019016666027830406, "loss": 1.4502, "step": 1874 }, { "epoch": 0.14, "grad_norm": 1.3650598526000977, "learning_rate": 0.00019015628330527627, "loss": 1.5965, "step": 1875 }, { "epoch": 0.14, "grad_norm": 1.0886167287826538, "learning_rate": 0.0001901459011432357, "loss": 1.6669, "step": 1876 }, { "epoch": 0.14, "grad_norm": 0.9369934797286987, "learning_rate": 0.00019013551379278, "loss": 1.5975, "step": 1877 }, { "epoch": 0.14, "grad_norm": 1.5225083827972412, "learning_rate": 0.0001901251212545069, "loss": 1.2679, "step": 1878 }, { "epoch": 0.14, "grad_norm": 1.6648659706115723, "learning_rate": 0.00019011472352901466, "loss": 1.8806, "step": 1879 }, { "epoch": 0.14, "grad_norm": 1.6486594676971436, "learning_rate": 0.00019010432061690165, "loss": 1.1304, "step": 1880 }, { "epoch": 0.14, "grad_norm": 0.9979268908500671, "learning_rate": 0.00019009391251876662, "loss": 1.6946, "step": 1881 }, { "epoch": 0.14, "grad_norm": 0.8353814482688904, "learning_rate": 0.0001900834992352087, "loss": 1.3906, "step": 1882 }, { "epoch": 0.14, "grad_norm": 1.0246020555496216, 
"learning_rate": 0.00019007308076682714, "loss": 1.8748, "step": 1883 }, { "epoch": 0.14, "grad_norm": 0.8917340040206909, "learning_rate": 0.00019006265711422164, "loss": 1.4191, "step": 1884 }, { "epoch": 0.14, "grad_norm": 1.2512869834899902, "learning_rate": 0.00019005222827799212, "loss": 2.1864, "step": 1885 }, { "epoch": 0.14, "grad_norm": 0.7613725066184998, "learning_rate": 0.0001900417942587388, "loss": 1.7416, "step": 1886 }, { "epoch": 0.14, "grad_norm": 2.3527016639709473, "learning_rate": 0.00019003135505706227, "loss": 2.1082, "step": 1887 }, { "epoch": 0.14, "grad_norm": 1.170444130897522, "learning_rate": 0.0001900209106735633, "loss": 1.6357, "step": 1888 }, { "epoch": 0.14, "grad_norm": 2.029449224472046, "learning_rate": 0.0001900104611088431, "loss": 1.9781, "step": 1889 }, { "epoch": 0.14, "grad_norm": 1.2973337173461914, "learning_rate": 0.00019000000636350302, "loss": 2.0232, "step": 1890 }, { "epoch": 0.14, "grad_norm": 1.7028743028640747, "learning_rate": 0.00018998954643814484, "loss": 1.4866, "step": 1891 }, { "epoch": 0.14, "grad_norm": 0.9556809663772583, "learning_rate": 0.00018997908133337057, "loss": 2.0859, "step": 1892 }, { "epoch": 0.14, "grad_norm": 1.171640396118164, "learning_rate": 0.00018996861104978257, "loss": 1.4403, "step": 1893 }, { "epoch": 0.14, "grad_norm": 1.1278774738311768, "learning_rate": 0.00018995813558798347, "loss": 1.8004, "step": 1894 }, { "epoch": 0.14, "grad_norm": 1.3321677446365356, "learning_rate": 0.0001899476549485761, "loss": 1.5438, "step": 1895 }, { "epoch": 0.14, "grad_norm": 0.881230354309082, "learning_rate": 0.0001899371691321638, "loss": 1.6261, "step": 1896 }, { "epoch": 0.14, "grad_norm": 1.4219417572021484, "learning_rate": 0.00018992667813935002, "loss": 1.7149, "step": 1897 }, { "epoch": 0.14, "grad_norm": 0.9149611592292786, "learning_rate": 0.0001899161819707386, "loss": 1.4352, "step": 1898 }, { "epoch": 0.14, "grad_norm": 0.7588641047477722, "learning_rate": 0.00018990568062693362, 
"loss": 1.442, "step": 1899 }, { "epoch": 0.14, "grad_norm": 2.325399875640869, "learning_rate": 0.00018989517410853955, "loss": 1.3184, "step": 1900 }, { "epoch": 0.15, "grad_norm": 1.8497614860534668, "learning_rate": 0.00018988466241616104, "loss": 1.97, "step": 1901 }, { "epoch": 0.15, "grad_norm": 1.432100772857666, "learning_rate": 0.00018987414555040317, "loss": 2.2224, "step": 1902 }, { "epoch": 0.15, "grad_norm": 1.9388794898986816, "learning_rate": 0.0001898636235118712, "loss": 1.5608, "step": 1903 }, { "epoch": 0.15, "grad_norm": 1.70521879196167, "learning_rate": 0.0001898530963011707, "loss": 1.9479, "step": 1904 }, { "epoch": 0.15, "grad_norm": 1.1594434976577759, "learning_rate": 0.00018984256391890765, "loss": 0.8081, "step": 1905 }, { "epoch": 0.15, "grad_norm": 1.0352566242218018, "learning_rate": 0.00018983202636568816, "loss": 2.0814, "step": 1906 }, { "epoch": 0.15, "grad_norm": 1.8302700519561768, "learning_rate": 0.00018982148364211885, "loss": 1.98, "step": 1907 }, { "epoch": 0.15, "grad_norm": 1.2764880657196045, "learning_rate": 0.0001898109357488064, "loss": 1.7626, "step": 1908 }, { "epoch": 0.15, "grad_norm": 3.0174248218536377, "learning_rate": 0.00018980038268635795, "loss": 1.6457, "step": 1909 }, { "epoch": 0.15, "grad_norm": 2.5138697624206543, "learning_rate": 0.0001897898244553809, "loss": 1.4563, "step": 1910 }, { "epoch": 0.15, "grad_norm": 1.0162683725357056, "learning_rate": 0.00018977926105648288, "loss": 1.81, "step": 1911 }, { "epoch": 0.15, "grad_norm": 1.0599472522735596, "learning_rate": 0.00018976869249027196, "loss": 2.1859, "step": 1912 }, { "epoch": 0.15, "grad_norm": 1.3075370788574219, "learning_rate": 0.00018975811875735635, "loss": 1.8123, "step": 1913 }, { "epoch": 0.15, "grad_norm": 1.1327661275863647, "learning_rate": 0.00018974753985834464, "loss": 0.9752, "step": 1914 }, { "epoch": 0.15, "grad_norm": 2.6791529655456543, "learning_rate": 0.00018973695579384576, "loss": 2.0869, "step": 1915 }, { "epoch": 
0.15, "grad_norm": 3.200542688369751, "learning_rate": 0.0001897263665644688, "loss": 1.2935, "step": 1916 }, { "epoch": 0.15, "grad_norm": 1.5706309080123901, "learning_rate": 0.00018971577217082332, "loss": 1.2565, "step": 1917 }, { "epoch": 0.15, "grad_norm": 1.1499958038330078, "learning_rate": 0.00018970517261351902, "loss": 1.7881, "step": 1918 }, { "epoch": 0.15, "grad_norm": 1.5503169298171997, "learning_rate": 0.000189694567893166, "loss": 1.4265, "step": 1919 }, { "epoch": 0.15, "grad_norm": 2.540010452270508, "learning_rate": 0.00018968395801037462, "loss": 1.5144, "step": 1920 }, { "epoch": 0.15, "grad_norm": 2.6675775051116943, "learning_rate": 0.00018967334296575553, "loss": 2.0153, "step": 1921 }, { "epoch": 0.15, "grad_norm": 1.1427396535873413, "learning_rate": 0.00018966272275991968, "loss": 1.939, "step": 1922 }, { "epoch": 0.15, "grad_norm": 1.429444670677185, "learning_rate": 0.00018965209739347833, "loss": 1.659, "step": 1923 }, { "epoch": 0.15, "grad_norm": 1.7621562480926514, "learning_rate": 0.00018964146686704304, "loss": 2.1677, "step": 1924 }, { "epoch": 0.15, "grad_norm": 1.1480278968811035, "learning_rate": 0.00018963083118122564, "loss": 1.3889, "step": 1925 }, { "epoch": 0.15, "grad_norm": 1.6818525791168213, "learning_rate": 0.00018962019033663831, "loss": 1.3295, "step": 1926 }, { "epoch": 0.15, "grad_norm": 1.0426137447357178, "learning_rate": 0.00018960954433389345, "loss": 1.804, "step": 1927 }, { "epoch": 0.15, "grad_norm": 1.769566297531128, "learning_rate": 0.00018959889317360387, "loss": 1.8326, "step": 1928 }, { "epoch": 0.15, "grad_norm": 9.05367374420166, "learning_rate": 0.0001895882368563825, "loss": 1.3948, "step": 1929 }, { "epoch": 0.15, "grad_norm": 1.385718584060669, "learning_rate": 0.00018957757538284273, "loss": 1.3534, "step": 1930 }, { "epoch": 0.15, "grad_norm": 1.878578543663025, "learning_rate": 0.0001895669087535982, "loss": 2.0149, "step": 1931 }, { "epoch": 0.15, "grad_norm": 0.7668727040290833, 
"learning_rate": 0.0001895562369692628, "loss": 1.2586, "step": 1932 }, { "epoch": 0.15, "grad_norm": 2.7863152027130127, "learning_rate": 0.0001895455600304508, "loss": 1.7061, "step": 1933 }, { "epoch": 0.15, "grad_norm": 4.3018012046813965, "learning_rate": 0.00018953487793777667, "loss": 1.828, "step": 1934 }, { "epoch": 0.15, "grad_norm": 2.417961359024048, "learning_rate": 0.0001895241906918553, "loss": 1.0175, "step": 1935 }, { "epoch": 0.15, "grad_norm": 1.7621126174926758, "learning_rate": 0.00018951349829330168, "loss": 1.7521, "step": 1936 }, { "epoch": 0.15, "grad_norm": 1.5344575643539429, "learning_rate": 0.0001895028007427313, "loss": 2.2194, "step": 1937 }, { "epoch": 0.15, "grad_norm": 1.4481064081192017, "learning_rate": 0.0001894920980407599, "loss": 1.8145, "step": 1938 }, { "epoch": 0.15, "grad_norm": 1.2235581874847412, "learning_rate": 0.0001894813901880034, "loss": 1.1922, "step": 1939 }, { "epoch": 0.15, "grad_norm": 0.8004369735717773, "learning_rate": 0.00018947067718507815, "loss": 1.814, "step": 1940 }, { "epoch": 0.15, "grad_norm": 0.8632280230522156, "learning_rate": 0.00018945995903260073, "loss": 0.9155, "step": 1941 }, { "epoch": 0.15, "grad_norm": 2.2106592655181885, "learning_rate": 0.00018944923573118805, "loss": 1.5055, "step": 1942 }, { "epoch": 0.15, "grad_norm": 0.8940210342407227, "learning_rate": 0.00018943850728145724, "loss": 1.8415, "step": 1943 }, { "epoch": 0.15, "grad_norm": 1.153320550918579, "learning_rate": 0.00018942777368402588, "loss": 1.4331, "step": 1944 }, { "epoch": 0.15, "grad_norm": 1.240896463394165, "learning_rate": 0.00018941703493951164, "loss": 1.6976, "step": 1945 }, { "epoch": 0.15, "grad_norm": 1.3945552110671997, "learning_rate": 0.0001894062910485327, "loss": 1.8688, "step": 1946 }, { "epoch": 0.15, "grad_norm": 6.797032356262207, "learning_rate": 0.00018939554201170735, "loss": 2.1043, "step": 1947 }, { "epoch": 0.15, "grad_norm": 0.8592055439949036, "learning_rate": 0.00018938478782965429, 
"loss": 1.4563, "step": 1948 }, { "epoch": 0.15, "grad_norm": 1.2548441886901855, "learning_rate": 0.0001893740285029925, "loss": 1.619, "step": 1949 }, { "epoch": 0.15, "grad_norm": 0.9046580195426941, "learning_rate": 0.00018936326403234125, "loss": 1.3913, "step": 1950 }, { "epoch": 0.15, "grad_norm": 0.8102152943611145, "learning_rate": 0.00018935249441832003, "loss": 1.8747, "step": 1951 }, { "epoch": 0.15, "grad_norm": 1.1430169343948364, "learning_rate": 0.00018934171966154877, "loss": 1.7928, "step": 1952 }, { "epoch": 0.15, "grad_norm": 1.479280710220337, "learning_rate": 0.00018933093976264756, "loss": 1.0721, "step": 1953 }, { "epoch": 0.15, "grad_norm": 1.0991309881210327, "learning_rate": 0.00018932015472223693, "loss": 1.7145, "step": 1954 }, { "epoch": 0.15, "grad_norm": 0.8856311440467834, "learning_rate": 0.00018930936454093753, "loss": 2.0301, "step": 1955 }, { "epoch": 0.15, "grad_norm": 1.4059087038040161, "learning_rate": 0.0001892985692193704, "loss": 2.1702, "step": 1956 }, { "epoch": 0.15, "grad_norm": 1.2310731410980225, "learning_rate": 0.00018928776875815694, "loss": 1.3761, "step": 1957 }, { "epoch": 0.15, "grad_norm": 2.444058656692505, "learning_rate": 0.00018927696315791876, "loss": 1.454, "step": 1958 }, { "epoch": 0.15, "grad_norm": 1.5383989810943604, "learning_rate": 0.00018926615241927777, "loss": 1.8525, "step": 1959 }, { "epoch": 0.15, "grad_norm": 1.223853349685669, "learning_rate": 0.00018925533654285615, "loss": 1.1957, "step": 1960 }, { "epoch": 0.15, "grad_norm": 1.1021978855133057, "learning_rate": 0.00018924451552927647, "loss": 1.3926, "step": 1961 }, { "epoch": 0.15, "grad_norm": 1.3073933124542236, "learning_rate": 0.00018923368937916153, "loss": 1.5114, "step": 1962 }, { "epoch": 0.15, "grad_norm": 1.0997318029403687, "learning_rate": 0.00018922285809313443, "loss": 1.7363, "step": 1963 }, { "epoch": 0.15, "grad_norm": 1.1403025388717651, "learning_rate": 0.00018921202167181857, "loss": 1.1607, "step": 1964 }, { 
"epoch": 0.15, "grad_norm": 1.1577814817428589, "learning_rate": 0.00018920118011583767, "loss": 0.7963, "step": 1965 }, { "epoch": 0.15, "grad_norm": 1.7507951259613037, "learning_rate": 0.00018919033342581568, "loss": 1.524, "step": 1966 }, { "epoch": 0.15, "grad_norm": 1.362666368484497, "learning_rate": 0.00018917948160237694, "loss": 1.945, "step": 1967 }, { "epoch": 0.15, "grad_norm": 2.8078129291534424, "learning_rate": 0.000189168624646146, "loss": 2.0306, "step": 1968 }, { "epoch": 0.15, "grad_norm": 1.6695241928100586, "learning_rate": 0.0001891577625577478, "loss": 1.6983, "step": 1969 }, { "epoch": 0.15, "grad_norm": 1.6425565481185913, "learning_rate": 0.0001891468953378074, "loss": 1.4671, "step": 1970 }, { "epoch": 0.15, "grad_norm": 4.419478893280029, "learning_rate": 0.00018913602298695036, "loss": 3.1029, "step": 1971 }, { "epoch": 0.15, "grad_norm": 0.9511110782623291, "learning_rate": 0.00018912514550580242, "loss": 1.4669, "step": 1972 }, { "epoch": 0.15, "grad_norm": 1.3962281942367554, "learning_rate": 0.00018911426289498965, "loss": 1.9075, "step": 1973 }, { "epoch": 0.15, "grad_norm": 1.5630033016204834, "learning_rate": 0.00018910337515513846, "loss": 1.8271, "step": 1974 }, { "epoch": 0.15, "grad_norm": 1.6481261253356934, "learning_rate": 0.00018909248228687536, "loss": 1.6469, "step": 1975 }, { "epoch": 0.15, "grad_norm": 0.7859928607940674, "learning_rate": 0.00018908158429082743, "loss": 0.8485, "step": 1976 }, { "epoch": 0.15, "grad_norm": 2.4828720092773438, "learning_rate": 0.00018907068116762185, "loss": 2.0068, "step": 1977 }, { "epoch": 0.15, "grad_norm": 1.6363416910171509, "learning_rate": 0.00018905977291788618, "loss": 1.4753, "step": 1978 }, { "epoch": 0.15, "grad_norm": 1.34080970287323, "learning_rate": 0.00018904885954224826, "loss": 2.3768, "step": 1979 }, { "epoch": 0.15, "grad_norm": 1.7011269330978394, "learning_rate": 0.0001890379410413362, "loss": 1.2603, "step": 1980 }, { "epoch": 0.15, "grad_norm": 
1.2066195011138916, "learning_rate": 0.0001890270174157784, "loss": 1.2714, "step": 1981 }, { "epoch": 0.15, "grad_norm": 2.043527841567993, "learning_rate": 0.0001890160886662036, "loss": 1.7577, "step": 1982 }, { "epoch": 0.15, "grad_norm": 1.3152480125427246, "learning_rate": 0.00018900515479324084, "loss": 1.9968, "step": 1983 }, { "epoch": 0.15, "grad_norm": 0.9221529364585876, "learning_rate": 0.00018899421579751945, "loss": 1.9817, "step": 1984 }, { "epoch": 0.15, "grad_norm": 1.0070123672485352, "learning_rate": 0.0001889832716796689, "loss": 1.7954, "step": 1985 }, { "epoch": 0.15, "grad_norm": 1.283304214477539, "learning_rate": 0.00018897232244031922, "loss": 1.8972, "step": 1986 }, { "epoch": 0.15, "grad_norm": 1.0376882553100586, "learning_rate": 0.00018896136808010052, "loss": 1.4199, "step": 1987 }, { "epoch": 0.15, "grad_norm": 1.8131475448608398, "learning_rate": 0.00018895040859964334, "loss": 1.4258, "step": 1988 }, { "epoch": 0.15, "grad_norm": 6.429821968078613, "learning_rate": 0.00018893944399957845, "loss": 2.6025, "step": 1989 }, { "epoch": 0.15, "grad_norm": 1.3626892566680908, "learning_rate": 0.00018892847428053693, "loss": 1.9042, "step": 1990 }, { "epoch": 0.15, "grad_norm": 1.6344488859176636, "learning_rate": 0.0001889174994431501, "loss": 2.3087, "step": 1991 }, { "epoch": 0.15, "grad_norm": 1.6153130531311035, "learning_rate": 0.00018890651948804966, "loss": 2.3594, "step": 1992 }, { "epoch": 0.15, "grad_norm": 0.8225684762001038, "learning_rate": 0.0001888955344158676, "loss": 1.5843, "step": 1993 }, { "epoch": 0.15, "grad_norm": 3.825460910797119, "learning_rate": 0.00018888454422723614, "loss": 1.7278, "step": 1994 }, { "epoch": 0.15, "grad_norm": 1.3466788530349731, "learning_rate": 0.00018887354892278783, "loss": 1.6056, "step": 1995 }, { "epoch": 0.15, "grad_norm": 3.1936187744140625, "learning_rate": 0.00018886254850315553, "loss": 1.9387, "step": 1996 }, { "epoch": 0.15, "grad_norm": 1.2573399543762207, "learning_rate": 
0.00018885154296897233, "loss": 0.9741, "step": 1997 }, { "epoch": 0.15, "grad_norm": 2.27701735496521, "learning_rate": 0.00018884053232087172, "loss": 1.6047, "step": 1998 }, { "epoch": 0.15, "grad_norm": 2.3806307315826416, "learning_rate": 0.0001888295165594874, "loss": 1.5262, "step": 1999 }, { "epoch": 0.15, "grad_norm": 1.5553964376449585, "learning_rate": 0.00018881849568545342, "loss": 1.2305, "step": 2000 }, { "epoch": 0.15, "grad_norm": 1.2926298379898071, "learning_rate": 0.00018880746969940403, "loss": 1.49, "step": 2001 }, { "epoch": 0.15, "grad_norm": 2.0032620429992676, "learning_rate": 0.0001887964386019739, "loss": 1.5842, "step": 2002 }, { "epoch": 0.15, "grad_norm": 1.520771861076355, "learning_rate": 0.0001887854023937979, "loss": 1.7105, "step": 2003 }, { "epoch": 0.15, "grad_norm": 1.8807218074798584, "learning_rate": 0.00018877436107551127, "loss": 2.2974, "step": 2004 }, { "epoch": 0.15, "grad_norm": 1.6052244901657104, "learning_rate": 0.00018876331464774945, "loss": 1.8697, "step": 2005 }, { "epoch": 0.15, "grad_norm": 1.9801937341690063, "learning_rate": 0.0001887522631111482, "loss": 1.034, "step": 2006 }, { "epoch": 0.15, "grad_norm": 0.773953378200531, "learning_rate": 0.00018874120646634367, "loss": 1.0813, "step": 2007 }, { "epoch": 0.15, "grad_norm": 2.483612060546875, "learning_rate": 0.00018873014471397224, "loss": 1.7327, "step": 2008 }, { "epoch": 0.15, "grad_norm": 2.025646209716797, "learning_rate": 0.0001887190778546705, "loss": 1.4313, "step": 2009 }, { "epoch": 0.15, "grad_norm": 0.9720602631568909, "learning_rate": 0.0001887080058890755, "loss": 0.9199, "step": 2010 }, { "epoch": 0.15, "grad_norm": 0.8763347268104553, "learning_rate": 0.0001886969288178244, "loss": 1.2676, "step": 2011 }, { "epoch": 0.15, "grad_norm": 1.6603385210037231, "learning_rate": 0.00018868584664155486, "loss": 1.9182, "step": 2012 }, { "epoch": 0.15, "grad_norm": 0.8933584094047546, "learning_rate": 0.00018867475936090462, "loss": 1.7563, "step": 
2013 }, { "epoch": 0.15, "grad_norm": 1.2418975830078125, "learning_rate": 0.00018866366697651188, "loss": 2.1608, "step": 2014 }, { "epoch": 0.15, "grad_norm": 1.4429924488067627, "learning_rate": 0.00018865256948901506, "loss": 1.6645, "step": 2015 }, { "epoch": 0.15, "grad_norm": 1.766158938407898, "learning_rate": 0.00018864146689905287, "loss": 1.5527, "step": 2016 }, { "epoch": 0.15, "grad_norm": 1.4053832292556763, "learning_rate": 0.00018863035920726432, "loss": 1.4818, "step": 2017 }, { "epoch": 0.15, "grad_norm": 1.6227309703826904, "learning_rate": 0.00018861924641428872, "loss": 1.5641, "step": 2018 }, { "epoch": 0.15, "grad_norm": 1.0214093923568726, "learning_rate": 0.00018860812852076572, "loss": 1.1605, "step": 2019 }, { "epoch": 0.15, "grad_norm": 0.9991535544395447, "learning_rate": 0.00018859700552733516, "loss": 1.6647, "step": 2020 }, { "epoch": 0.15, "grad_norm": 1.500903606414795, "learning_rate": 0.0001885858774346373, "loss": 0.9839, "step": 2021 }, { "epoch": 0.15, "grad_norm": 0.9408882856369019, "learning_rate": 0.00018857474424331257, "loss": 1.4169, "step": 2022 }, { "epoch": 0.15, "grad_norm": 1.127798318862915, "learning_rate": 0.00018856360595400178, "loss": 1.4825, "step": 2023 }, { "epoch": 0.15, "grad_norm": 0.9365410804748535, "learning_rate": 0.00018855246256734595, "loss": 1.4416, "step": 2024 }, { "epoch": 0.15, "grad_norm": 1.9265363216400146, "learning_rate": 0.00018854131408398652, "loss": 1.3593, "step": 2025 }, { "epoch": 0.15, "grad_norm": 1.1780685186386108, "learning_rate": 0.0001885301605045651, "loss": 1.8449, "step": 2026 }, { "epoch": 0.15, "grad_norm": 1.2007399797439575, "learning_rate": 0.00018851900182972366, "loss": 1.706, "step": 2027 }, { "epoch": 0.15, "grad_norm": 1.213413119316101, "learning_rate": 0.00018850783806010444, "loss": 1.6364, "step": 2028 }, { "epoch": 0.15, "grad_norm": 1.134323000907898, "learning_rate": 0.00018849666919635, "loss": 1.2359, "step": 2029 }, { "epoch": 0.15, "grad_norm": 
1.4994550943374634, "learning_rate": 0.00018848549523910313, "loss": 1.6679, "step": 2030 }, { "epoch": 0.15, "grad_norm": 1.1935241222381592, "learning_rate": 0.00018847431618900697, "loss": 1.0477, "step": 2031 }, { "epoch": 0.16, "grad_norm": 1.063713550567627, "learning_rate": 0.00018846313204670497, "loss": 1.5611, "step": 2032 }, { "epoch": 0.16, "grad_norm": 1.1046332120895386, "learning_rate": 0.0001884519428128408, "loss": 1.3302, "step": 2033 }, { "epoch": 0.16, "grad_norm": 1.0166854858398438, "learning_rate": 0.0001884407484880585, "loss": 1.3479, "step": 2034 }, { "epoch": 0.16, "grad_norm": 1.4401425123214722, "learning_rate": 0.00018842954907300236, "loss": 1.912, "step": 2035 }, { "epoch": 0.16, "grad_norm": 1.4430768489837646, "learning_rate": 0.00018841834456831698, "loss": 0.8893, "step": 2036 }, { "epoch": 0.16, "grad_norm": 1.1081792116165161, "learning_rate": 0.00018840713497464718, "loss": 1.2528, "step": 2037 }, { "epoch": 0.16, "grad_norm": 1.6285537481307983, "learning_rate": 0.0001883959202926382, "loss": 1.9987, "step": 2038 }, { "epoch": 0.16, "grad_norm": 1.1248494386672974, "learning_rate": 0.00018838470052293552, "loss": 1.5454, "step": 2039 }, { "epoch": 0.16, "grad_norm": 0.9075058102607727, "learning_rate": 0.00018837347566618483, "loss": 1.1359, "step": 2040 }, { "epoch": 0.16, "grad_norm": 1.4617743492126465, "learning_rate": 0.0001883622457230323, "loss": 1.7554, "step": 2041 }, { "epoch": 0.16, "grad_norm": 1.3647927045822144, "learning_rate": 0.00018835101069412415, "loss": 2.0007, "step": 2042 }, { "epoch": 0.16, "grad_norm": 1.759634017944336, "learning_rate": 0.0001883397705801071, "loss": 0.5072, "step": 2043 }, { "epoch": 0.16, "grad_norm": 1.6689002513885498, "learning_rate": 0.00018832852538162804, "loss": 1.9432, "step": 2044 }, { "epoch": 0.16, "grad_norm": 1.4413180351257324, "learning_rate": 0.00018831727509933423, "loss": 1.2151, "step": 2045 }, { "epoch": 0.16, "grad_norm": 1.0116751194000244, "learning_rate": 
0.00018830601973387318, "loss": 1.7493, "step": 2046 }, { "epoch": 0.16, "grad_norm": 1.943244457244873, "learning_rate": 0.00018829475928589271, "loss": 1.7864, "step": 2047 }, { "epoch": 0.16, "grad_norm": 0.9664738178253174, "learning_rate": 0.00018828349375604088, "loss": 1.6607, "step": 2048 }, { "epoch": 0.16, "grad_norm": 1.040297508239746, "learning_rate": 0.00018827222314496614, "loss": 1.5829, "step": 2049 }, { "epoch": 0.16, "grad_norm": 1.8655730485916138, "learning_rate": 0.00018826094745331715, "loss": 1.6274, "step": 2050 }, { "epoch": 0.16, "grad_norm": 1.4308651685714722, "learning_rate": 0.0001882496666817429, "loss": 1.5477, "step": 2051 }, { "epoch": 0.16, "grad_norm": 1.2698777914047241, "learning_rate": 0.00018823838083089267, "loss": 1.7361, "step": 2052 }, { "epoch": 0.16, "grad_norm": 1.4533405303955078, "learning_rate": 0.000188227089901416, "loss": 1.9803, "step": 2053 }, { "epoch": 0.16, "grad_norm": 1.3478468656539917, "learning_rate": 0.0001882157938939628, "loss": 1.2965, "step": 2054 }, { "epoch": 0.16, "grad_norm": 1.7483283281326294, "learning_rate": 0.0001882044928091831, "loss": 1.4458, "step": 2055 }, { "epoch": 0.16, "grad_norm": 1.5647121667861938, "learning_rate": 0.00018819318664772748, "loss": 1.5736, "step": 2056 }, { "epoch": 0.16, "grad_norm": 1.1655994653701782, "learning_rate": 0.00018818187541024665, "loss": 1.9078, "step": 2057 }, { "epoch": 0.16, "grad_norm": 1.068272590637207, "learning_rate": 0.00018817055909739155, "loss": 1.9069, "step": 2058 }, { "epoch": 0.16, "grad_norm": 1.2354148626327515, "learning_rate": 0.00018815923770981358, "loss": 1.9652, "step": 2059 }, { "epoch": 0.16, "grad_norm": 1.0439502000808716, "learning_rate": 0.00018814791124816433, "loss": 1.0013, "step": 2060 }, { "epoch": 0.16, "grad_norm": 1.1820775270462036, "learning_rate": 0.0001881365797130957, "loss": 1.8314, "step": 2061 }, { "epoch": 0.16, "grad_norm": 1.1166318655014038, "learning_rate": 0.0001881252431052599, "loss": 1.9372, 
"step": 2062 }, { "epoch": 0.16, "grad_norm": 2.9165799617767334, "learning_rate": 0.00018811390142530935, "loss": 2.6411, "step": 2063 }, { "epoch": 0.16, "grad_norm": 0.8556538820266724, "learning_rate": 0.00018810255467389695, "loss": 1.8403, "step": 2064 }, { "epoch": 0.16, "grad_norm": 1.7367777824401855, "learning_rate": 0.00018809120285167565, "loss": 1.6107, "step": 2065 }, { "epoch": 0.16, "grad_norm": 1.1062967777252197, "learning_rate": 0.0001880798459592989, "loss": 1.7442, "step": 2066 }, { "epoch": 0.16, "grad_norm": 1.1454769372940063, "learning_rate": 0.00018806848399742032, "loss": 1.3146, "step": 2067 }, { "epoch": 0.16, "grad_norm": 1.3517775535583496, "learning_rate": 0.0001880571169666938, "loss": 1.2889, "step": 2068 }, { "epoch": 0.16, "grad_norm": 1.2206729650497437, "learning_rate": 0.0001880457448677737, "loss": 1.6155, "step": 2069 }, { "epoch": 0.16, "grad_norm": 2.5644097328186035, "learning_rate": 0.00018803436770131447, "loss": 2.2028, "step": 2070 }, { "epoch": 0.16, "grad_norm": 3.278477191925049, "learning_rate": 0.00018802298546797094, "loss": 1.4913, "step": 2071 }, { "epoch": 0.16, "grad_norm": 1.1849174499511719, "learning_rate": 0.0001880115981683982, "loss": 1.0202, "step": 2072 }, { "epoch": 0.16, "grad_norm": 1.9796518087387085, "learning_rate": 0.00018800020580325168, "loss": 1.3906, "step": 2073 }, { "epoch": 0.16, "grad_norm": 1.1724759340286255, "learning_rate": 0.00018798880837318712, "loss": 1.484, "step": 2074 }, { "epoch": 0.16, "grad_norm": 1.879902720451355, "learning_rate": 0.00018797740587886045, "loss": 0.9438, "step": 2075 }, { "epoch": 0.16, "grad_norm": 3.2834508419036865, "learning_rate": 0.00018796599832092797, "loss": 2.6453, "step": 2076 }, { "epoch": 0.16, "grad_norm": 1.0067797899246216, "learning_rate": 0.00018795458570004624, "loss": 1.8985, "step": 2077 }, { "epoch": 0.16, "grad_norm": 1.109568476676941, "learning_rate": 0.0001879431680168721, "loss": 1.5811, "step": 2078 }, { "epoch": 0.16, 
"grad_norm": 1.1894155740737915, "learning_rate": 0.00018793174527206277, "loss": 1.4658, "step": 2079 }, { "epoch": 0.16, "grad_norm": 1.7263264656066895, "learning_rate": 0.00018792031746627563, "loss": 1.5696, "step": 2080 }, { "epoch": 0.16, "grad_norm": 1.1340954303741455, "learning_rate": 0.00018790888460016846, "loss": 1.6292, "step": 2081 }, { "epoch": 0.16, "grad_norm": 1.4047815799713135, "learning_rate": 0.00018789744667439926, "loss": 1.6531, "step": 2082 }, { "epoch": 0.16, "grad_norm": 1.408769130706787, "learning_rate": 0.00018788600368962636, "loss": 2.1306, "step": 2083 }, { "epoch": 0.16, "grad_norm": 1.0977435111999512, "learning_rate": 0.0001878745556465084, "loss": 1.8127, "step": 2084 }, { "epoch": 0.16, "grad_norm": 0.8030884861946106, "learning_rate": 0.0001878631025457042, "loss": 1.454, "step": 2085 }, { "epoch": 0.16, "grad_norm": 2.296264410018921, "learning_rate": 0.000187851644387873, "loss": 1.7129, "step": 2086 }, { "epoch": 0.16, "grad_norm": 1.742519497871399, "learning_rate": 0.0001878401811736743, "loss": 1.7022, "step": 2087 }, { "epoch": 0.16, "grad_norm": 0.9260883331298828, "learning_rate": 0.00018782871290376786, "loss": 2.0175, "step": 2088 }, { "epoch": 0.16, "grad_norm": 0.978373110294342, "learning_rate": 0.00018781723957881372, "loss": 1.5692, "step": 2089 }, { "epoch": 0.16, "grad_norm": 1.37165367603302, "learning_rate": 0.00018780576119947228, "loss": 1.2758, "step": 2090 }, { "epoch": 0.16, "grad_norm": 1.0631681680679321, "learning_rate": 0.00018779427776640417, "loss": 1.3617, "step": 2091 }, { "epoch": 0.16, "grad_norm": 1.0531198978424072, "learning_rate": 0.0001877827892802703, "loss": 1.2715, "step": 2092 }, { "epoch": 0.16, "grad_norm": 0.8536824584007263, "learning_rate": 0.0001877712957417319, "loss": 1.3289, "step": 2093 }, { "epoch": 0.16, "grad_norm": 1.1413264274597168, "learning_rate": 0.00018775979715145055, "loss": 1.7688, "step": 2094 }, { "epoch": 0.16, "grad_norm": 1.3185834884643555, 
"learning_rate": 0.000187748293510088, "loss": 1.3869, "step": 2095 }, { "epoch": 0.16, "grad_norm": 1.0372824668884277, "learning_rate": 0.00018773678481830638, "loss": 1.3966, "step": 2096 }, { "epoch": 0.16, "grad_norm": 1.183793306350708, "learning_rate": 0.00018772527107676807, "loss": 1.4547, "step": 2097 }, { "epoch": 0.16, "grad_norm": 3.033815383911133, "learning_rate": 0.00018771375228613578, "loss": 1.6273, "step": 2098 }, { "epoch": 0.16, "grad_norm": 3.521685838699341, "learning_rate": 0.00018770222844707242, "loss": 1.7656, "step": 2099 }, { "epoch": 0.16, "grad_norm": 1.0152688026428223, "learning_rate": 0.0001876906995602413, "loss": 0.9186, "step": 2100 }, { "epoch": 0.16, "grad_norm": 1.0908173322677612, "learning_rate": 0.00018767916562630597, "loss": 1.6924, "step": 2101 }, { "epoch": 0.16, "grad_norm": 1.4836212396621704, "learning_rate": 0.00018766762664593025, "loss": 1.0177, "step": 2102 }, { "epoch": 0.16, "grad_norm": 1.0849279165267944, "learning_rate": 0.0001876560826197783, "loss": 1.8447, "step": 2103 }, { "epoch": 0.16, "grad_norm": 2.4232945442199707, "learning_rate": 0.00018764453354851455, "loss": 1.0633, "step": 2104 }, { "epoch": 0.16, "grad_norm": 3.751225233078003, "learning_rate": 0.00018763297943280368, "loss": 1.9467, "step": 2105 }, { "epoch": 0.16, "grad_norm": 1.6446977853775024, "learning_rate": 0.00018762142027331074, "loss": 1.9371, "step": 2106 }, { "epoch": 0.16, "grad_norm": 1.1049995422363281, "learning_rate": 0.000187609856070701, "loss": 1.1904, "step": 2107 }, { "epoch": 0.16, "grad_norm": 0.9296199083328247, "learning_rate": 0.00018759828682564004, "loss": 1.2412, "step": 2108 }, { "epoch": 0.16, "grad_norm": 1.258453607559204, "learning_rate": 0.00018758671253879373, "loss": 1.5953, "step": 2109 }, { "epoch": 0.16, "grad_norm": 1.3696626424789429, "learning_rate": 0.0001875751332108283, "loss": 1.7719, "step": 2110 }, { "epoch": 0.16, "grad_norm": 0.9997381567955017, "learning_rate": 0.00018756354884241012, 
"loss": 2.0716, "step": 2111 }, { "epoch": 0.16, "grad_norm": 3.78246808052063, "learning_rate": 0.000187551959434206, "loss": 2.0231, "step": 2112 }, { "epoch": 0.16, "grad_norm": 1.323215365409851, "learning_rate": 0.00018754036498688293, "loss": 1.2831, "step": 2113 }, { "epoch": 0.16, "grad_norm": 1.5667709112167358, "learning_rate": 0.00018752876550110831, "loss": 1.8707, "step": 2114 }, { "epoch": 0.16, "grad_norm": 1.1221048831939697, "learning_rate": 0.00018751716097754968, "loss": 1.2652, "step": 2115 }, { "epoch": 0.16, "grad_norm": 2.695735454559326, "learning_rate": 0.000187505551416875, "loss": 1.9291, "step": 2116 }, { "epoch": 0.16, "grad_norm": 1.5314111709594727, "learning_rate": 0.00018749393681975242, "loss": 0.8942, "step": 2117 }, { "epoch": 0.16, "grad_norm": 1.003737449645996, "learning_rate": 0.00018748231718685045, "loss": 1.5801, "step": 2118 }, { "epoch": 0.16, "grad_norm": 2.1476635932922363, "learning_rate": 0.0001874706925188379, "loss": 1.6522, "step": 2119 }, { "epoch": 0.16, "grad_norm": 1.9506001472473145, "learning_rate": 0.00018745906281638378, "loss": 1.974, "step": 2120 }, { "epoch": 0.16, "grad_norm": 0.9215342402458191, "learning_rate": 0.00018744742808015746, "loss": 1.8729, "step": 2121 }, { "epoch": 0.16, "grad_norm": 1.4887336492538452, "learning_rate": 0.0001874357883108286, "loss": 1.4079, "step": 2122 }, { "epoch": 0.16, "grad_norm": 2.2117910385131836, "learning_rate": 0.00018742414350906715, "loss": 1.3964, "step": 2123 }, { "epoch": 0.16, "grad_norm": 2.5060629844665527, "learning_rate": 0.0001874124936755433, "loss": 1.6777, "step": 2124 }, { "epoch": 0.16, "grad_norm": 1.6328376531600952, "learning_rate": 0.0001874008388109276, "loss": 1.5554, "step": 2125 }, { "epoch": 0.16, "grad_norm": 1.3376835584640503, "learning_rate": 0.0001873891789158908, "loss": 1.6456, "step": 2126 }, { "epoch": 0.16, "grad_norm": 1.9704335927963257, "learning_rate": 0.0001873775139911041, "loss": 1.2913, "step": 2127 }, { "epoch": 
0.16, "grad_norm": 1.0552871227264404, "learning_rate": 0.00018736584403723875, "loss": 1.4705, "step": 2128 }, { "epoch": 0.16, "grad_norm": 0.8836256265640259, "learning_rate": 0.0001873541690549665, "loss": 1.4, "step": 2129 }, { "epoch": 0.16, "grad_norm": 1.4057183265686035, "learning_rate": 0.0001873424890449593, "loss": 1.0387, "step": 2130 }, { "epoch": 0.16, "grad_norm": 1.1246986389160156, "learning_rate": 0.00018733080400788944, "loss": 1.7748, "step": 2131 }, { "epoch": 0.16, "grad_norm": 4.139894962310791, "learning_rate": 0.00018731911394442936, "loss": 3.2778, "step": 2132 }, { "epoch": 0.16, "grad_norm": 1.4140353202819824, "learning_rate": 0.00018730741885525197, "loss": 1.3965, "step": 2133 }, { "epoch": 0.16, "grad_norm": 1.2078421115875244, "learning_rate": 0.0001872957187410304, "loss": 1.6377, "step": 2134 }, { "epoch": 0.16, "grad_norm": 3.621906042098999, "learning_rate": 0.00018728401360243804, "loss": 1.5145, "step": 2135 }, { "epoch": 0.16, "grad_norm": 1.019482135772705, "learning_rate": 0.00018727230344014854, "loss": 1.5729, "step": 2136 }, { "epoch": 0.16, "grad_norm": 1.7273908853530884, "learning_rate": 0.00018726058825483595, "loss": 1.3942, "step": 2137 }, { "epoch": 0.16, "grad_norm": 1.1648463010787964, "learning_rate": 0.00018724886804717453, "loss": 1.5618, "step": 2138 }, { "epoch": 0.16, "grad_norm": 0.9817413687705994, "learning_rate": 0.0001872371428178388, "loss": 1.6508, "step": 2139 }, { "epoch": 0.16, "grad_norm": 1.0536787509918213, "learning_rate": 0.0001872254125675037, "loss": 1.672, "step": 2140 }, { "epoch": 0.16, "grad_norm": 1.206842064857483, "learning_rate": 0.0001872136772968443, "loss": 1.7054, "step": 2141 }, { "epoch": 0.16, "grad_norm": 1.377039909362793, "learning_rate": 0.00018720193700653607, "loss": 1.9175, "step": 2142 }, { "epoch": 0.16, "grad_norm": 1.587377667427063, "learning_rate": 0.00018719019169725472, "loss": 1.524, "step": 2143 }, { "epoch": 0.16, "grad_norm": 1.3691163063049316, 
"learning_rate": 0.00018717844136967624, "loss": 2.0123, "step": 2144 }, { "epoch": 0.16, "grad_norm": 1.125208854675293, "learning_rate": 0.00018716668602447698, "loss": 1.4986, "step": 2145 }, { "epoch": 0.16, "grad_norm": 1.4956010580062866, "learning_rate": 0.00018715492566233346, "loss": 1.643, "step": 2146 }, { "epoch": 0.16, "grad_norm": 1.113866925239563, "learning_rate": 0.00018714316028392263, "loss": 1.6444, "step": 2147 }, { "epoch": 0.16, "grad_norm": 4.422802925109863, "learning_rate": 0.0001871313898899216, "loss": 0.5691, "step": 2148 }, { "epoch": 0.16, "grad_norm": 1.7565993070602417, "learning_rate": 0.00018711961448100785, "loss": 1.6967, "step": 2149 }, { "epoch": 0.16, "grad_norm": 1.1771401166915894, "learning_rate": 0.0001871078340578591, "loss": 1.4246, "step": 2150 }, { "epoch": 0.16, "grad_norm": 1.4642034769058228, "learning_rate": 0.0001870960486211534, "loss": 1.4633, "step": 2151 }, { "epoch": 0.16, "grad_norm": 1.4023393392562866, "learning_rate": 0.0001870842581715691, "loss": 1.4376, "step": 2152 }, { "epoch": 0.16, "grad_norm": 1.0427132844924927, "learning_rate": 0.0001870724627097847, "loss": 0.9295, "step": 2153 }, { "epoch": 0.16, "grad_norm": 2.2686562538146973, "learning_rate": 0.00018706066223647924, "loss": 1.3289, "step": 2154 }, { "epoch": 0.16, "grad_norm": 1.0442060232162476, "learning_rate": 0.0001870488567523318, "loss": 2.3402, "step": 2155 }, { "epoch": 0.16, "grad_norm": 2.477177858352661, "learning_rate": 0.00018703704625802189, "loss": 1.275, "step": 2156 }, { "epoch": 0.16, "grad_norm": 1.4938557147979736, "learning_rate": 0.00018702523075422932, "loss": 1.129, "step": 2157 }, { "epoch": 0.16, "grad_norm": 1.8555259704589844, "learning_rate": 0.00018701341024163405, "loss": 1.4919, "step": 2158 }, { "epoch": 0.16, "grad_norm": 1.2240419387817383, "learning_rate": 0.00018700158472091644, "loss": 2.0033, "step": 2159 }, { "epoch": 0.16, "grad_norm": 1.6885181665420532, "learning_rate": 0.00018698975419275716, 
"loss": 2.0883, "step": 2160 }, { "epoch": 0.16, "grad_norm": 1.5426675081253052, "learning_rate": 0.00018697791865783712, "loss": 1.8064, "step": 2161 }, { "epoch": 0.16, "grad_norm": 3.130729913711548, "learning_rate": 0.00018696607811683746, "loss": 1.6362, "step": 2162 }, { "epoch": 0.17, "grad_norm": 1.2551113367080688, "learning_rate": 0.00018695423257043977, "loss": 1.5763, "step": 2163 }, { "epoch": 0.17, "grad_norm": 1.1213221549987793, "learning_rate": 0.00018694238201932573, "loss": 1.6974, "step": 2164 }, { "epoch": 0.17, "grad_norm": 1.1616156101226807, "learning_rate": 0.00018693052646417746, "loss": 1.6596, "step": 2165 }, { "epoch": 0.17, "grad_norm": 6.722579479217529, "learning_rate": 0.00018691866590567734, "loss": 2.9892, "step": 2166 }, { "epoch": 0.17, "grad_norm": 1.8673579692840576, "learning_rate": 0.00018690680034450793, "loss": 1.9498, "step": 2167 }, { "epoch": 0.17, "grad_norm": 0.963913083076477, "learning_rate": 0.00018689492978135226, "loss": 1.4053, "step": 2168 }, { "epoch": 0.17, "grad_norm": 1.2016019821166992, "learning_rate": 0.00018688305421689347, "loss": 1.3691, "step": 2169 }, { "epoch": 0.17, "grad_norm": 1.148660659790039, "learning_rate": 0.00018687117365181512, "loss": 1.8961, "step": 2170 }, { "epoch": 0.17, "grad_norm": 1.277746558189392, "learning_rate": 0.00018685928808680096, "loss": 1.5445, "step": 2171 }, { "epoch": 0.17, "grad_norm": 1.5631917715072632, "learning_rate": 0.0001868473975225351, "loss": 2.2144, "step": 2172 }, { "epoch": 0.17, "grad_norm": 0.7950224876403809, "learning_rate": 0.00018683550195970194, "loss": 0.9112, "step": 2173 }, { "epoch": 0.17, "grad_norm": 1.7637038230895996, "learning_rate": 0.00018682360139898608, "loss": 1.8167, "step": 2174 }, { "epoch": 0.17, "grad_norm": 6.406255722045898, "learning_rate": 0.0001868116958410725, "loss": 2.024, "step": 2175 }, { "epoch": 0.17, "grad_norm": 0.8848767280578613, "learning_rate": 0.00018679978528664642, "loss": 1.0225, "step": 2176 }, { 
"epoch": 0.17, "grad_norm": 2.6584534645080566, "learning_rate": 0.00018678786973639334, "loss": 2.3841, "step": 2177 }, { "epoch": 0.17, "grad_norm": 0.7600991129875183, "learning_rate": 0.00018677594919099916, "loss": 1.1394, "step": 2178 }, { "epoch": 0.17, "grad_norm": 0.8896485567092896, "learning_rate": 0.00018676402365114982, "loss": 1.2784, "step": 2179 }, { "epoch": 0.17, "grad_norm": 1.6155078411102295, "learning_rate": 0.00018675209311753185, "loss": 0.9653, "step": 2180 }, { "epoch": 0.17, "grad_norm": 0.9815515279769897, "learning_rate": 0.00018674015759083187, "loss": 1.5222, "step": 2181 }, { "epoch": 0.17, "grad_norm": 1.2819887399673462, "learning_rate": 0.00018672821707173677, "loss": 1.243, "step": 2182 }, { "epoch": 0.17, "grad_norm": 0.8455151319503784, "learning_rate": 0.00018671627156093392, "loss": 1.3599, "step": 2183 }, { "epoch": 0.17, "grad_norm": 1.3283578157424927, "learning_rate": 0.00018670432105911077, "loss": 1.1579, "step": 2184 }, { "epoch": 0.17, "grad_norm": 1.1349412202835083, "learning_rate": 0.00018669236556695515, "loss": 1.6859, "step": 2185 }, { "epoch": 0.17, "grad_norm": 1.5285807847976685, "learning_rate": 0.00018668040508515522, "loss": 1.365, "step": 2186 }, { "epoch": 0.17, "grad_norm": 1.2611331939697266, "learning_rate": 0.0001866684396143993, "loss": 1.4132, "step": 2187 }, { "epoch": 0.17, "grad_norm": 1.0699093341827393, "learning_rate": 0.00018665646915537608, "loss": 1.872, "step": 2188 }, { "epoch": 0.17, "grad_norm": 3.7244629859924316, "learning_rate": 0.0001866444937087746, "loss": 2.672, "step": 2189 }, { "epoch": 0.17, "grad_norm": 0.9850581884384155, "learning_rate": 0.000186632513275284, "loss": 1.3315, "step": 2190 }, { "epoch": 0.17, "grad_norm": 1.7386339902877808, "learning_rate": 0.00018662052785559395, "loss": 1.3101, "step": 2191 }, { "epoch": 0.17, "grad_norm": 1.1573853492736816, "learning_rate": 0.00018660853745039422, "loss": 1.9023, "step": 2192 }, { "epoch": 0.17, "grad_norm": 
1.0737178325653076, "learning_rate": 0.0001865965420603749, "loss": 0.8991, "step": 2193 }, { "epoch": 0.17, "grad_norm": 1.0321296453475952, "learning_rate": 0.00018658454168622645, "loss": 1.5933, "step": 2194 }, { "epoch": 0.17, "grad_norm": 1.2741105556488037, "learning_rate": 0.00018657253632863952, "loss": 1.8781, "step": 2195 }, { "epoch": 0.17, "grad_norm": 2.5949606895446777, "learning_rate": 0.0001865605259883051, "loss": 2.2264, "step": 2196 }, { "epoch": 0.17, "grad_norm": 1.0265753269195557, "learning_rate": 0.00018654851066591448, "loss": 1.7371, "step": 2197 }, { "epoch": 0.17, "grad_norm": 1.2439813613891602, "learning_rate": 0.00018653649036215914, "loss": 1.2479, "step": 2198 }, { "epoch": 0.17, "grad_norm": 1.0910509824752808, "learning_rate": 0.00018652446507773097, "loss": 1.1017, "step": 2199 }, { "epoch": 0.17, "grad_norm": 1.1106380224227905, "learning_rate": 0.00018651243481332213, "loss": 2.0647, "step": 2200 }, { "epoch": 0.17, "grad_norm": 1.4218714237213135, "learning_rate": 0.00018650039956962493, "loss": 1.7522, "step": 2201 }, { "epoch": 0.17, "grad_norm": 1.9870725870132446, "learning_rate": 0.00018648835934733214, "loss": 1.472, "step": 2202 }, { "epoch": 0.17, "grad_norm": 1.5788404941558838, "learning_rate": 0.00018647631414713675, "loss": 1.6802, "step": 2203 }, { "epoch": 0.17, "grad_norm": 1.129634976387024, "learning_rate": 0.000186464263969732, "loss": 1.6225, "step": 2204 }, { "epoch": 0.17, "grad_norm": 1.06429922580719, "learning_rate": 0.00018645220881581144, "loss": 1.5926, "step": 2205 }, { "epoch": 0.17, "grad_norm": 2.005364179611206, "learning_rate": 0.00018644014868606895, "loss": 1.1482, "step": 2206 }, { "epoch": 0.17, "grad_norm": 1.6158915758132935, "learning_rate": 0.00018642808358119864, "loss": 1.2115, "step": 2207 }, { "epoch": 0.17, "grad_norm": 1.4461055994033813, "learning_rate": 0.00018641601350189493, "loss": 2.9564, "step": 2208 }, { "epoch": 0.17, "grad_norm": 2.4221813678741455, "learning_rate": 
0.0001864039384488525, "loss": 2.0503, "step": 2209 }, { "epoch": 0.17, "grad_norm": 4.457809925079346, "learning_rate": 0.00018639185842276635, "loss": 1.5649, "step": 2210 }, { "epoch": 0.17, "grad_norm": 0.9270502924919128, "learning_rate": 0.00018637977342433182, "loss": 1.0466, "step": 2211 }, { "epoch": 0.17, "grad_norm": 1.2489789724349976, "learning_rate": 0.00018636768345424437, "loss": 1.7525, "step": 2212 }, { "epoch": 0.17, "grad_norm": 1.8414405584335327, "learning_rate": 0.0001863555885131999, "loss": 1.9274, "step": 2213 }, { "epoch": 0.17, "grad_norm": 1.7411645650863647, "learning_rate": 0.00018634348860189451, "loss": 2.0369, "step": 2214 }, { "epoch": 0.17, "grad_norm": 0.9383180737495422, "learning_rate": 0.00018633138372102468, "loss": 1.1461, "step": 2215 }, { "epoch": 0.17, "grad_norm": 0.9456875920295715, "learning_rate": 0.00018631927387128706, "loss": 1.1428, "step": 2216 }, { "epoch": 0.17, "grad_norm": 2.6756751537323, "learning_rate": 0.00018630715905337865, "loss": 1.7317, "step": 2217 }, { "epoch": 0.17, "grad_norm": 1.8240599632263184, "learning_rate": 0.00018629503926799676, "loss": 2.1952, "step": 2218 }, { "epoch": 0.17, "grad_norm": 1.2448365688323975, "learning_rate": 0.00018628291451583894, "loss": 1.3284, "step": 2219 }, { "epoch": 0.17, "grad_norm": 1.4502902030944824, "learning_rate": 0.000186270784797603, "loss": 1.87, "step": 2220 }, { "epoch": 0.17, "grad_norm": 0.9185240864753723, "learning_rate": 0.00018625865011398713, "loss": 1.8071, "step": 2221 }, { "epoch": 0.17, "grad_norm": 1.892194151878357, "learning_rate": 0.00018624651046568973, "loss": 2.3366, "step": 2222 }, { "epoch": 0.17, "grad_norm": 1.6155434846878052, "learning_rate": 0.00018623436585340946, "loss": 2.3179, "step": 2223 }, { "epoch": 0.17, "grad_norm": 0.8798762559890747, "learning_rate": 0.0001862222162778454, "loss": 1.3663, "step": 2224 }, { "epoch": 0.17, "grad_norm": 4.675378799438477, "learning_rate": 0.00018621006173969676, "loss": 2.509, 
"step": 2225 }, { "epoch": 0.17, "grad_norm": 0.9570962190628052, "learning_rate": 0.00018619790223966317, "loss": 1.3454, "step": 2226 }, { "epoch": 0.17, "grad_norm": 1.100441813468933, "learning_rate": 0.00018618573777844438, "loss": 1.3421, "step": 2227 }, { "epoch": 0.17, "grad_norm": 1.6917295455932617, "learning_rate": 0.0001861735683567406, "loss": 1.7691, "step": 2228 }, { "epoch": 0.17, "grad_norm": 1.2099535465240479, "learning_rate": 0.00018616139397525228, "loss": 0.9647, "step": 2229 }, { "epoch": 0.17, "grad_norm": 0.9543778300285339, "learning_rate": 0.00018614921463468002, "loss": 1.6333, "step": 2230 }, { "epoch": 0.17, "grad_norm": 1.0725566148757935, "learning_rate": 0.00018613703033572488, "loss": 2.4644, "step": 2231 }, { "epoch": 0.17, "grad_norm": 1.739214539527893, "learning_rate": 0.00018612484107908815, "loss": 1.1551, "step": 2232 }, { "epoch": 0.17, "grad_norm": 1.445033073425293, "learning_rate": 0.00018611264686547134, "loss": 1.572, "step": 2233 }, { "epoch": 0.17, "grad_norm": 1.5702890157699585, "learning_rate": 0.00018610044769557633, "loss": 1.9219, "step": 2234 }, { "epoch": 0.17, "grad_norm": 1.0572572946548462, "learning_rate": 0.00018608824357010524, "loss": 1.4193, "step": 2235 }, { "epoch": 0.17, "grad_norm": 1.3571988344192505, "learning_rate": 0.00018607603448976052, "loss": 2.3478, "step": 2236 }, { "epoch": 0.17, "grad_norm": 1.0008894205093384, "learning_rate": 0.00018606382045524482, "loss": 2.1462, "step": 2237 }, { "epoch": 0.17, "grad_norm": 2.015216588973999, "learning_rate": 0.00018605160146726115, "loss": 1.8795, "step": 2238 }, { "epoch": 0.17, "grad_norm": 1.4580222368240356, "learning_rate": 0.0001860393775265128, "loss": 1.1236, "step": 2239 }, { "epoch": 0.17, "grad_norm": 1.6023411750793457, "learning_rate": 0.0001860271486337033, "loss": 1.8985, "step": 2240 }, { "epoch": 0.17, "grad_norm": 0.8743711113929749, "learning_rate": 0.00018601491478953657, "loss": 1.4276, "step": 2241 }, { "epoch": 0.17, 
"grad_norm": 2.9740655422210693, "learning_rate": 0.0001860026759947166, "loss": 1.6382, "step": 2242 }, { "epoch": 0.17, "grad_norm": 1.53725266456604, "learning_rate": 0.0001859904322499479, "loss": 0.9328, "step": 2243 }, { "epoch": 0.17, "grad_norm": 1.3344935178756714, "learning_rate": 0.00018597818355593518, "loss": 1.3147, "step": 2244 }, { "epoch": 0.17, "grad_norm": 0.8827890157699585, "learning_rate": 0.00018596592991338334, "loss": 1.9305, "step": 2245 }, { "epoch": 0.17, "grad_norm": 1.653582215309143, "learning_rate": 0.00018595367132299777, "loss": 2.1556, "step": 2246 }, { "epoch": 0.17, "grad_norm": 1.5748933553695679, "learning_rate": 0.0001859414077854839, "loss": 1.4569, "step": 2247 }, { "epoch": 0.17, "grad_norm": 1.6682087182998657, "learning_rate": 0.00018592913930154763, "loss": 2.0934, "step": 2248 }, { "epoch": 0.17, "grad_norm": 1.7723093032836914, "learning_rate": 0.00018591686587189504, "loss": 1.2911, "step": 2249 }, { "epoch": 0.17, "grad_norm": 1.2984025478363037, "learning_rate": 0.00018590458749723264, "loss": 1.2472, "step": 2250 }, { "epoch": 0.17, "grad_norm": 1.4867559671401978, "learning_rate": 0.00018589230417826697, "loss": 1.1506, "step": 2251 }, { "epoch": 0.17, "grad_norm": 1.009392261505127, "learning_rate": 0.00018588001591570512, "loss": 1.4082, "step": 2252 }, { "epoch": 0.17, "grad_norm": 1.1832680702209473, "learning_rate": 0.00018586772271025431, "loss": 1.3938, "step": 2253 }, { "epoch": 0.17, "grad_norm": 1.3268243074417114, "learning_rate": 0.0001858554245626221, "loss": 1.3222, "step": 2254 }, { "epoch": 0.17, "grad_norm": 0.951869010925293, "learning_rate": 0.0001858431214735163, "loss": 1.7997, "step": 2255 }, { "epoch": 0.17, "grad_norm": 1.9152456521987915, "learning_rate": 0.00018583081344364502, "loss": 1.7046, "step": 2256 }, { "epoch": 0.17, "grad_norm": 1.8095563650131226, "learning_rate": 0.00018581850047371664, "loss": 2.1913, "step": 2257 }, { "epoch": 0.17, "grad_norm": 1.6787691116333008, 
"learning_rate": 0.00018580618256443994, "loss": 1.2119, "step": 2258 }, { "epoch": 0.17, "grad_norm": 2.548046588897705, "learning_rate": 0.00018579385971652376, "loss": 1.5031, "step": 2259 }, { "epoch": 0.17, "grad_norm": 1.0659232139587402, "learning_rate": 0.00018578153193067745, "loss": 1.9582, "step": 2260 }, { "epoch": 0.17, "grad_norm": 1.7474249601364136, "learning_rate": 0.00018576919920761047, "loss": 1.284, "step": 2261 }, { "epoch": 0.17, "grad_norm": 1.0797966718673706, "learning_rate": 0.00018575686154803268, "loss": 1.5919, "step": 2262 }, { "epoch": 0.17, "grad_norm": 1.2919870615005493, "learning_rate": 0.00018574451895265418, "loss": 1.3413, "step": 2263 }, { "epoch": 0.17, "grad_norm": 1.3134751319885254, "learning_rate": 0.00018573217142218534, "loss": 1.7027, "step": 2264 }, { "epoch": 0.17, "grad_norm": 1.1462273597717285, "learning_rate": 0.00018571981895733688, "loss": 1.6416, "step": 2265 }, { "epoch": 0.17, "grad_norm": 1.5426689386367798, "learning_rate": 0.0001857074615588197, "loss": 1.5688, "step": 2266 }, { "epoch": 0.17, "grad_norm": 1.3581451177597046, "learning_rate": 0.00018569509922734505, "loss": 1.7642, "step": 2267 }, { "epoch": 0.17, "grad_norm": 2.149461269378662, "learning_rate": 0.00018568273196362448, "loss": 1.5225, "step": 2268 }, { "epoch": 0.17, "grad_norm": 1.2650744915008545, "learning_rate": 0.00018567035976836975, "loss": 1.5094, "step": 2269 }, { "epoch": 0.17, "grad_norm": 0.9420284628868103, "learning_rate": 0.00018565798264229302, "loss": 1.7119, "step": 2270 }, { "epoch": 0.17, "grad_norm": 1.192095398902893, "learning_rate": 0.00018564560058610658, "loss": 1.7225, "step": 2271 }, { "epoch": 0.17, "grad_norm": 1.6683266162872314, "learning_rate": 0.00018563321360052316, "loss": 1.5606, "step": 2272 }, { "epoch": 0.17, "grad_norm": 1.2866169214248657, "learning_rate": 0.00018562082168625569, "loss": 1.2428, "step": 2273 }, { "epoch": 0.17, "grad_norm": 1.2914239168167114, "learning_rate": 
0.00018560842484401734, "loss": 1.6998, "step": 2274 }, { "epoch": 0.17, "grad_norm": 1.093656063079834, "learning_rate": 0.0001855960230745217, "loss": 1.3107, "step": 2275 }, { "epoch": 0.17, "grad_norm": 0.9363633394241333, "learning_rate": 0.0001855836163784825, "loss": 1.2627, "step": 2276 }, { "epoch": 0.17, "grad_norm": 1.4203261137008667, "learning_rate": 0.00018557120475661386, "loss": 1.4083, "step": 2277 }, { "epoch": 0.17, "grad_norm": 1.991675615310669, "learning_rate": 0.00018555878820963013, "loss": 1.797, "step": 2278 }, { "epoch": 0.17, "grad_norm": 1.762258768081665, "learning_rate": 0.00018554636673824595, "loss": 1.6539, "step": 2279 }, { "epoch": 0.17, "grad_norm": 1.6558839082717896, "learning_rate": 0.00018553394034317622, "loss": 1.2994, "step": 2280 }, { "epoch": 0.17, "grad_norm": 1.5757673978805542, "learning_rate": 0.00018552150902513618, "loss": 1.9603, "step": 2281 }, { "epoch": 0.17, "grad_norm": 1.1160295009613037, "learning_rate": 0.00018550907278484132, "loss": 1.9348, "step": 2282 }, { "epoch": 0.17, "grad_norm": 1.1296244859695435, "learning_rate": 0.0001854966316230074, "loss": 1.7641, "step": 2283 }, { "epoch": 0.17, "grad_norm": 1.0017194747924805, "learning_rate": 0.0001854841855403505, "loss": 1.1515, "step": 2284 }, { "epoch": 0.17, "grad_norm": 1.571256160736084, "learning_rate": 0.00018547173453758696, "loss": 1.8567, "step": 2285 }, { "epoch": 0.17, "grad_norm": 1.5037791728973389, "learning_rate": 0.00018545927861543342, "loss": 1.7144, "step": 2286 }, { "epoch": 0.17, "grad_norm": 1.123090147972107, "learning_rate": 0.00018544681777460674, "loss": 0.928, "step": 2287 }, { "epoch": 0.17, "grad_norm": 3.292649269104004, "learning_rate": 0.0001854343520158242, "loss": 2.3357, "step": 2288 }, { "epoch": 0.17, "grad_norm": 1.4525164365768433, "learning_rate": 0.0001854218813398032, "loss": 1.6655, "step": 2289 }, { "epoch": 0.17, "grad_norm": 1.0707814693450928, "learning_rate": 0.0001854094057472615, "loss": 0.9798, 
"step": 2290 }, { "epoch": 0.17, "grad_norm": 1.04225754737854, "learning_rate": 0.00018539692523891722, "loss": 1.4882, "step": 2291 }, { "epoch": 0.17, "grad_norm": 0.9843524098396301, "learning_rate": 0.00018538443981548855, "loss": 1.2404, "step": 2292 }, { "epoch": 0.17, "grad_norm": 1.0756069421768188, "learning_rate": 0.00018537194947769426, "loss": 1.5177, "step": 2293 }, { "epoch": 0.18, "grad_norm": 1.037545084953308, "learning_rate": 0.0001853594542262531, "loss": 1.3882, "step": 2294 }, { "epoch": 0.18, "grad_norm": 1.6116201877593994, "learning_rate": 0.00018534695406188437, "loss": 1.4315, "step": 2295 }, { "epoch": 0.18, "grad_norm": 1.5783591270446777, "learning_rate": 0.0001853344489853074, "loss": 1.3031, "step": 2296 }, { "epoch": 0.18, "grad_norm": 1.3638614416122437, "learning_rate": 0.000185321938997242, "loss": 1.7499, "step": 2297 }, { "epoch": 0.18, "grad_norm": 1.1626038551330566, "learning_rate": 0.0001853094240984082, "loss": 1.4799, "step": 2298 }, { "epoch": 0.18, "grad_norm": 1.4176241159439087, "learning_rate": 0.00018529690428952629, "loss": 1.8076, "step": 2299 }, { "epoch": 0.18, "grad_norm": 2.2510733604431152, "learning_rate": 0.0001852843795713168, "loss": 1.3252, "step": 2300 }, { "epoch": 0.18, "grad_norm": 1.2764078378677368, "learning_rate": 0.00018527184994450072, "loss": 1.8648, "step": 2301 }, { "epoch": 0.18, "grad_norm": 1.9808374643325806, "learning_rate": 0.0001852593154097991, "loss": 2.4443, "step": 2302 }, { "epoch": 0.18, "grad_norm": 1.4428743124008179, "learning_rate": 0.00018524677596793338, "loss": 1.2033, "step": 2303 }, { "epoch": 0.18, "grad_norm": 0.8886793851852417, "learning_rate": 0.00018523423161962533, "loss": 1.8391, "step": 2304 }, { "epoch": 0.18, "grad_norm": 3.272862672805786, "learning_rate": 0.00018522168236559695, "loss": 2.1801, "step": 2305 }, { "epoch": 0.18, "grad_norm": 1.0731643438339233, "learning_rate": 0.00018520912820657045, "loss": 1.4927, "step": 2306 }, { "epoch": 0.18, 
"grad_norm": 1.2761253118515015, "learning_rate": 0.00018519656914326848, "loss": 1.5931, "step": 2307 }, { "epoch": 0.18, "grad_norm": 2.6572721004486084, "learning_rate": 0.00018518400517641382, "loss": 1.417, "step": 2308 }, { "epoch": 0.18, "grad_norm": 0.8746814727783203, "learning_rate": 0.00018517143630672963, "loss": 0.89, "step": 2309 }, { "epoch": 0.18, "grad_norm": 1.614192008972168, "learning_rate": 0.00018515886253493932, "loss": 1.4885, "step": 2310 }, { "epoch": 0.18, "grad_norm": 2.353829860687256, "learning_rate": 0.00018514628386176658, "loss": 2.2917, "step": 2311 }, { "epoch": 0.18, "grad_norm": 1.3031790256500244, "learning_rate": 0.0001851337002879354, "loss": 1.9106, "step": 2312 }, { "epoch": 0.18, "grad_norm": 1.2709347009658813, "learning_rate": 0.00018512111181417, "loss": 1.5709, "step": 2313 }, { "epoch": 0.18, "grad_norm": 0.9584470391273499, "learning_rate": 0.00018510851844119494, "loss": 1.5398, "step": 2314 }, { "epoch": 0.18, "grad_norm": 1.37656831741333, "learning_rate": 0.00018509592016973506, "loss": 1.7402, "step": 2315 }, { "epoch": 0.18, "grad_norm": 1.341498613357544, "learning_rate": 0.0001850833170005154, "loss": 1.9034, "step": 2316 }, { "epoch": 0.18, "grad_norm": 1.123159646987915, "learning_rate": 0.00018507070893426144, "loss": 1.6445, "step": 2317 }, { "epoch": 0.18, "grad_norm": 1.9053751230239868, "learning_rate": 0.00018505809597169876, "loss": 1.2776, "step": 2318 }, { "epoch": 0.18, "grad_norm": 1.7041749954223633, "learning_rate": 0.00018504547811355336, "loss": 0.8437, "step": 2319 }, { "epoch": 0.18, "grad_norm": 0.8563440442085266, "learning_rate": 0.00018503285536055147, "loss": 1.7255, "step": 2320 }, { "epoch": 0.18, "grad_norm": 1.2100199460983276, "learning_rate": 0.00018502022771341959, "loss": 1.8268, "step": 2321 }, { "epoch": 0.18, "grad_norm": 1.949065923690796, "learning_rate": 0.00018500759517288447, "loss": 1.6456, "step": 2322 }, { "epoch": 0.18, "grad_norm": 1.8019559383392334, 
"learning_rate": 0.00018499495773967325, "loss": 1.614, "step": 2323 }, { "epoch": 0.18, "grad_norm": 1.8014552593231201, "learning_rate": 0.00018498231541451326, "loss": 1.1445, "step": 2324 }, { "epoch": 0.18, "grad_norm": 0.9911909103393555, "learning_rate": 0.00018496966819813216, "loss": 1.6239, "step": 2325 }, { "epoch": 0.18, "grad_norm": 1.2379635572433472, "learning_rate": 0.00018495701609125783, "loss": 1.517, "step": 2326 }, { "epoch": 0.18, "grad_norm": 1.3923547267913818, "learning_rate": 0.0001849443590946185, "loss": 2.3313, "step": 2327 }, { "epoch": 0.18, "grad_norm": 2.15995192527771, "learning_rate": 0.00018493169720894265, "loss": 2.3038, "step": 2328 }, { "epoch": 0.18, "grad_norm": 1.3079756498336792, "learning_rate": 0.00018491903043495902, "loss": 1.1044, "step": 2329 }, { "epoch": 0.18, "grad_norm": 1.097944736480713, "learning_rate": 0.00018490635877339666, "loss": 1.6953, "step": 2330 }, { "epoch": 0.18, "grad_norm": 0.9140447378158569, "learning_rate": 0.00018489368222498497, "loss": 1.0904, "step": 2331 }, { "epoch": 0.18, "grad_norm": 2.192610502243042, "learning_rate": 0.00018488100079045344, "loss": 1.7585, "step": 2332 }, { "epoch": 0.18, "grad_norm": 1.163336157798767, "learning_rate": 0.00018486831447053209, "loss": 0.888, "step": 2333 }, { "epoch": 0.18, "grad_norm": 0.8094735741615295, "learning_rate": 0.00018485562326595097, "loss": 1.5076, "step": 2334 }, { "epoch": 0.18, "grad_norm": 1.2006944417953491, "learning_rate": 0.0001848429271774406, "loss": 1.3476, "step": 2335 }, { "epoch": 0.18, "grad_norm": 1.1054164171218872, "learning_rate": 0.00018483022620573167, "loss": 1.37, "step": 2336 }, { "epoch": 0.18, "grad_norm": 1.9973242282867432, "learning_rate": 0.00018481752035155524, "loss": 1.8581, "step": 2337 }, { "epoch": 0.18, "grad_norm": 1.424880862236023, "learning_rate": 0.0001848048096156426, "loss": 1.6744, "step": 2338 }, { "epoch": 0.18, "grad_norm": 1.2073864936828613, "learning_rate": 0.00018479209399872534, 
"loss": 1.2594, "step": 2339 }, { "epoch": 0.18, "grad_norm": 1.0897554159164429, "learning_rate": 0.00018477937350153522, "loss": 1.1598, "step": 2340 }, { "epoch": 0.18, "grad_norm": 1.2197062969207764, "learning_rate": 0.00018476664812480448, "loss": 1.5487, "step": 2341 }, { "epoch": 0.18, "grad_norm": 1.2287393808364868, "learning_rate": 0.00018475391786926547, "loss": 1.663, "step": 2342 }, { "epoch": 0.18, "grad_norm": 2.1051807403564453, "learning_rate": 0.00018474118273565098, "loss": 1.421, "step": 2343 }, { "epoch": 0.18, "grad_norm": 2.039228916168213, "learning_rate": 0.0001847284427246939, "loss": 1.262, "step": 2344 }, { "epoch": 0.18, "grad_norm": 1.0084545612335205, "learning_rate": 0.00018471569783712753, "loss": 1.6126, "step": 2345 }, { "epoch": 0.18, "grad_norm": 1.1875412464141846, "learning_rate": 0.0001847029480736854, "loss": 1.5111, "step": 2346 }, { "epoch": 0.18, "grad_norm": 1.408515214920044, "learning_rate": 0.00018469019343510135, "loss": 1.0223, "step": 2347 }, { "epoch": 0.18, "grad_norm": 1.439459204673767, "learning_rate": 0.0001846774339221095, "loss": 1.6789, "step": 2348 }, { "epoch": 0.18, "grad_norm": 0.9959097504615784, "learning_rate": 0.00018466466953544414, "loss": 2.2687, "step": 2349 }, { "epoch": 0.18, "grad_norm": 1.1190475225448608, "learning_rate": 0.00018465190027584005, "loss": 1.4747, "step": 2350 }, { "epoch": 0.18, "grad_norm": 2.286983013153076, "learning_rate": 0.0001846391261440321, "loss": 2.1301, "step": 2351 }, { "epoch": 0.18, "grad_norm": 1.5867146253585815, "learning_rate": 0.00018462634714075553, "loss": 1.4167, "step": 2352 }, { "epoch": 0.18, "grad_norm": 0.8689700365066528, "learning_rate": 0.00018461356326674588, "loss": 1.2901, "step": 2353 }, { "epoch": 0.18, "grad_norm": 1.0790961980819702, "learning_rate": 0.0001846007745227389, "loss": 1.1592, "step": 2354 }, { "epoch": 0.18, "grad_norm": 1.152185320854187, "learning_rate": 0.00018458798090947065, "loss": 1.4315, "step": 2355 }, { "epoch": 
0.18, "grad_norm": 2.2446038722991943, "learning_rate": 0.0001845751824276775, "loss": 2.0246, "step": 2356 }, { "epoch": 0.18, "grad_norm": 1.907238483428955, "learning_rate": 0.00018456237907809605, "loss": 2.0848, "step": 2357 }, { "epoch": 0.18, "grad_norm": 1.3213831186294556, "learning_rate": 0.00018454957086146327, "loss": 1.6062, "step": 2358 }, { "epoch": 0.18, "grad_norm": 1.2470637559890747, "learning_rate": 0.00018453675777851627, "loss": 1.0603, "step": 2359 }, { "epoch": 0.18, "grad_norm": 1.9368222951889038, "learning_rate": 0.00018452393982999255, "loss": 1.7322, "step": 2360 }, { "epoch": 0.18, "grad_norm": 1.6508381366729736, "learning_rate": 0.00018451111701662986, "loss": 2.0022, "step": 2361 }, { "epoch": 0.18, "grad_norm": 1.2880313396453857, "learning_rate": 0.0001844982893391662, "loss": 1.6138, "step": 2362 }, { "epoch": 0.18, "grad_norm": 1.2829961776733398, "learning_rate": 0.0001844854567983399, "loss": 1.1581, "step": 2363 }, { "epoch": 0.18, "grad_norm": 1.3490675687789917, "learning_rate": 0.00018447261939488955, "loss": 1.7651, "step": 2364 }, { "epoch": 0.18, "grad_norm": 1.4711353778839111, "learning_rate": 0.00018445977712955403, "loss": 1.4867, "step": 2365 }, { "epoch": 0.18, "grad_norm": 1.4447135925292969, "learning_rate": 0.00018444693000307244, "loss": 1.7183, "step": 2366 }, { "epoch": 0.18, "grad_norm": 1.1997359991073608, "learning_rate": 0.00018443407801618423, "loss": 1.4208, "step": 2367 }, { "epoch": 0.18, "grad_norm": 1.3434408903121948, "learning_rate": 0.0001844212211696291, "loss": 1.0652, "step": 2368 }, { "epoch": 0.18, "grad_norm": 1.3465216159820557, "learning_rate": 0.00018440835946414706, "loss": 1.6722, "step": 2369 }, { "epoch": 0.18, "grad_norm": 2.2646031379699707, "learning_rate": 0.00018439549290047834, "loss": 2.1877, "step": 2370 }, { "epoch": 0.18, "grad_norm": 2.201146364212036, "learning_rate": 0.00018438262147936355, "loss": 2.1446, "step": 2371 }, { "epoch": 0.18, "grad_norm": 
1.3375712633132935, "learning_rate": 0.0001843697452015434, "loss": 1.1123, "step": 2372 }, { "epoch": 0.18, "grad_norm": 0.9591042399406433, "learning_rate": 0.00018435686406775915, "loss": 1.29, "step": 2373 }, { "epoch": 0.18, "grad_norm": 2.0687825679779053, "learning_rate": 0.000184343978078752, "loss": 1.7085, "step": 2374 }, { "epoch": 0.18, "grad_norm": 1.336578130722046, "learning_rate": 0.00018433108723526375, "loss": 2.251, "step": 2375 }, { "epoch": 0.18, "grad_norm": 2.451974868774414, "learning_rate": 0.00018431819153803632, "loss": 1.8474, "step": 2376 }, { "epoch": 0.18, "grad_norm": 2.6899678707122803, "learning_rate": 0.0001843052909878119, "loss": 2.1326, "step": 2377 }, { "epoch": 0.18, "grad_norm": 0.841210663318634, "learning_rate": 0.00018429238558533294, "loss": 1.1822, "step": 2378 }, { "epoch": 0.18, "grad_norm": 1.135097622871399, "learning_rate": 0.00018427947533134235, "loss": 1.6133, "step": 2379 }, { "epoch": 0.18, "grad_norm": 1.1499232053756714, "learning_rate": 0.0001842665602265831, "loss": 1.5609, "step": 2380 }, { "epoch": 0.18, "grad_norm": 1.7460004091262817, "learning_rate": 0.00018425364027179858, "loss": 1.8194, "step": 2381 }, { "epoch": 0.18, "grad_norm": 1.1055498123168945, "learning_rate": 0.00018424071546773233, "loss": 1.665, "step": 2382 }, { "epoch": 0.18, "grad_norm": 1.4509763717651367, "learning_rate": 0.00018422778581512832, "loss": 1.904, "step": 2383 }, { "epoch": 0.18, "grad_norm": 1.1810585260391235, "learning_rate": 0.0001842148513147307, "loss": 1.6296, "step": 2384 }, { "epoch": 0.18, "grad_norm": 1.9503604173660278, "learning_rate": 0.00018420191196728388, "loss": 1.8966, "step": 2385 }, { "epoch": 0.18, "grad_norm": 1.79385244846344, "learning_rate": 0.0001841889677735327, "loss": 1.3024, "step": 2386 }, { "epoch": 0.18, "grad_norm": 1.5016684532165527, "learning_rate": 0.0001841760187342221, "loss": 1.9675, "step": 2387 }, { "epoch": 0.18, "grad_norm": 0.9985657930374146, "learning_rate": 
0.00018416306485009737, "loss": 1.7354, "step": 2388 }, { "epoch": 0.18, "grad_norm": 2.9959518909454346, "learning_rate": 0.0001841501061219041, "loss": 2.6231, "step": 2389 }, { "epoch": 0.18, "grad_norm": 2.057917833328247, "learning_rate": 0.00018413714255038812, "loss": 2.4391, "step": 2390 }, { "epoch": 0.18, "grad_norm": 1.1149567365646362, "learning_rate": 0.00018412417413629558, "loss": 1.2923, "step": 2391 }, { "epoch": 0.18, "grad_norm": 2.799069881439209, "learning_rate": 0.0001841112008803729, "loss": 2.0751, "step": 2392 }, { "epoch": 0.18, "grad_norm": 1.160134196281433, "learning_rate": 0.00018409822278336673, "loss": 1.6821, "step": 2393 }, { "epoch": 0.18, "grad_norm": 1.0470232963562012, "learning_rate": 0.00018408523984602405, "loss": 1.5979, "step": 2394 }, { "epoch": 0.18, "grad_norm": 1.200524926185608, "learning_rate": 0.00018407225206909208, "loss": 1.5455, "step": 2395 }, { "epoch": 0.18, "grad_norm": 1.5277314186096191, "learning_rate": 0.0001840592594533184, "loss": 1.6736, "step": 2396 }, { "epoch": 0.18, "grad_norm": 0.8766077160835266, "learning_rate": 0.00018404626199945075, "loss": 1.4379, "step": 2397 }, { "epoch": 0.18, "grad_norm": 1.148285984992981, "learning_rate": 0.0001840332597082372, "loss": 1.4353, "step": 2398 }, { "epoch": 0.18, "grad_norm": 1.433425784111023, "learning_rate": 0.00018402025258042616, "loss": 1.5523, "step": 2399 }, { "epoch": 0.18, "grad_norm": 1.2888743877410889, "learning_rate": 0.00018400724061676626, "loss": 1.9341, "step": 2400 }, { "epoch": 0.18, "grad_norm": 1.2652356624603271, "learning_rate": 0.0001839942238180064, "loss": 1.8996, "step": 2401 }, { "epoch": 0.18, "grad_norm": 1.9986523389816284, "learning_rate": 0.00018398120218489575, "loss": 1.4919, "step": 2402 }, { "epoch": 0.18, "grad_norm": 1.1060230731964111, "learning_rate": 0.00018396817571818378, "loss": 1.749, "step": 2403 }, { "epoch": 0.18, "grad_norm": 1.112353801727295, "learning_rate": 0.00018395514441862026, "loss": 2.3403, 
"step": 2404 }, { "epoch": 0.18, "grad_norm": 1.3169727325439453, "learning_rate": 0.00018394210828695523, "loss": 1.3482, "step": 2405 }, { "epoch": 0.18, "grad_norm": 1.5603878498077393, "learning_rate": 0.00018392906732393898, "loss": 1.6309, "step": 2406 }, { "epoch": 0.18, "grad_norm": 1.6989796161651611, "learning_rate": 0.00018391602153032207, "loss": 1.314, "step": 2407 }, { "epoch": 0.18, "grad_norm": 3.158369779586792, "learning_rate": 0.0001839029709068554, "loss": 1.5746, "step": 2408 }, { "epoch": 0.18, "grad_norm": 1.1912907361984253, "learning_rate": 0.00018388991545429008, "loss": 1.2026, "step": 2409 }, { "epoch": 0.18, "grad_norm": 1.1459382772445679, "learning_rate": 0.0001838768551733775, "loss": 1.5442, "step": 2410 }, { "epoch": 0.18, "grad_norm": 2.4140665531158447, "learning_rate": 0.00018386379006486941, "loss": 1.943, "step": 2411 }, { "epoch": 0.18, "grad_norm": 2.0372114181518555, "learning_rate": 0.00018385072012951778, "loss": 1.0264, "step": 2412 }, { "epoch": 0.18, "grad_norm": 1.5255200862884521, "learning_rate": 0.00018383764536807485, "loss": 1.6823, "step": 2413 }, { "epoch": 0.18, "grad_norm": 1.7081594467163086, "learning_rate": 0.00018382456578129307, "loss": 1.3484, "step": 2414 }, { "epoch": 0.18, "grad_norm": 1.0987217426300049, "learning_rate": 0.00018381148136992537, "loss": 1.3134, "step": 2415 }, { "epoch": 0.18, "grad_norm": 1.277355670928955, "learning_rate": 0.00018379839213472475, "loss": 1.7337, "step": 2416 }, { "epoch": 0.18, "grad_norm": 1.6749662160873413, "learning_rate": 0.0001837852980764446, "loss": 1.5475, "step": 2417 }, { "epoch": 0.18, "grad_norm": 2.207226037979126, "learning_rate": 0.00018377219919583858, "loss": 1.2969, "step": 2418 }, { "epoch": 0.18, "grad_norm": 1.146901249885559, "learning_rate": 0.00018375909549366056, "loss": 1.479, "step": 2419 }, { "epoch": 0.18, "grad_norm": 1.3324731588363647, "learning_rate": 0.00018374598697066475, "loss": 2.1638, "step": 2420 }, { "epoch": 0.18, 
"grad_norm": 2.9208669662475586, "learning_rate": 0.00018373287362760562, "loss": 1.9247, "step": 2421 }, { "epoch": 0.18, "grad_norm": 1.8240015506744385, "learning_rate": 0.00018371975546523794, "loss": 1.8987, "step": 2422 }, { "epoch": 0.18, "grad_norm": 0.7626339793205261, "learning_rate": 0.00018370663248431672, "loss": 1.1694, "step": 2423 }, { "epoch": 0.18, "grad_norm": 1.5892682075500488, "learning_rate": 0.00018369350468559726, "loss": 1.3325, "step": 2424 }, { "epoch": 0.19, "grad_norm": 1.471057653427124, "learning_rate": 0.00018368037206983515, "loss": 1.4833, "step": 2425 }, { "epoch": 0.19, "grad_norm": 1.5842951536178589, "learning_rate": 0.0001836672346377862, "loss": 1.6621, "step": 2426 }, { "epoch": 0.19, "grad_norm": 0.9173165559768677, "learning_rate": 0.00018365409239020662, "loss": 1.5982, "step": 2427 }, { "epoch": 0.19, "grad_norm": 1.3221607208251953, "learning_rate": 0.00018364094532785278, "loss": 1.4372, "step": 2428 }, { "epoch": 0.19, "grad_norm": 1.1124347448349, "learning_rate": 0.0001836277934514814, "loss": 1.3249, "step": 2429 }, { "epoch": 0.19, "grad_norm": 1.976240634918213, "learning_rate": 0.0001836146367618494, "loss": 2.1811, "step": 2430 }, { "epoch": 0.19, "grad_norm": 1.6727609634399414, "learning_rate": 0.00018360147525971402, "loss": 1.879, "step": 2431 }, { "epoch": 0.19, "grad_norm": 1.4951810836791992, "learning_rate": 0.00018358830894583288, "loss": 1.2749, "step": 2432 }, { "epoch": 0.19, "grad_norm": 2.739833354949951, "learning_rate": 0.00018357513782096363, "loss": 1.5745, "step": 2433 }, { "epoch": 0.19, "grad_norm": 1.3532471656799316, "learning_rate": 0.00018356196188586444, "loss": 1.9291, "step": 2434 }, { "epoch": 0.19, "grad_norm": 1.378670573234558, "learning_rate": 0.00018354878114129367, "loss": 1.6764, "step": 2435 }, { "epoch": 0.19, "grad_norm": 1.4260529279708862, "learning_rate": 0.00018353559558800984, "loss": 1.9027, "step": 2436 }, { "epoch": 0.19, "grad_norm": 1.1657241582870483, 
"learning_rate": 0.000183522405226772, "loss": 1.238, "step": 2437 }, { "epoch": 0.19, "grad_norm": 2.629821538925171, "learning_rate": 0.00018350921005833923, "loss": 2.0932, "step": 2438 }, { "epoch": 0.19, "grad_norm": 1.43118155002594, "learning_rate": 0.00018349601008347104, "loss": 2.2262, "step": 2439 }, { "epoch": 0.19, "grad_norm": 1.2286436557769775, "learning_rate": 0.00018348280530292713, "loss": 1.7376, "step": 2440 }, { "epoch": 0.19, "grad_norm": 1.3669089078903198, "learning_rate": 0.0001834695957174675, "loss": 1.7055, "step": 2441 }, { "epoch": 0.19, "grad_norm": 1.7764086723327637, "learning_rate": 0.00018345638132785251, "loss": 2.1373, "step": 2442 }, { "epoch": 0.19, "grad_norm": 1.4588940143585205, "learning_rate": 0.00018344316213484265, "loss": 1.6545, "step": 2443 }, { "epoch": 0.19, "grad_norm": 0.914450466632843, "learning_rate": 0.0001834299381391988, "loss": 1.7004, "step": 2444 }, { "epoch": 0.19, "grad_norm": 2.1287596225738525, "learning_rate": 0.00018341670934168204, "loss": 1.487, "step": 2445 }, { "epoch": 0.19, "grad_norm": 0.9952164888381958, "learning_rate": 0.0001834034757430538, "loss": 1.4537, "step": 2446 }, { "epoch": 0.19, "grad_norm": 1.2285135984420776, "learning_rate": 0.00018339023734407575, "loss": 1.5013, "step": 2447 }, { "epoch": 0.19, "grad_norm": 2.655881881713867, "learning_rate": 0.00018337699414550982, "loss": 1.8414, "step": 2448 }, { "epoch": 0.19, "grad_norm": 1.1183717250823975, "learning_rate": 0.0001833637461481182, "loss": 0.6971, "step": 2449 }, { "epoch": 0.19, "grad_norm": 2.2841267585754395, "learning_rate": 0.00018335049335266345, "loss": 2.766, "step": 2450 }, { "epoch": 0.19, "grad_norm": 1.0994460582733154, "learning_rate": 0.0001833372357599083, "loss": 1.4074, "step": 2451 }, { "epoch": 0.19, "grad_norm": 1.716026782989502, "learning_rate": 0.00018332397337061585, "loss": 1.3509, "step": 2452 }, { "epoch": 0.19, "grad_norm": 1.9447699785232544, "learning_rate": 0.0001833107061855494, "loss": 
2.2, "step": 2453 }, { "epoch": 0.19, "grad_norm": 2.006174087524414, "learning_rate": 0.0001832974342054725, "loss": 2.4833, "step": 2454 }, { "epoch": 0.19, "grad_norm": 2.0100760459899902, "learning_rate": 0.00018328415743114912, "loss": 1.7687, "step": 2455 }, { "epoch": 0.19, "grad_norm": 1.006091833114624, "learning_rate": 0.00018327087586334339, "loss": 1.7532, "step": 2456 }, { "epoch": 0.19, "grad_norm": 1.6647827625274658, "learning_rate": 0.00018325758950281967, "loss": 1.7129, "step": 2457 }, { "epoch": 0.19, "grad_norm": 2.7328689098358154, "learning_rate": 0.00018324429835034275, "loss": 1.6918, "step": 2458 }, { "epoch": 0.19, "grad_norm": 1.3991206884384155, "learning_rate": 0.00018323100240667757, "loss": 1.2214, "step": 2459 }, { "epoch": 0.19, "grad_norm": 1.4184281826019287, "learning_rate": 0.0001832177016725894, "loss": 1.7862, "step": 2460 }, { "epoch": 0.19, "grad_norm": 1.2416338920593262, "learning_rate": 0.00018320439614884378, "loss": 1.1556, "step": 2461 }, { "epoch": 0.19, "grad_norm": 0.8497054576873779, "learning_rate": 0.00018319108583620653, "loss": 1.4325, "step": 2462 }, { "epoch": 0.19, "grad_norm": 2.297071695327759, "learning_rate": 0.00018317777073544367, "loss": 1.4674, "step": 2463 }, { "epoch": 0.19, "grad_norm": 1.2527564764022827, "learning_rate": 0.00018316445084732164, "loss": 1.2915, "step": 2464 }, { "epoch": 0.19, "grad_norm": 1.6498967409133911, "learning_rate": 0.00018315112617260705, "loss": 1.4927, "step": 2465 }, { "epoch": 0.19, "grad_norm": 1.3820255994796753, "learning_rate": 0.00018313779671206678, "loss": 1.453, "step": 2466 }, { "epoch": 0.19, "grad_norm": 1.0465338230133057, "learning_rate": 0.0001831244624664681, "loss": 1.5588, "step": 2467 }, { "epoch": 0.19, "grad_norm": 2.2561140060424805, "learning_rate": 0.00018311112343657836, "loss": 2.3434, "step": 2468 }, { "epoch": 0.19, "grad_norm": 1.8549691438674927, "learning_rate": 0.00018309777962316535, "loss": 1.8663, "step": 2469 }, { "epoch": 0.19, 
"grad_norm": 1.1539772748947144, "learning_rate": 0.00018308443102699713, "loss": 1.7022, "step": 2470 }, { "epoch": 0.19, "grad_norm": 1.8160803318023682, "learning_rate": 0.00018307107764884193, "loss": 1.4103, "step": 2471 }, { "epoch": 0.19, "grad_norm": 1.6173945665359497, "learning_rate": 0.0001830577194894683, "loss": 2.2557, "step": 2472 }, { "epoch": 0.19, "grad_norm": 1.3309111595153809, "learning_rate": 0.00018304435654964513, "loss": 1.4374, "step": 2473 }, { "epoch": 0.19, "grad_norm": 1.3846160173416138, "learning_rate": 0.00018303098883014154, "loss": 1.2915, "step": 2474 }, { "epoch": 0.19, "grad_norm": 0.7553638219833374, "learning_rate": 0.00018301761633172684, "loss": 1.2126, "step": 2475 }, { "epoch": 0.19, "grad_norm": 1.7009459733963013, "learning_rate": 0.0001830042390551708, "loss": 2.1108, "step": 2476 }, { "epoch": 0.19, "grad_norm": 1.5136101245880127, "learning_rate": 0.00018299085700124325, "loss": 1.109, "step": 2477 }, { "epoch": 0.19, "grad_norm": 1.203506350517273, "learning_rate": 0.0001829774701707145, "loss": 1.5482, "step": 2478 }, { "epoch": 0.19, "grad_norm": 1.3412259817123413, "learning_rate": 0.00018296407856435498, "loss": 1.5731, "step": 2479 }, { "epoch": 0.19, "grad_norm": 1.0847840309143066, "learning_rate": 0.00018295068218293547, "loss": 1.3852, "step": 2480 }, { "epoch": 0.19, "grad_norm": 1.2312690019607544, "learning_rate": 0.00018293728102722706, "loss": 1.6252, "step": 2481 }, { "epoch": 0.19, "grad_norm": 1.248072862625122, "learning_rate": 0.00018292387509800096, "loss": 1.289, "step": 2482 }, { "epoch": 0.19, "grad_norm": 1.179131031036377, "learning_rate": 0.00018291046439602884, "loss": 1.1865, "step": 2483 }, { "epoch": 0.19, "grad_norm": 1.9913524389266968, "learning_rate": 0.00018289704892208248, "loss": 1.6867, "step": 2484 }, { "epoch": 0.19, "grad_norm": 0.9110174179077148, "learning_rate": 0.00018288362867693414, "loss": 1.1553, "step": 2485 }, { "epoch": 0.19, "grad_norm": 1.6185886859893799, 
"learning_rate": 0.00018287020366135613, "loss": 1.1664, "step": 2486 }, { "epoch": 0.19, "grad_norm": 1.2882068157196045, "learning_rate": 0.00018285677387612118, "loss": 1.3837, "step": 2487 }, { "epoch": 0.19, "grad_norm": 2.0966992378234863, "learning_rate": 0.00018284333932200227, "loss": 2.0991, "step": 2488 }, { "epoch": 0.19, "grad_norm": 1.107521891593933, "learning_rate": 0.0001828298999997726, "loss": 1.4665, "step": 2489 }, { "epoch": 0.19, "grad_norm": 1.9115859270095825, "learning_rate": 0.00018281645591020567, "loss": 1.819, "step": 2490 }, { "epoch": 0.19, "grad_norm": 1.6969553232192993, "learning_rate": 0.0001828030070540753, "loss": 1.9467, "step": 2491 }, { "epoch": 0.19, "grad_norm": 0.9309782385826111, "learning_rate": 0.00018278955343215553, "loss": 1.5587, "step": 2492 }, { "epoch": 0.19, "grad_norm": 0.8606192469596863, "learning_rate": 0.0001827760950452207, "loss": 1.2219, "step": 2493 }, { "epoch": 0.19, "grad_norm": 1.4240384101867676, "learning_rate": 0.0001827626318940454, "loss": 1.509, "step": 2494 }, { "epoch": 0.19, "grad_norm": 0.9683323502540588, "learning_rate": 0.00018274916397940455, "loss": 1.5464, "step": 2495 }, { "epoch": 0.19, "grad_norm": 1.5916328430175781, "learning_rate": 0.00018273569130207326, "loss": 1.6866, "step": 2496 }, { "epoch": 0.19, "grad_norm": 1.1711751222610474, "learning_rate": 0.00018272221386282701, "loss": 0.6521, "step": 2497 }, { "epoch": 0.19, "grad_norm": 0.9833508133888245, "learning_rate": 0.00018270873166244145, "loss": 1.1848, "step": 2498 }, { "epoch": 0.19, "grad_norm": 0.9562767744064331, "learning_rate": 0.0001826952447016926, "loss": 1.6486, "step": 2499 }, { "epoch": 0.19, "grad_norm": 1.5414010286331177, "learning_rate": 0.0001826817529813567, "loss": 1.5915, "step": 2500 }, { "epoch": 0.19, "grad_norm": 1.5371520519256592, "learning_rate": 0.0001826682565022103, "loss": 1.6248, "step": 2501 }, { "epoch": 0.19, "grad_norm": 1.1924084424972534, "learning_rate": 0.00018265475526503013, 
"loss": 1.6678, "step": 2502 }, { "epoch": 0.19, "grad_norm": 1.9980251789093018, "learning_rate": 0.0001826412492705933, "loss": 1.5125, "step": 2503 }, { "epoch": 0.19, "grad_norm": 1.559988021850586, "learning_rate": 0.0001826277385196772, "loss": 1.5828, "step": 2504 }, { "epoch": 0.19, "grad_norm": 1.7559432983398438, "learning_rate": 0.0001826142230130594, "loss": 1.137, "step": 2505 }, { "epoch": 0.19, "grad_norm": 1.5402323007583618, "learning_rate": 0.0001826007027515178, "loss": 1.3295, "step": 2506 }, { "epoch": 0.19, "grad_norm": 1.1579385995864868, "learning_rate": 0.00018258717773583062, "loss": 1.4156, "step": 2507 }, { "epoch": 0.19, "grad_norm": 1.1856001615524292, "learning_rate": 0.00018257364796677624, "loss": 1.36, "step": 2508 }, { "epoch": 0.19, "grad_norm": 2.8217978477478027, "learning_rate": 0.0001825601134451334, "loss": 2.0589, "step": 2509 }, { "epoch": 0.19, "grad_norm": 1.9169117212295532, "learning_rate": 0.00018254657417168108, "loss": 1.6328, "step": 2510 }, { "epoch": 0.19, "grad_norm": 1.4844073057174683, "learning_rate": 0.00018253303014719856, "loss": 1.0588, "step": 2511 }, { "epoch": 0.19, "grad_norm": 1.0582952499389648, "learning_rate": 0.00018251948137246537, "loss": 1.1737, "step": 2512 }, { "epoch": 0.19, "grad_norm": 1.7108761072158813, "learning_rate": 0.0001825059278482613, "loss": 0.9281, "step": 2513 }, { "epoch": 0.19, "grad_norm": 1.2192548513412476, "learning_rate": 0.00018249236957536649, "loss": 1.5876, "step": 2514 }, { "epoch": 0.19, "grad_norm": 0.9669437408447266, "learning_rate": 0.00018247880655456125, "loss": 1.1935, "step": 2515 }, { "epoch": 0.19, "grad_norm": 1.2992327213287354, "learning_rate": 0.00018246523878662617, "loss": 1.3166, "step": 2516 }, { "epoch": 0.19, "grad_norm": 0.8267993927001953, "learning_rate": 0.00018245166627234223, "loss": 0.9829, "step": 2517 }, { "epoch": 0.19, "grad_norm": 3.4841349124908447, "learning_rate": 0.0001824380890124906, "loss": 1.7494, "step": 2518 }, { "epoch": 
0.19, "grad_norm": 1.5740329027175903, "learning_rate": 0.0001824245070078527, "loss": 1.8858, "step": 2519 }, { "epoch": 0.19, "grad_norm": 1.9241811037063599, "learning_rate": 0.00018241092025921026, "loss": 2.0102, "step": 2520 }, { "epoch": 0.19, "grad_norm": 2.229830265045166, "learning_rate": 0.00018239732876734527, "loss": 2.2825, "step": 2521 }, { "epoch": 0.19, "grad_norm": 1.1919364929199219, "learning_rate": 0.00018238373253303997, "loss": 0.8369, "step": 2522 }, { "epoch": 0.19, "grad_norm": 1.1899614334106445, "learning_rate": 0.00018237013155707698, "loss": 1.8493, "step": 2523 }, { "epoch": 0.19, "grad_norm": 2.074646234512329, "learning_rate": 0.00018235652584023905, "loss": 1.5851, "step": 2524 }, { "epoch": 0.19, "grad_norm": 1.8513882160186768, "learning_rate": 0.0001823429153833093, "loss": 1.5082, "step": 2525 }, { "epoch": 0.19, "grad_norm": 1.7147942781448364, "learning_rate": 0.00018232930018707104, "loss": 1.6965, "step": 2526 }, { "epoch": 0.19, "grad_norm": 1.3767508268356323, "learning_rate": 0.00018231568025230797, "loss": 1.9788, "step": 2527 }, { "epoch": 0.19, "grad_norm": 2.230792284011841, "learning_rate": 0.000182302055579804, "loss": 1.8609, "step": 2528 }, { "epoch": 0.19, "grad_norm": 2.0133883953094482, "learning_rate": 0.0001822884261703432, "loss": 1.9083, "step": 2529 }, { "epoch": 0.19, "grad_norm": 1.9067933559417725, "learning_rate": 0.00018227479202471015, "loss": 1.3199, "step": 2530 }, { "epoch": 0.19, "grad_norm": 2.4447543621063232, "learning_rate": 0.00018226115314368948, "loss": 1.7017, "step": 2531 }, { "epoch": 0.19, "grad_norm": 1.9102394580841064, "learning_rate": 0.00018224750952806624, "loss": 1.6076, "step": 2532 }, { "epoch": 0.19, "grad_norm": 1.3442761898040771, "learning_rate": 0.00018223386117862566, "loss": 1.3202, "step": 2533 }, { "epoch": 0.19, "grad_norm": 2.7070152759552, "learning_rate": 0.00018222020809615335, "loss": 2.4575, "step": 2534 }, { "epoch": 0.19, "grad_norm": 1.414053201675415, 
"learning_rate": 0.00018220655028143505, "loss": 1.3558, "step": 2535 }, { "epoch": 0.19, "grad_norm": 1.1721621751785278, "learning_rate": 0.00018219288773525688, "loss": 1.6637, "step": 2536 }, { "epoch": 0.19, "grad_norm": 1.4827830791473389, "learning_rate": 0.0001821792204584052, "loss": 1.3582, "step": 2537 }, { "epoch": 0.19, "grad_norm": 1.320481538772583, "learning_rate": 0.0001821655484516666, "loss": 1.8543, "step": 2538 }, { "epoch": 0.19, "grad_norm": 0.961631178855896, "learning_rate": 0.00018215187171582802, "loss": 1.5246, "step": 2539 }, { "epoch": 0.19, "grad_norm": 1.6919267177581787, "learning_rate": 0.00018213819025167665, "loss": 1.2371, "step": 2540 }, { "epoch": 0.19, "grad_norm": 1.238290786743164, "learning_rate": 0.00018212450405999994, "loss": 1.8487, "step": 2541 }, { "epoch": 0.19, "grad_norm": 2.662940502166748, "learning_rate": 0.00018211081314158554, "loss": 1.6937, "step": 2542 }, { "epoch": 0.19, "grad_norm": 2.359961986541748, "learning_rate": 0.00018209711749722147, "loss": 1.8011, "step": 2543 }, { "epoch": 0.19, "grad_norm": 2.1527910232543945, "learning_rate": 0.00018208341712769603, "loss": 1.5611, "step": 2544 }, { "epoch": 0.19, "grad_norm": 1.1702790260314941, "learning_rate": 0.00018206971203379773, "loss": 1.0083, "step": 2545 }, { "epoch": 0.19, "grad_norm": 2.409660577774048, "learning_rate": 0.00018205600221631542, "loss": 1.0252, "step": 2546 }, { "epoch": 0.19, "grad_norm": 0.9780800342559814, "learning_rate": 0.0001820422876760381, "loss": 1.3942, "step": 2547 }, { "epoch": 0.19, "grad_norm": 1.5022331476211548, "learning_rate": 0.00018202856841375518, "loss": 2.0251, "step": 2548 }, { "epoch": 0.19, "grad_norm": 4.589594841003418, "learning_rate": 0.00018201484443025628, "loss": 1.8529, "step": 2549 }, { "epoch": 0.19, "grad_norm": 1.1910361051559448, "learning_rate": 0.00018200111572633124, "loss": 2.0532, "step": 2550 }, { "epoch": 0.19, "grad_norm": 1.2353655099868774, "learning_rate": 0.00018198738230277027, 
"loss": 1.3476, "step": 2551 }, { "epoch": 0.19, "grad_norm": 1.236703872680664, "learning_rate": 0.0001819736441603638, "loss": 1.3169, "step": 2552 }, { "epoch": 0.19, "grad_norm": 2.1696693897247314, "learning_rate": 0.00018195990129990256, "loss": 1.6263, "step": 2553 }, { "epoch": 0.19, "grad_norm": 4.73249626159668, "learning_rate": 0.0001819461537221775, "loss": 2.6419, "step": 2554 }, { "epoch": 0.19, "grad_norm": 1.7608572244644165, "learning_rate": 0.00018193240142797988, "loss": 0.9949, "step": 2555 }, { "epoch": 0.2, "grad_norm": 1.1952437162399292, "learning_rate": 0.00018191864441810123, "loss": 1.7594, "step": 2556 }, { "epoch": 0.2, "grad_norm": 1.7200685739517212, "learning_rate": 0.00018190488269333334, "loss": 1.8258, "step": 2557 }, { "epoch": 0.2, "grad_norm": 1.913358449935913, "learning_rate": 0.00018189111625446826, "loss": 1.312, "step": 2558 }, { "epoch": 0.2, "grad_norm": 1.071839451789856, "learning_rate": 0.00018187734510229838, "loss": 1.5865, "step": 2559 }, { "epoch": 0.2, "grad_norm": 2.559231758117676, "learning_rate": 0.00018186356923761626, "loss": 1.6771, "step": 2560 }, { "epoch": 0.2, "grad_norm": 1.3526185750961304, "learning_rate": 0.0001818497886612148, "loss": 1.4061, "step": 2561 }, { "epoch": 0.2, "grad_norm": 1.7621352672576904, "learning_rate": 0.00018183600337388713, "loss": 1.7553, "step": 2562 }, { "epoch": 0.2, "grad_norm": 1.2093881368637085, "learning_rate": 0.00018182221337642668, "loss": 1.3658, "step": 2563 }, { "epoch": 0.2, "grad_norm": 0.9994827508926392, "learning_rate": 0.0001818084186696272, "loss": 1.3965, "step": 2564 }, { "epoch": 0.2, "grad_norm": 1.2645670175552368, "learning_rate": 0.00018179461925428255, "loss": 1.3573, "step": 2565 }, { "epoch": 0.2, "grad_norm": 1.118042230606079, "learning_rate": 0.00018178081513118706, "loss": 1.2567, "step": 2566 }, { "epoch": 0.2, "grad_norm": 1.4919819831848145, "learning_rate": 0.0001817670063011352, "loss": 1.4331, "step": 2567 }, { "epoch": 0.2, 
"grad_norm": 4.142040252685547, "learning_rate": 0.00018175319276492172, "loss": 1.7794, "step": 2568 }, { "epoch": 0.2, "grad_norm": 1.0690990686416626, "learning_rate": 0.00018173937452334174, "loss": 1.0578, "step": 2569 }, { "epoch": 0.2, "grad_norm": 1.2621536254882812, "learning_rate": 0.0001817255515771905, "loss": 1.3116, "step": 2570 }, { "epoch": 0.2, "grad_norm": 0.9877061247825623, "learning_rate": 0.00018171172392726363, "loss": 1.4296, "step": 2571 }, { "epoch": 0.2, "grad_norm": 2.028310537338257, "learning_rate": 0.000181697891574357, "loss": 2.1906, "step": 2572 }, { "epoch": 0.2, "grad_norm": 1.1077208518981934, "learning_rate": 0.00018168405451926672, "loss": 1.758, "step": 2573 }, { "epoch": 0.2, "grad_norm": 0.974448025226593, "learning_rate": 0.0001816702127627892, "loss": 1.5215, "step": 2574 }, { "epoch": 0.2, "grad_norm": 1.7925071716308594, "learning_rate": 0.0001816563663057211, "loss": 1.5635, "step": 2575 }, { "epoch": 0.2, "grad_norm": 1.4951417446136475, "learning_rate": 0.0001816425151488594, "loss": 2.4215, "step": 2576 }, { "epoch": 0.2, "grad_norm": 1.764892816543579, "learning_rate": 0.00018162865929300125, "loss": 1.5134, "step": 2577 }, { "epoch": 0.2, "grad_norm": 1.1533671617507935, "learning_rate": 0.00018161479873894423, "loss": 1.8396, "step": 2578 }, { "epoch": 0.2, "grad_norm": 1.2261055707931519, "learning_rate": 0.00018160093348748598, "loss": 2.1453, "step": 2579 }, { "epoch": 0.2, "grad_norm": 0.9697266221046448, "learning_rate": 0.00018158706353942463, "loss": 2.334, "step": 2580 }, { "epoch": 0.2, "grad_norm": 1.1048790216445923, "learning_rate": 0.00018157318889555838, "loss": 1.6463, "step": 2581 }, { "epoch": 0.2, "grad_norm": 2.85962176322937, "learning_rate": 0.0001815593095566859, "loss": 1.7409, "step": 2582 }, { "epoch": 0.2, "grad_norm": 1.6670985221862793, "learning_rate": 0.00018154542552360593, "loss": 1.631, "step": 2583 }, { "epoch": 0.2, "grad_norm": 1.1452802419662476, "learning_rate": 
0.00018153153679711763, "loss": 1.4664, "step": 2584 }, { "epoch": 0.2, "grad_norm": 1.0981721878051758, "learning_rate": 0.00018151764337802036, "loss": 1.0846, "step": 2585 }, { "epoch": 0.2, "grad_norm": 1.6959917545318604, "learning_rate": 0.00018150374526711377, "loss": 1.3793, "step": 2586 }, { "epoch": 0.2, "grad_norm": 1.013586163520813, "learning_rate": 0.0001814898424651978, "loss": 1.5246, "step": 2587 }, { "epoch": 0.2, "grad_norm": 1.48091459274292, "learning_rate": 0.00018147593497307258, "loss": 1.5729, "step": 2588 }, { "epoch": 0.2, "grad_norm": 1.3866233825683594, "learning_rate": 0.00018146202279153859, "loss": 1.8018, "step": 2589 }, { "epoch": 0.2, "grad_norm": 1.2428841590881348, "learning_rate": 0.00018144810592139656, "loss": 1.4784, "step": 2590 }, { "epoch": 0.2, "grad_norm": 3.054607391357422, "learning_rate": 0.0001814341843634475, "loss": 2.4104, "step": 2591 }, { "epoch": 0.2, "grad_norm": 1.714171051979065, "learning_rate": 0.00018142025811849268, "loss": 1.9242, "step": 2592 }, { "epoch": 0.2, "grad_norm": 0.8417809009552002, "learning_rate": 0.0001814063271873336, "loss": 1.5156, "step": 2593 }, { "epoch": 0.2, "grad_norm": 3.9130959510803223, "learning_rate": 0.0001813923915707721, "loss": 1.6853, "step": 2594 }, { "epoch": 0.2, "grad_norm": 2.6998541355133057, "learning_rate": 0.00018137845126961022, "loss": 1.5581, "step": 2595 }, { "epoch": 0.2, "grad_norm": 1.545060396194458, "learning_rate": 0.00018136450628465035, "loss": 1.3414, "step": 2596 }, { "epoch": 0.2, "grad_norm": 1.2236192226409912, "learning_rate": 0.00018135055661669505, "loss": 1.9899, "step": 2597 }, { "epoch": 0.2, "grad_norm": 1.5834341049194336, "learning_rate": 0.00018133660226654725, "loss": 1.15, "step": 2598 }, { "epoch": 0.2, "grad_norm": 1.4205825328826904, "learning_rate": 0.00018132264323501008, "loss": 1.1781, "step": 2599 }, { "epoch": 0.2, "grad_norm": 1.0680155754089355, "learning_rate": 0.000181308679522887, "loss": 1.8333, "step": 2600 }, { 
"epoch": 0.2, "grad_norm": 1.1171678304672241, "learning_rate": 0.00018129471113098163, "loss": 1.4489, "step": 2601 }, { "epoch": 0.2, "grad_norm": 0.7856915593147278, "learning_rate": 0.000181280738060098, "loss": 1.1877, "step": 2602 }, { "epoch": 0.2, "grad_norm": 1.31536865234375, "learning_rate": 0.00018126676031104028, "loss": 1.1752, "step": 2603 }, { "epoch": 0.2, "grad_norm": 1.7965269088745117, "learning_rate": 0.00018125277788461303, "loss": 2.2177, "step": 2604 }, { "epoch": 0.2, "grad_norm": 1.1301157474517822, "learning_rate": 0.00018123879078162097, "loss": 1.385, "step": 2605 }, { "epoch": 0.2, "grad_norm": 2.134826421737671, "learning_rate": 0.00018122479900286917, "loss": 1.333, "step": 2606 }, { "epoch": 0.2, "grad_norm": 1.0767065286636353, "learning_rate": 0.00018121080254916293, "loss": 1.4667, "step": 2607 }, { "epoch": 0.2, "grad_norm": 1.0572493076324463, "learning_rate": 0.00018119680142130782, "loss": 1.4817, "step": 2608 }, { "epoch": 0.2, "grad_norm": 1.1220647096633911, "learning_rate": 0.0001811827956201097, "loss": 1.3455, "step": 2609 }, { "epoch": 0.2, "grad_norm": 3.9429771900177, "learning_rate": 0.00018116878514637464, "loss": 1.6088, "step": 2610 }, { "epoch": 0.2, "grad_norm": 1.188751459121704, "learning_rate": 0.00018115477000090908, "loss": 1.2789, "step": 2611 }, { "epoch": 0.2, "grad_norm": 1.4948848485946655, "learning_rate": 0.00018114075018451967, "loss": 1.6406, "step": 2612 }, { "epoch": 0.2, "grad_norm": 4.189512252807617, "learning_rate": 0.00018112672569801328, "loss": 3.308, "step": 2613 }, { "epoch": 0.2, "grad_norm": 1.13133704662323, "learning_rate": 0.0001811126965421971, "loss": 1.5584, "step": 2614 }, { "epoch": 0.2, "grad_norm": 1.1793345212936401, "learning_rate": 0.00018109866271787868, "loss": 1.7062, "step": 2615 }, { "epoch": 0.2, "grad_norm": 1.2307897806167603, "learning_rate": 0.00018108462422586566, "loss": 2.1963, "step": 2616 }, { "epoch": 0.2, "grad_norm": 0.9827930331230164, "learning_rate": 
0.00018107058106696603, "loss": 1.4974, "step": 2617 }, { "epoch": 0.2, "grad_norm": 0.9056128263473511, "learning_rate": 0.00018105653324198816, "loss": 1.5996, "step": 2618 }, { "epoch": 0.2, "grad_norm": 2.660839319229126, "learning_rate": 0.00018104248075174044, "loss": 1.4263, "step": 2619 }, { "epoch": 0.2, "grad_norm": 1.2041702270507812, "learning_rate": 0.00018102842359703176, "loss": 2.0162, "step": 2620 }, { "epoch": 0.2, "grad_norm": 1.115233063697815, "learning_rate": 0.00018101436177867116, "loss": 1.7326, "step": 2621 }, { "epoch": 0.2, "grad_norm": 3.8851780891418457, "learning_rate": 0.00018100029529746802, "loss": 2.1483, "step": 2622 }, { "epoch": 0.2, "grad_norm": 2.1652135848999023, "learning_rate": 0.0001809862241542319, "loss": 1.6896, "step": 2623 }, { "epoch": 0.2, "grad_norm": 1.8556911945343018, "learning_rate": 0.0001809721483497727, "loss": 1.6152, "step": 2624 }, { "epoch": 0.2, "grad_norm": 1.1437106132507324, "learning_rate": 0.00018095806788490055, "loss": 1.6725, "step": 2625 }, { "epoch": 0.2, "grad_norm": 1.3305866718292236, "learning_rate": 0.00018094398276042586, "loss": 0.7951, "step": 2626 }, { "epoch": 0.2, "grad_norm": 1.3857873678207397, "learning_rate": 0.00018092989297715931, "loss": 1.7477, "step": 2627 }, { "epoch": 0.2, "grad_norm": 1.0084290504455566, "learning_rate": 0.00018091579853591187, "loss": 1.7908, "step": 2628 }, { "epoch": 0.2, "grad_norm": 1.1571614742279053, "learning_rate": 0.00018090169943749476, "loss": 1.1145, "step": 2629 }, { "epoch": 0.2, "grad_norm": 2.192537546157837, "learning_rate": 0.0001808875956827194, "loss": 2.5904, "step": 2630 }, { "epoch": 0.2, "grad_norm": 1.4975266456604004, "learning_rate": 0.0001808734872723976, "loss": 1.7768, "step": 2631 }, { "epoch": 0.2, "grad_norm": 0.9240743517875671, "learning_rate": 0.0001808593742073414, "loss": 1.7032, "step": 2632 }, { "epoch": 0.2, "grad_norm": 1.4734222888946533, "learning_rate": 0.00018084525648836303, "loss": 1.6072, "step": 2633 }, 
{ "epoch": 0.2, "grad_norm": 0.831834614276886, "learning_rate": 0.00018083113411627508, "loss": 1.3503, "step": 2634 }, { "epoch": 0.2, "grad_norm": 1.5967613458633423, "learning_rate": 0.0001808170070918904, "loss": 1.4235, "step": 2635 }, { "epoch": 0.2, "grad_norm": 1.083259105682373, "learning_rate": 0.000180802875416022, "loss": 1.3902, "step": 2636 }, { "epoch": 0.2, "grad_norm": 1.3844375610351562, "learning_rate": 0.00018078873908948325, "loss": 1.8175, "step": 2637 }, { "epoch": 0.2, "grad_norm": 3.78001070022583, "learning_rate": 0.00018077459811308787, "loss": 1.7643, "step": 2638 }, { "epoch": 0.2, "grad_norm": 1.4645507335662842, "learning_rate": 0.0001807604524876497, "loss": 1.6989, "step": 2639 }, { "epoch": 0.2, "grad_norm": 1.3825865983963013, "learning_rate": 0.00018074630221398286, "loss": 1.4281, "step": 2640 }, { "epoch": 0.2, "grad_norm": 1.1687963008880615, "learning_rate": 0.00018073214729290188, "loss": 1.2907, "step": 2641 }, { "epoch": 0.2, "grad_norm": 1.3347936868667603, "learning_rate": 0.00018071798772522135, "loss": 1.858, "step": 2642 }, { "epoch": 0.2, "grad_norm": 3.129218578338623, "learning_rate": 0.00018070382351175629, "loss": 1.6094, "step": 2643 }, { "epoch": 0.2, "grad_norm": 2.66196608543396, "learning_rate": 0.00018068965465332194, "loss": 1.6774, "step": 2644 }, { "epoch": 0.2, "grad_norm": 0.8766286969184875, "learning_rate": 0.00018067548115073377, "loss": 1.8971, "step": 2645 }, { "epoch": 0.2, "grad_norm": 1.0771596431732178, "learning_rate": 0.00018066130300480753, "loss": 1.6094, "step": 2646 }, { "epoch": 0.2, "grad_norm": 0.9572142958641052, "learning_rate": 0.00018064712021635934, "loss": 1.3112, "step": 2647 }, { "epoch": 0.2, "grad_norm": 1.267822027206421, "learning_rate": 0.0001806329327862054, "loss": 1.3389, "step": 2648 }, { "epoch": 0.2, "grad_norm": 1.0338741540908813, "learning_rate": 0.0001806187407151623, "loss": 0.9883, "step": 2649 }, { "epoch": 0.2, "grad_norm": 0.9270285964012146, 
"learning_rate": 0.00018060454400404695, "loss": 1.4589, "step": 2650 }, { "epoch": 0.2, "grad_norm": 1.1306405067443848, "learning_rate": 0.0001805903426536764, "loss": 1.6122, "step": 2651 }, { "epoch": 0.2, "grad_norm": 1.2618389129638672, "learning_rate": 0.00018057613666486798, "loss": 1.305, "step": 2652 }, { "epoch": 0.2, "grad_norm": 3.803387403488159, "learning_rate": 0.00018056192603843943, "loss": 1.5926, "step": 2653 }, { "epoch": 0.2, "grad_norm": 0.9748861789703369, "learning_rate": 0.00018054771077520852, "loss": 1.4912, "step": 2654 }, { "epoch": 0.2, "grad_norm": 1.6393640041351318, "learning_rate": 0.00018053349087599353, "loss": 1.995, "step": 2655 }, { "epoch": 0.2, "grad_norm": 2.163567543029785, "learning_rate": 0.00018051926634161282, "loss": 1.6592, "step": 2656 }, { "epoch": 0.2, "grad_norm": 0.9740241765975952, "learning_rate": 0.00018050503717288518, "loss": 1.8771, "step": 2657 }, { "epoch": 0.2, "grad_norm": 1.5365108251571655, "learning_rate": 0.0001804908033706295, "loss": 1.4434, "step": 2658 }, { "epoch": 0.2, "grad_norm": 1.3889141082763672, "learning_rate": 0.00018047656493566506, "loss": 1.8994, "step": 2659 }, { "epoch": 0.2, "grad_norm": 2.3505680561065674, "learning_rate": 0.00018046232186881133, "loss": 2.0782, "step": 2660 }, { "epoch": 0.2, "grad_norm": 1.6121197938919067, "learning_rate": 0.00018044807417088812, "loss": 1.0735, "step": 2661 }, { "epoch": 0.2, "grad_norm": 1.3459789752960205, "learning_rate": 0.00018043382184271544, "loss": 1.7046, "step": 2662 }, { "epoch": 0.2, "grad_norm": 1.8061280250549316, "learning_rate": 0.0001804195648851136, "loss": 2.1633, "step": 2663 }, { "epoch": 0.2, "grad_norm": 3.7086873054504395, "learning_rate": 0.00018040530329890318, "loss": 2.5185, "step": 2664 }, { "epoch": 0.2, "grad_norm": 1.3487948179244995, "learning_rate": 0.000180391037084905, "loss": 1.6804, "step": 2665 }, { "epoch": 0.2, "grad_norm": 2.7554478645324707, "learning_rate": 0.00018037676624394017, "loss": 2.0941, 
"step": 2666 }, { "epoch": 0.2, "grad_norm": 2.286693572998047, "learning_rate": 0.00018036249077683008, "loss": 1.6523, "step": 2667 }, { "epoch": 0.2, "grad_norm": 1.257081151008606, "learning_rate": 0.00018034821068439632, "loss": 1.726, "step": 2668 }, { "epoch": 0.2, "grad_norm": 1.2478562593460083, "learning_rate": 0.00018033392596746083, "loss": 1.6287, "step": 2669 }, { "epoch": 0.2, "grad_norm": 0.9190035462379456, "learning_rate": 0.0001803196366268458, "loss": 1.836, "step": 2670 }, { "epoch": 0.2, "grad_norm": 2.1123993396759033, "learning_rate": 0.00018030534266337358, "loss": 1.7733, "step": 2671 }, { "epoch": 0.2, "grad_norm": 1.1372060775756836, "learning_rate": 0.00018029104407786694, "loss": 1.1477, "step": 2672 }, { "epoch": 0.2, "grad_norm": 0.9410860538482666, "learning_rate": 0.00018027674087114882, "loss": 0.894, "step": 2673 }, { "epoch": 0.2, "grad_norm": 1.2485352754592896, "learning_rate": 0.00018026243304404245, "loss": 1.8157, "step": 2674 }, { "epoch": 0.2, "grad_norm": 1.423012137413025, "learning_rate": 0.00018024812059737132, "loss": 1.4633, "step": 2675 }, { "epoch": 0.2, "grad_norm": 1.5996516942977905, "learning_rate": 0.00018023380353195925, "loss": 1.4629, "step": 2676 }, { "epoch": 0.2, "grad_norm": 1.0664187669754028, "learning_rate": 0.00018021948184863021, "loss": 1.2664, "step": 2677 }, { "epoch": 0.2, "grad_norm": 1.65443754196167, "learning_rate": 0.0001802051555482085, "loss": 1.3615, "step": 2678 }, { "epoch": 0.2, "grad_norm": 1.389872431755066, "learning_rate": 0.0001801908246315187, "loss": 2.4519, "step": 2679 }, { "epoch": 0.2, "grad_norm": 0.8049565553665161, "learning_rate": 0.0001801764890993856, "loss": 1.3739, "step": 2680 }, { "epoch": 0.2, "grad_norm": 0.9182584881782532, "learning_rate": 0.00018016214895263434, "loss": 1.5056, "step": 2681 }, { "epoch": 0.2, "grad_norm": 1.484815001487732, "learning_rate": 0.00018014780419209028, "loss": 1.3152, "step": 2682 }, { "epoch": 0.2, "grad_norm": 
1.9906147718429565, "learning_rate": 0.00018013345481857903, "loss": 1.9421, "step": 2683 }, { "epoch": 0.2, "grad_norm": 2.1315810680389404, "learning_rate": 0.00018011910083292641, "loss": 1.751, "step": 2684 }, { "epoch": 0.2, "grad_norm": 3.506044864654541, "learning_rate": 0.0001801047422359587, "loss": 2.419, "step": 2685 }, { "epoch": 0.2, "grad_norm": 1.5668696165084839, "learning_rate": 0.00018009037902850226, "loss": 1.3681, "step": 2686 }, { "epoch": 0.21, "grad_norm": 1.211445689201355, "learning_rate": 0.00018007601121138372, "loss": 1.7141, "step": 2687 }, { "epoch": 0.21, "grad_norm": 1.689695954322815, "learning_rate": 0.00018006163878543015, "loss": 1.799, "step": 2688 }, { "epoch": 0.21, "grad_norm": 1.0454462766647339, "learning_rate": 0.00018004726175146864, "loss": 1.5766, "step": 2689 }, { "epoch": 0.21, "grad_norm": 2.70527982711792, "learning_rate": 0.00018003288011032677, "loss": 1.4959, "step": 2690 }, { "epoch": 0.21, "grad_norm": 0.8661803603172302, "learning_rate": 0.00018001849386283222, "loss": 1.0343, "step": 2691 }, { "epoch": 0.21, "grad_norm": 1.0838207006454468, "learning_rate": 0.00018000410300981302, "loss": 1.8309, "step": 2692 }, { "epoch": 0.21, "grad_norm": 1.1940923929214478, "learning_rate": 0.00017998970755209748, "loss": 1.4551, "step": 2693 }, { "epoch": 0.21, "grad_norm": 1.4241362810134888, "learning_rate": 0.0001799753074905141, "loss": 1.9997, "step": 2694 }, { "epoch": 0.21, "grad_norm": 0.908848226070404, "learning_rate": 0.0001799609028258917, "loss": 1.5751, "step": 2695 }, { "epoch": 0.21, "grad_norm": 1.2979034185409546, "learning_rate": 0.00017994649355905935, "loss": 2.0692, "step": 2696 }, { "epoch": 0.21, "grad_norm": 1.256519079208374, "learning_rate": 0.0001799320796908464, "loss": 1.4705, "step": 2697 }, { "epoch": 0.21, "grad_norm": 0.8084309697151184, "learning_rate": 0.00017991766122208244, "loss": 1.2748, "step": 2698 }, { "epoch": 0.21, "grad_norm": 1.127168893814087, "learning_rate": 
0.00017990323815359732, "loss": 1.6395, "step": 2699 }, { "epoch": 0.21, "grad_norm": 1.641097068786621, "learning_rate": 0.0001798888104862212, "loss": 1.6141, "step": 2700 }, { "epoch": 0.21, "grad_norm": 1.5019055604934692, "learning_rate": 0.00017987437822078442, "loss": 1.3406, "step": 2701 }, { "epoch": 0.21, "grad_norm": 7.724727630615234, "learning_rate": 0.00017985994135811775, "loss": 2.128, "step": 2702 }, { "epoch": 0.21, "grad_norm": 1.3175315856933594, "learning_rate": 0.00017984549989905201, "loss": 1.579, "step": 2703 }, { "epoch": 0.21, "grad_norm": 1.1976380348205566, "learning_rate": 0.00017983105384441843, "loss": 1.467, "step": 2704 }, { "epoch": 0.21, "grad_norm": 1.7984952926635742, "learning_rate": 0.00017981660319504845, "loss": 1.7947, "step": 2705 }, { "epoch": 0.21, "grad_norm": 1.1421533823013306, "learning_rate": 0.00017980214795177378, "loss": 1.5359, "step": 2706 }, { "epoch": 0.21, "grad_norm": 1.3746885061264038, "learning_rate": 0.00017978768811542647, "loss": 1.2864, "step": 2707 }, { "epoch": 0.21, "grad_norm": 1.0678335428237915, "learning_rate": 0.00017977322368683868, "loss": 2.056, "step": 2708 }, { "epoch": 0.21, "grad_norm": 1.6958097219467163, "learning_rate": 0.00017975875466684298, "loss": 1.5562, "step": 2709 }, { "epoch": 0.21, "grad_norm": 1.108420729637146, "learning_rate": 0.00017974428105627208, "loss": 1.3468, "step": 2710 }, { "epoch": 0.21, "grad_norm": 2.3043594360351562, "learning_rate": 0.0001797298028559591, "loss": 1.7515, "step": 2711 }, { "epoch": 0.21, "grad_norm": 1.2974086999893188, "learning_rate": 0.00017971532006673727, "loss": 1.8971, "step": 2712 }, { "epoch": 0.21, "grad_norm": 0.852262020111084, "learning_rate": 0.00017970083268944022, "loss": 1.6165, "step": 2713 }, { "epoch": 0.21, "grad_norm": 1.1394715309143066, "learning_rate": 0.00017968634072490177, "loss": 1.525, "step": 2714 }, { "epoch": 0.21, "grad_norm": 1.730223536491394, "learning_rate": 0.00017967184417395598, "loss": 1.5425, 
"step": 2715 }, { "epoch": 0.21, "grad_norm": 1.4607722759246826, "learning_rate": 0.00017965734303743722, "loss": 1.5652, "step": 2716 }, { "epoch": 0.21, "grad_norm": 1.4739513397216797, "learning_rate": 0.00017964283731618014, "loss": 1.5159, "step": 2717 }, { "epoch": 0.21, "grad_norm": 1.407629132270813, "learning_rate": 0.0001796283270110196, "loss": 2.1953, "step": 2718 }, { "epoch": 0.21, "grad_norm": 1.3666435480117798, "learning_rate": 0.00017961381212279077, "loss": 1.55, "step": 2719 }, { "epoch": 0.21, "grad_norm": 0.897189199924469, "learning_rate": 0.00017959929265232906, "loss": 1.517, "step": 2720 }, { "epoch": 0.21, "grad_norm": 0.9913204908370972, "learning_rate": 0.00017958476860047016, "loss": 1.3644, "step": 2721 }, { "epoch": 0.21, "grad_norm": 1.2657074928283691, "learning_rate": 0.00017957023996804998, "loss": 1.1287, "step": 2722 }, { "epoch": 0.21, "grad_norm": 2.4608800411224365, "learning_rate": 0.00017955570675590477, "loss": 2.2089, "step": 2723 }, { "epoch": 0.21, "grad_norm": 1.0193554162979126, "learning_rate": 0.00017954116896487095, "loss": 1.2512, "step": 2724 }, { "epoch": 0.21, "grad_norm": 1.0996479988098145, "learning_rate": 0.0001795266265957853, "loss": 1.7387, "step": 2725 }, { "epoch": 0.21, "grad_norm": 2.396859645843506, "learning_rate": 0.0001795120796494848, "loss": 0.9478, "step": 2726 }, { "epoch": 0.21, "grad_norm": 1.007908821105957, "learning_rate": 0.00017949752812680667, "loss": 1.4004, "step": 2727 }, { "epoch": 0.21, "grad_norm": 2.1089260578155518, "learning_rate": 0.00017948297202858852, "loss": 2.8305, "step": 2728 }, { "epoch": 0.21, "grad_norm": 1.6246780157089233, "learning_rate": 0.00017946841135566807, "loss": 1.461, "step": 2729 }, { "epoch": 0.21, "grad_norm": 1.5077213048934937, "learning_rate": 0.00017945384610888341, "loss": 1.9362, "step": 2730 }, { "epoch": 0.21, "grad_norm": 0.9061434268951416, "learning_rate": 0.00017943927628907277, "loss": 1.3933, "step": 2731 }, { "epoch": 0.21, 
"grad_norm": 1.573180913925171, "learning_rate": 0.00017942470189707485, "loss": 1.3641, "step": 2732 }, { "epoch": 0.21, "grad_norm": 0.94691401720047, "learning_rate": 0.00017941012293372843, "loss": 1.3654, "step": 2733 }, { "epoch": 0.21, "grad_norm": 1.2123138904571533, "learning_rate": 0.00017939553939987259, "loss": 2.0694, "step": 2734 }, { "epoch": 0.21, "grad_norm": 1.8518104553222656, "learning_rate": 0.00017938095129634674, "loss": 1.4335, "step": 2735 }, { "epoch": 0.21, "grad_norm": 1.3967090845108032, "learning_rate": 0.00017936635862399048, "loss": 1.3579, "step": 2736 }, { "epoch": 0.21, "grad_norm": 1.5489751100540161, "learning_rate": 0.0001793517613836437, "loss": 1.6452, "step": 2737 }, { "epoch": 0.21, "grad_norm": 1.8444100618362427, "learning_rate": 0.00017933715957614658, "loss": 1.7524, "step": 2738 }, { "epoch": 0.21, "grad_norm": 1.8225457668304443, "learning_rate": 0.0001793225532023395, "loss": 1.6047, "step": 2739 }, { "epoch": 0.21, "grad_norm": 1.3475584983825684, "learning_rate": 0.0001793079422630632, "loss": 1.2582, "step": 2740 }, { "epoch": 0.21, "grad_norm": 1.8823466300964355, "learning_rate": 0.00017929332675915857, "loss": 2.0267, "step": 2741 }, { "epoch": 0.21, "grad_norm": 1.041671633720398, "learning_rate": 0.00017927870669146685, "loss": 1.4587, "step": 2742 }, { "epoch": 0.21, "grad_norm": 1.2233411073684692, "learning_rate": 0.00017926408206082948, "loss": 1.3481, "step": 2743 }, { "epoch": 0.21, "grad_norm": 1.57184898853302, "learning_rate": 0.0001792494528680882, "loss": 1.6697, "step": 2744 }, { "epoch": 0.21, "grad_norm": 2.23740816116333, "learning_rate": 0.00017923481911408503, "loss": 2.1446, "step": 2745 }, { "epoch": 0.21, "grad_norm": 1.3570489883422852, "learning_rate": 0.0001792201807996622, "loss": 1.184, "step": 2746 }, { "epoch": 0.21, "grad_norm": 1.2090438604354858, "learning_rate": 0.00017920553792566223, "loss": 1.6649, "step": 2747 }, { "epoch": 0.21, "grad_norm": 2.9932949542999268, 
"learning_rate": 0.0001791908904929279, "loss": 1.6734, "step": 2748 }, { "epoch": 0.21, "grad_norm": 1.059169054031372, "learning_rate": 0.00017917623850230227, "loss": 1.4477, "step": 2749 }, { "epoch": 0.21, "grad_norm": 2.148447036743164, "learning_rate": 0.00017916158195462867, "loss": 1.8048, "step": 2750 }, { "epoch": 0.21, "grad_norm": 1.5640504360198975, "learning_rate": 0.0001791469208507506, "loss": 2.0327, "step": 2751 }, { "epoch": 0.21, "grad_norm": 1.128582239151001, "learning_rate": 0.00017913225519151194, "loss": 1.41, "step": 2752 }, { "epoch": 0.21, "grad_norm": 2.8488619327545166, "learning_rate": 0.00017911758497775676, "loss": 1.134, "step": 2753 }, { "epoch": 0.21, "grad_norm": 1.4455634355545044, "learning_rate": 0.00017910291021032944, "loss": 1.5631, "step": 2754 }, { "epoch": 0.21, "grad_norm": 1.4944995641708374, "learning_rate": 0.00017908823089007457, "loss": 1.2769, "step": 2755 }, { "epoch": 0.21, "grad_norm": 0.9813058376312256, "learning_rate": 0.00017907354701783706, "loss": 1.6637, "step": 2756 }, { "epoch": 0.21, "grad_norm": 1.369489073753357, "learning_rate": 0.000179058858594462, "loss": 1.4978, "step": 2757 }, { "epoch": 0.21, "grad_norm": 1.2554099559783936, "learning_rate": 0.00017904416562079486, "loss": 2.1859, "step": 2758 }, { "epoch": 0.21, "grad_norm": 2.28222393989563, "learning_rate": 0.00017902946809768126, "loss": 1.2206, "step": 2759 }, { "epoch": 0.21, "grad_norm": 1.7174161672592163, "learning_rate": 0.00017901476602596715, "loss": 1.6194, "step": 2760 }, { "epoch": 0.21, "grad_norm": 1.4694924354553223, "learning_rate": 0.0001790000594064987, "loss": 1.8251, "step": 2761 }, { "epoch": 0.21, "grad_norm": 0.8462483286857605, "learning_rate": 0.00017898534824012233, "loss": 1.3978, "step": 2762 }, { "epoch": 0.21, "grad_norm": 1.5358284711837769, "learning_rate": 0.00017897063252768485, "loss": 1.8495, "step": 2763 }, { "epoch": 0.21, "grad_norm": 1.027576208114624, "learning_rate": 0.00017895591227003315, 
"loss": 1.2483, "step": 2764 }, { "epoch": 0.21, "grad_norm": 1.9198613166809082, "learning_rate": 0.0001789411874680145, "loss": 2.0361, "step": 2765 }, { "epoch": 0.21, "grad_norm": 2.020937919616699, "learning_rate": 0.00017892645812247636, "loss": 1.4754, "step": 2766 }, { "epoch": 0.21, "grad_norm": 3.1254162788391113, "learning_rate": 0.00017891172423426657, "loss": 2.3929, "step": 2767 }, { "epoch": 0.21, "grad_norm": 1.3503447771072388, "learning_rate": 0.00017889698580423303, "loss": 1.6302, "step": 2768 }, { "epoch": 0.21, "grad_norm": 0.9547855854034424, "learning_rate": 0.00017888224283322415, "loss": 1.4884, "step": 2769 }, { "epoch": 0.21, "grad_norm": 1.4102171659469604, "learning_rate": 0.00017886749532208837, "loss": 1.6753, "step": 2770 }, { "epoch": 0.21, "grad_norm": 1.5105555057525635, "learning_rate": 0.00017885274327167453, "loss": 1.8855, "step": 2771 }, { "epoch": 0.21, "grad_norm": 1.2191236019134521, "learning_rate": 0.0001788379866828317, "loss": 1.2524, "step": 2772 }, { "epoch": 0.21, "grad_norm": 2.076925277709961, "learning_rate": 0.00017882322555640924, "loss": 1.7702, "step": 2773 }, { "epoch": 0.21, "grad_norm": 1.5253225564956665, "learning_rate": 0.00017880845989325667, "loss": 1.3869, "step": 2774 }, { "epoch": 0.21, "grad_norm": 2.9706666469573975, "learning_rate": 0.0001787936896942239, "loss": 1.5181, "step": 2775 }, { "epoch": 0.21, "grad_norm": 1.2555243968963623, "learning_rate": 0.000178778914960161, "loss": 1.0106, "step": 2776 }, { "epoch": 0.21, "grad_norm": 1.0951995849609375, "learning_rate": 0.00017876413569191838, "loss": 1.1281, "step": 2777 }, { "epoch": 0.21, "grad_norm": 1.101011037826538, "learning_rate": 0.0001787493518903466, "loss": 1.6771, "step": 2778 }, { "epoch": 0.21, "grad_norm": 1.9245119094848633, "learning_rate": 0.0001787345635562966, "loss": 1.716, "step": 2779 }, { "epoch": 0.21, "grad_norm": 1.7073249816894531, "learning_rate": 0.0001787197706906196, "loss": 1.5689, "step": 2780 }, { "epoch": 
0.21, "grad_norm": 1.0769165754318237, "learning_rate": 0.00017870497329416688, "loss": 1.1957, "step": 2781 }, { "epoch": 0.21, "grad_norm": 1.5829912424087524, "learning_rate": 0.0001786901713677902, "loss": 1.6839, "step": 2782 }, { "epoch": 0.21, "grad_norm": 2.962778091430664, "learning_rate": 0.00017867536491234147, "loss": 2.0288, "step": 2783 }, { "epoch": 0.21, "grad_norm": 0.9817456007003784, "learning_rate": 0.0001786605539286729, "loss": 1.3837, "step": 2784 }, { "epoch": 0.21, "grad_norm": 2.2073166370391846, "learning_rate": 0.00017864573841763694, "loss": 1.5691, "step": 2785 }, { "epoch": 0.21, "grad_norm": 1.0485587120056152, "learning_rate": 0.0001786309183800863, "loss": 1.4506, "step": 2786 }, { "epoch": 0.21, "grad_norm": 1.0252436399459839, "learning_rate": 0.000178616093816874, "loss": 1.821, "step": 2787 }, { "epoch": 0.21, "grad_norm": 2.0755739212036133, "learning_rate": 0.00017860126472885323, "loss": 2.0214, "step": 2788 }, { "epoch": 0.21, "grad_norm": 1.8570172786712646, "learning_rate": 0.0001785864311168775, "loss": 1.2924, "step": 2789 }, { "epoch": 0.21, "grad_norm": 1.0229368209838867, "learning_rate": 0.00017857159298180055, "loss": 1.5016, "step": 2790 }, { "epoch": 0.21, "grad_norm": 1.0649616718292236, "learning_rate": 0.00017855675032447648, "loss": 0.8872, "step": 2791 }, { "epoch": 0.21, "grad_norm": 1.8503681421279907, "learning_rate": 0.00017854190314575948, "loss": 1.6714, "step": 2792 }, { "epoch": 0.21, "grad_norm": 1.5704699754714966, "learning_rate": 0.00017852705144650414, "loss": 1.996, "step": 2793 }, { "epoch": 0.21, "grad_norm": 1.2608487606048584, "learning_rate": 0.00017851219522756526, "loss": 1.5987, "step": 2794 }, { "epoch": 0.21, "grad_norm": 1.5790461301803589, "learning_rate": 0.00017849733448979787, "loss": 1.2926, "step": 2795 }, { "epoch": 0.21, "grad_norm": 0.9958703517913818, "learning_rate": 0.0001784824692340573, "loss": 1.7375, "step": 2796 }, { "epoch": 0.21, "grad_norm": 1.0007989406585693, 
"learning_rate": 0.00017846759946119918, "loss": 1.6766, "step": 2797 }, { "epoch": 0.21, "grad_norm": 1.0406056642532349, "learning_rate": 0.0001784527251720793, "loss": 0.7154, "step": 2798 }, { "epoch": 0.21, "grad_norm": 1.6857037544250488, "learning_rate": 0.00017843784636755375, "loss": 1.697, "step": 2799 }, { "epoch": 0.21, "grad_norm": 1.3758872747421265, "learning_rate": 0.00017842296304847893, "loss": 1.2016, "step": 2800 }, { "epoch": 0.21, "grad_norm": 0.876728892326355, "learning_rate": 0.00017840807521571144, "loss": 1.1425, "step": 2801 }, { "epoch": 0.21, "grad_norm": 1.120193362236023, "learning_rate": 0.00017839318287010816, "loss": 1.4505, "step": 2802 }, { "epoch": 0.21, "grad_norm": 1.1004639863967896, "learning_rate": 0.00017837828601252622, "loss": 1.6688, "step": 2803 }, { "epoch": 0.21, "grad_norm": 0.9665641784667969, "learning_rate": 0.00017836338464382307, "loss": 1.224, "step": 2804 }, { "epoch": 0.21, "grad_norm": 1.1665675640106201, "learning_rate": 0.00017834847876485629, "loss": 1.5067, "step": 2805 }, { "epoch": 0.21, "grad_norm": 1.1408460140228271, "learning_rate": 0.00017833356837648387, "loss": 1.6465, "step": 2806 }, { "epoch": 0.21, "grad_norm": 1.6898006200790405, "learning_rate": 0.00017831865347956395, "loss": 1.4203, "step": 2807 }, { "epoch": 0.21, "grad_norm": 1.285088062286377, "learning_rate": 0.00017830373407495503, "loss": 1.8467, "step": 2808 }, { "epoch": 0.21, "grad_norm": 0.8459039330482483, "learning_rate": 0.0001782888101635157, "loss": 0.9054, "step": 2809 }, { "epoch": 0.21, "grad_norm": 1.593756914138794, "learning_rate": 0.00017827388174610498, "loss": 1.9598, "step": 2810 }, { "epoch": 0.21, "grad_norm": 1.7429964542388916, "learning_rate": 0.0001782589488235821, "loss": 1.3948, "step": 2811 }, { "epoch": 0.21, "grad_norm": 0.7983728647232056, "learning_rate": 0.00017824401139680652, "loss": 1.7272, "step": 2812 }, { "epoch": 0.21, "grad_norm": 1.1525410413742065, "learning_rate": 0.00017822906946663794, 
"loss": 1.5984, "step": 2813 }, { "epoch": 0.21, "grad_norm": 1.4396781921386719, "learning_rate": 0.0001782141230339364, "loss": 1.7708, "step": 2814 }, { "epoch": 0.21, "grad_norm": 0.9844285845756531, "learning_rate": 0.00017819917209956215, "loss": 1.4762, "step": 2815 }, { "epoch": 0.21, "grad_norm": 1.0625238418579102, "learning_rate": 0.0001781842166643757, "loss": 2.0925, "step": 2816 }, { "epoch": 0.21, "grad_norm": 1.182569980621338, "learning_rate": 0.00017816925672923777, "loss": 1.8197, "step": 2817 }, { "epoch": 0.22, "grad_norm": 1.3815293312072754, "learning_rate": 0.00017815429229500946, "loss": 1.4252, "step": 2818 }, { "epoch": 0.22, "grad_norm": 1.2898019552230835, "learning_rate": 0.00017813932336255202, "loss": 1.3685, "step": 2819 }, { "epoch": 0.22, "grad_norm": 0.9709334969520569, "learning_rate": 0.00017812434993272702, "loss": 1.4821, "step": 2820 }, { "epoch": 0.22, "grad_norm": 1.9355504512786865, "learning_rate": 0.00017810937200639623, "loss": 1.8427, "step": 2821 }, { "epoch": 0.22, "grad_norm": 1.0345884561538696, "learning_rate": 0.00017809438958442178, "loss": 1.7978, "step": 2822 }, { "epoch": 0.22, "grad_norm": 1.7920200824737549, "learning_rate": 0.00017807940266766593, "loss": 1.9975, "step": 2823 }, { "epoch": 0.22, "grad_norm": 1.767225742340088, "learning_rate": 0.00017806441125699132, "loss": 2.0778, "step": 2824 }, { "epoch": 0.22, "grad_norm": 1.2470613718032837, "learning_rate": 0.0001780494153532607, "loss": 1.5361, "step": 2825 }, { "epoch": 0.22, "grad_norm": 1.7618205547332764, "learning_rate": 0.0001780344149573373, "loss": 2.0712, "step": 2826 }, { "epoch": 0.22, "grad_norm": 1.415669560432434, "learning_rate": 0.0001780194100700844, "loss": 1.9206, "step": 2827 }, { "epoch": 0.22, "grad_norm": 1.5162514448165894, "learning_rate": 0.00017800440069236563, "loss": 1.8554, "step": 2828 }, { "epoch": 0.22, "grad_norm": 3.410914659500122, "learning_rate": 0.00017798938682504485, "loss": 2.0027, "step": 2829 }, { 
"epoch": 0.22, "grad_norm": 1.1071679592132568, "learning_rate": 0.00017797436846898619, "loss": 1.3935, "step": 2830 }, { "epoch": 0.22, "grad_norm": 1.147964596748352, "learning_rate": 0.00017795934562505407, "loss": 1.4912, "step": 2831 }, { "epoch": 0.22, "grad_norm": 2.7435617446899414, "learning_rate": 0.00017794431829411318, "loss": 2.129, "step": 2832 }, { "epoch": 0.22, "grad_norm": 1.3172991275787354, "learning_rate": 0.00017792928647702833, "loss": 1.7065, "step": 2833 }, { "epoch": 0.22, "grad_norm": 1.8706392049789429, "learning_rate": 0.00017791425017466478, "loss": 1.7689, "step": 2834 }, { "epoch": 0.22, "grad_norm": 1.0704721212387085, "learning_rate": 0.0001778992093878879, "loss": 2.0377, "step": 2835 }, { "epoch": 0.22, "grad_norm": 1.085384726524353, "learning_rate": 0.00017788416411756338, "loss": 1.6258, "step": 2836 }, { "epoch": 0.22, "grad_norm": 1.0377522706985474, "learning_rate": 0.00017786911436455717, "loss": 1.239, "step": 2837 }, { "epoch": 0.22, "grad_norm": 1.075568437576294, "learning_rate": 0.00017785406012973547, "loss": 1.4787, "step": 2838 }, { "epoch": 0.22, "grad_norm": 1.6562998294830322, "learning_rate": 0.00017783900141396476, "loss": 1.1793, "step": 2839 }, { "epoch": 0.22, "grad_norm": 1.0319321155548096, "learning_rate": 0.00017782393821811173, "loss": 1.6366, "step": 2840 }, { "epoch": 0.22, "grad_norm": 0.8204066753387451, "learning_rate": 0.00017780887054304336, "loss": 1.0569, "step": 2841 }, { "epoch": 0.22, "grad_norm": 1.745280146598816, "learning_rate": 0.00017779379838962685, "loss": 1.7182, "step": 2842 }, { "epoch": 0.22, "grad_norm": 0.9263980388641357, "learning_rate": 0.00017777872175872976, "loss": 1.2293, "step": 2843 }, { "epoch": 0.22, "grad_norm": 1.0992419719696045, "learning_rate": 0.0001777636406512198, "loss": 1.2729, "step": 2844 }, { "epoch": 0.22, "grad_norm": 1.0471045970916748, "learning_rate": 0.00017774855506796496, "loss": 1.6821, "step": 2845 }, { "epoch": 0.22, "grad_norm": 
0.9824790358543396, "learning_rate": 0.0001777334650098335, "loss": 1.4496, "step": 2846 }, { "epoch": 0.22, "grad_norm": 1.1242773532867432, "learning_rate": 0.00017771837047769395, "loss": 0.8899, "step": 2847 }, { "epoch": 0.22, "grad_norm": 0.6642874479293823, "learning_rate": 0.00017770327147241508, "loss": 1.0561, "step": 2848 }, { "epoch": 0.22, "grad_norm": 2.330388307571411, "learning_rate": 0.00017768816799486597, "loss": 2.3977, "step": 2849 }, { "epoch": 0.22, "grad_norm": 1.6110011339187622, "learning_rate": 0.00017767306004591585, "loss": 1.1704, "step": 2850 }, { "epoch": 0.22, "grad_norm": 1.1786772012710571, "learning_rate": 0.0001776579476264343, "loss": 1.8125, "step": 2851 }, { "epoch": 0.22, "grad_norm": 2.8747451305389404, "learning_rate": 0.00017764283073729112, "loss": 2.1126, "step": 2852 }, { "epoch": 0.22, "grad_norm": 1.132537841796875, "learning_rate": 0.0001776277093793564, "loss": 1.691, "step": 2853 }, { "epoch": 0.22, "grad_norm": 1.1886969804763794, "learning_rate": 0.0001776125835535004, "loss": 1.529, "step": 2854 }, { "epoch": 0.22, "grad_norm": 2.1540944576263428, "learning_rate": 0.00017759745326059379, "loss": 1.1515, "step": 2855 }, { "epoch": 0.22, "grad_norm": 0.898065447807312, "learning_rate": 0.00017758231850150728, "loss": 1.4965, "step": 2856 }, { "epoch": 0.22, "grad_norm": 1.1841473579406738, "learning_rate": 0.0001775671792771121, "loss": 1.1464, "step": 2857 }, { "epoch": 0.22, "grad_norm": 1.3213317394256592, "learning_rate": 0.00017755203558827946, "loss": 1.7211, "step": 2858 }, { "epoch": 0.22, "grad_norm": 2.170487880706787, "learning_rate": 0.0001775368874358811, "loss": 2.8839, "step": 2859 }, { "epoch": 0.22, "grad_norm": 1.6321872472763062, "learning_rate": 0.0001775217348207888, "loss": 1.4071, "step": 2860 }, { "epoch": 0.22, "grad_norm": 2.080270290374756, "learning_rate": 0.0001775065777438747, "loss": 2.5606, "step": 2861 }, { "epoch": 0.22, "grad_norm": 1.0722572803497314, "learning_rate": 
0.0001774914162060112, "loss": 1.6664, "step": 2862 }, { "epoch": 0.22, "grad_norm": 1.4680075645446777, "learning_rate": 0.0001774762502080709, "loss": 2.088, "step": 2863 }, { "epoch": 0.22, "grad_norm": 1.4036369323730469, "learning_rate": 0.0001774610797509267, "loss": 2.0419, "step": 2864 }, { "epoch": 0.22, "grad_norm": 1.5286943912506104, "learning_rate": 0.00017744590483545177, "loss": 1.4845, "step": 2865 }, { "epoch": 0.22, "grad_norm": 0.734014630317688, "learning_rate": 0.00017743072546251947, "loss": 1.7281, "step": 2866 }, { "epoch": 0.22, "grad_norm": 1.286757230758667, "learning_rate": 0.0001774155416330035, "loss": 1.6416, "step": 2867 }, { "epoch": 0.22, "grad_norm": 1.4613078832626343, "learning_rate": 0.00017740035334777777, "loss": 1.5608, "step": 2868 }, { "epoch": 0.22, "grad_norm": 1.4693994522094727, "learning_rate": 0.00017738516060771643, "loss": 1.5802, "step": 2869 }, { "epoch": 0.22, "grad_norm": 1.256558895111084, "learning_rate": 0.00017736996341369393, "loss": 1.6264, "step": 2870 }, { "epoch": 0.22, "grad_norm": 1.0433353185653687, "learning_rate": 0.0001773547617665849, "loss": 0.7341, "step": 2871 }, { "epoch": 0.22, "grad_norm": 1.9692230224609375, "learning_rate": 0.0001773395556672644, "loss": 2.1731, "step": 2872 }, { "epoch": 0.22, "grad_norm": 1.7875900268554688, "learning_rate": 0.00017732434511660754, "loss": 1.4805, "step": 2873 }, { "epoch": 0.22, "grad_norm": 1.585415244102478, "learning_rate": 0.00017730913011548979, "loss": 1.3485, "step": 2874 }, { "epoch": 0.22, "grad_norm": 4.7535247802734375, "learning_rate": 0.00017729391066478688, "loss": 1.6169, "step": 2875 }, { "epoch": 0.22, "grad_norm": 1.0272022485733032, "learning_rate": 0.00017727868676537475, "loss": 1.6282, "step": 2876 }, { "epoch": 0.22, "grad_norm": 1.7832159996032715, "learning_rate": 0.00017726345841812967, "loss": 2.2188, "step": 2877 }, { "epoch": 0.22, "grad_norm": 1.2951619625091553, "learning_rate": 0.000177248225623928, "loss": 1.4267, 
"step": 2878 }, { "epoch": 0.22, "grad_norm": 0.9606695175170898, "learning_rate": 0.00017723298838364667, "loss": 1.3325, "step": 2879 }, { "epoch": 0.22, "grad_norm": 1.5184980630874634, "learning_rate": 0.00017721774669816252, "loss": 1.9458, "step": 2880 }, { "epoch": 0.22, "grad_norm": 1.3741962909698486, "learning_rate": 0.0001772025005683528, "loss": 1.5384, "step": 2881 }, { "epoch": 0.22, "grad_norm": 1.0849144458770752, "learning_rate": 0.00017718724999509508, "loss": 1.942, "step": 2882 }, { "epoch": 0.22, "grad_norm": 1.4292443990707397, "learning_rate": 0.00017717199497926708, "loss": 1.9389, "step": 2883 }, { "epoch": 0.22, "grad_norm": 1.9631528854370117, "learning_rate": 0.00017715673552174684, "loss": 2.2069, "step": 2884 }, { "epoch": 0.22, "grad_norm": 1.076318621635437, "learning_rate": 0.00017714147162341262, "loss": 1.5541, "step": 2885 }, { "epoch": 0.22, "grad_norm": 1.4244927167892456, "learning_rate": 0.00017712620328514292, "loss": 2.0244, "step": 2886 }, { "epoch": 0.22, "grad_norm": 1.0402904748916626, "learning_rate": 0.00017711093050781654, "loss": 1.7153, "step": 2887 }, { "epoch": 0.22, "grad_norm": 4.227261543273926, "learning_rate": 0.0001770956532923125, "loss": 2.135, "step": 2888 }, { "epoch": 0.22, "grad_norm": 1.5922776460647583, "learning_rate": 0.00017708037163951013, "loss": 1.3928, "step": 2889 }, { "epoch": 0.22, "grad_norm": 1.69349205493927, "learning_rate": 0.00017706508555028893, "loss": 1.8622, "step": 2890 }, { "epoch": 0.22, "grad_norm": 1.2252836227416992, "learning_rate": 0.00017704979502552873, "loss": 1.7072, "step": 2891 }, { "epoch": 0.22, "grad_norm": 1.4621037244796753, "learning_rate": 0.0001770345000661096, "loss": 1.4506, "step": 2892 }, { "epoch": 0.22, "grad_norm": 0.8706007599830627, "learning_rate": 0.00017701920067291182, "loss": 1.3675, "step": 2893 }, { "epoch": 0.22, "grad_norm": 1.1612497568130493, "learning_rate": 0.000177003896846816, "loss": 1.1906, "step": 2894 }, { "epoch": 0.22, 
"grad_norm": 2.3252756595611572, "learning_rate": 0.00017698858858870292, "loss": 2.3991, "step": 2895 }, { "epoch": 0.22, "grad_norm": 0.9133155941963196, "learning_rate": 0.00017697327589945365, "loss": 1.5048, "step": 2896 }, { "epoch": 0.22, "grad_norm": 1.3888386487960815, "learning_rate": 0.00017695795877994955, "loss": 1.2263, "step": 2897 }, { "epoch": 0.22, "grad_norm": 1.8526657819747925, "learning_rate": 0.00017694263723107224, "loss": 1.9899, "step": 2898 }, { "epoch": 0.22, "grad_norm": 1.631829023361206, "learning_rate": 0.00017692731125370354, "loss": 1.8929, "step": 2899 }, { "epoch": 0.22, "grad_norm": 2.254061460494995, "learning_rate": 0.0001769119808487255, "loss": 1.3333, "step": 2900 }, { "epoch": 0.22, "grad_norm": 1.0644776821136475, "learning_rate": 0.00017689664601702054, "loss": 1.046, "step": 2901 }, { "epoch": 0.22, "grad_norm": 3.4074547290802, "learning_rate": 0.00017688130675947122, "loss": 1.7134, "step": 2902 }, { "epoch": 0.22, "grad_norm": 1.3046749830245972, "learning_rate": 0.00017686596307696045, "loss": 2.4406, "step": 2903 }, { "epoch": 0.22, "grad_norm": 0.9585663676261902, "learning_rate": 0.0001768506149703713, "loss": 1.5793, "step": 2904 }, { "epoch": 0.22, "grad_norm": 1.3033407926559448, "learning_rate": 0.00017683526244058716, "loss": 1.9714, "step": 2905 }, { "epoch": 0.22, "grad_norm": 1.3798236846923828, "learning_rate": 0.00017681990548849163, "loss": 1.5208, "step": 2906 }, { "epoch": 0.22, "grad_norm": 1.9954004287719727, "learning_rate": 0.00017680454411496868, "loss": 1.9411, "step": 2907 }, { "epoch": 0.22, "grad_norm": 1.408607006072998, "learning_rate": 0.00017678917832090235, "loss": 1.1175, "step": 2908 }, { "epoch": 0.22, "grad_norm": 1.1678192615509033, "learning_rate": 0.00017677380810717706, "loss": 1.0894, "step": 2909 }, { "epoch": 0.22, "grad_norm": 1.6035709381103516, "learning_rate": 0.00017675843347467746, "loss": 2.0113, "step": 2910 }, { "epoch": 0.22, "grad_norm": 2.3723537921905518, 
"learning_rate": 0.00017674305442428844, "loss": 1.7855, "step": 2911 }, { "epoch": 0.22, "grad_norm": 1.3367387056350708, "learning_rate": 0.00017672767095689516, "loss": 1.9498, "step": 2912 }, { "epoch": 0.22, "grad_norm": 2.319126605987549, "learning_rate": 0.00017671228307338302, "loss": 1.8248, "step": 2913 }, { "epoch": 0.22, "grad_norm": 1.4477500915527344, "learning_rate": 0.00017669689077463773, "loss": 1.762, "step": 2914 }, { "epoch": 0.22, "grad_norm": 1.2764370441436768, "learning_rate": 0.00017668149406154513, "loss": 1.9067, "step": 2915 }, { "epoch": 0.22, "grad_norm": 5.947659015655518, "learning_rate": 0.00017666609293499143, "loss": 2.2145, "step": 2916 }, { "epoch": 0.22, "grad_norm": 1.6921685934066772, "learning_rate": 0.00017665068739586306, "loss": 1.5851, "step": 2917 }, { "epoch": 0.22, "grad_norm": 1.1917641162872314, "learning_rate": 0.00017663527744504664, "loss": 1.7368, "step": 2918 }, { "epoch": 0.22, "grad_norm": 0.8058098554611206, "learning_rate": 0.00017661986308342917, "loss": 1.3906, "step": 2919 }, { "epoch": 0.22, "grad_norm": 1.0972899198532104, "learning_rate": 0.0001766044443118978, "loss": 0.9843, "step": 2920 }, { "epoch": 0.22, "grad_norm": 1.7404465675354004, "learning_rate": 0.00017658902113134, "loss": 1.6518, "step": 2921 }, { "epoch": 0.22, "grad_norm": 1.2985998392105103, "learning_rate": 0.0001765735935426434, "loss": 1.8064, "step": 2922 }, { "epoch": 0.22, "grad_norm": 1.1594136953353882, "learning_rate": 0.00017655816154669608, "loss": 1.2904, "step": 2923 }, { "epoch": 0.22, "grad_norm": 0.8638284802436829, "learning_rate": 0.00017654272514438604, "loss": 1.4744, "step": 2924 }, { "epoch": 0.22, "grad_norm": 1.4989476203918457, "learning_rate": 0.0001765272843366019, "loss": 1.8559, "step": 2925 }, { "epoch": 0.22, "grad_norm": 3.4673984050750732, "learning_rate": 0.00017651183912423228, "loss": 1.9682, "step": 2926 }, { "epoch": 0.22, "grad_norm": 1.1461663246154785, "learning_rate": 0.0001764963895081662, 
"loss": 1.6991, "step": 2927 }, { "epoch": 0.22, "grad_norm": 1.6407140493392944, "learning_rate": 0.00017648093548929282, "loss": 1.699, "step": 2928 }, { "epoch": 0.22, "grad_norm": 1.966536283493042, "learning_rate": 0.00017646547706850165, "loss": 2.3752, "step": 2929 }, { "epoch": 0.22, "grad_norm": 1.1338125467300415, "learning_rate": 0.00017645001424668237, "loss": 1.9118, "step": 2930 }, { "epoch": 0.22, "grad_norm": 0.9858507513999939, "learning_rate": 0.000176434547024725, "loss": 0.9876, "step": 2931 }, { "epoch": 0.22, "grad_norm": 2.162541151046753, "learning_rate": 0.00017641907540351975, "loss": 1.1771, "step": 2932 }, { "epoch": 0.22, "grad_norm": 1.5870691537857056, "learning_rate": 0.00017640359938395707, "loss": 1.9983, "step": 2933 }, { "epoch": 0.22, "grad_norm": 1.3805882930755615, "learning_rate": 0.00017638811896692773, "loss": 1.8582, "step": 2934 }, { "epoch": 0.22, "grad_norm": 1.4002434015274048, "learning_rate": 0.0001763726341533227, "loss": 1.7799, "step": 2935 }, { "epoch": 0.22, "grad_norm": 0.9662376046180725, "learning_rate": 0.00017635714494403324, "loss": 1.1793, "step": 2936 }, { "epoch": 0.22, "grad_norm": 1.2965056896209717, "learning_rate": 0.00017634165133995083, "loss": 1.5926, "step": 2937 }, { "epoch": 0.22, "grad_norm": 1.955399751663208, "learning_rate": 0.0001763261533419672, "loss": 1.1429, "step": 2938 }, { "epoch": 0.22, "grad_norm": 3.04384183883667, "learning_rate": 0.00017631065095097438, "loss": 1.4949, "step": 2939 }, { "epoch": 0.22, "grad_norm": 2.67067289352417, "learning_rate": 0.00017629514416786458, "loss": 1.7343, "step": 2940 }, { "epoch": 0.22, "grad_norm": 1.891022801399231, "learning_rate": 0.00017627963299353036, "loss": 1.7875, "step": 2941 }, { "epoch": 0.22, "grad_norm": 1.168868064880371, "learning_rate": 0.00017626411742886443, "loss": 2.0602, "step": 2942 }, { "epoch": 0.22, "grad_norm": 1.9119205474853516, "learning_rate": 0.00017624859747475985, "loss": 2.1778, "step": 2943 }, { "epoch": 
0.22, "grad_norm": 1.3777488470077515, "learning_rate": 0.00017623307313210983, "loss": 1.4576, "step": 2944 }, { "epoch": 0.22, "grad_norm": 1.6857945919036865, "learning_rate": 0.00017621754440180792, "loss": 1.9927, "step": 2945 }, { "epoch": 0.22, "grad_norm": 3.8128502368927, "learning_rate": 0.00017620201128474783, "loss": 2.7562, "step": 2946 }, { "epoch": 0.22, "grad_norm": 1.0276039838790894, "learning_rate": 0.00017618647378182367, "loss": 1.6816, "step": 2947 }, { "epoch": 0.22, "grad_norm": 2.049342393875122, "learning_rate": 0.00017617093189392966, "loss": 1.6308, "step": 2948 }, { "epoch": 0.23, "grad_norm": 3.3629345893859863, "learning_rate": 0.0001761553856219603, "loss": 1.9847, "step": 2949 }, { "epoch": 0.23, "grad_norm": 1.5158624649047852, "learning_rate": 0.00017613983496681044, "loss": 1.8588, "step": 2950 }, { "epoch": 0.23, "grad_norm": 2.2021241188049316, "learning_rate": 0.00017612427992937506, "loss": 2.3475, "step": 2951 }, { "epoch": 0.23, "grad_norm": 1.4682643413543701, "learning_rate": 0.00017610872051054943, "loss": 1.746, "step": 2952 }, { "epoch": 0.23, "grad_norm": 1.2661197185516357, "learning_rate": 0.0001760931567112291, "loss": 1.4439, "step": 2953 }, { "epoch": 0.23, "grad_norm": 1.4592680931091309, "learning_rate": 0.00017607758853230992, "loss": 1.8324, "step": 2954 }, { "epoch": 0.23, "grad_norm": 2.3663713932037354, "learning_rate": 0.00017606201597468782, "loss": 1.4794, "step": 2955 }, { "epoch": 0.23, "grad_norm": 0.9880014061927795, "learning_rate": 0.00017604643903925915, "loss": 1.2653, "step": 2956 }, { "epoch": 0.23, "grad_norm": 1.6111185550689697, "learning_rate": 0.00017603085772692042, "loss": 1.6694, "step": 2957 }, { "epoch": 0.23, "grad_norm": 1.1751817464828491, "learning_rate": 0.00017601527203856847, "loss": 1.9418, "step": 2958 }, { "epoch": 0.23, "grad_norm": 1.6550623178482056, "learning_rate": 0.00017599968197510034, "loss": 1.4846, "step": 2959 }, { "epoch": 0.23, "grad_norm": 0.9062333106994629, 
"learning_rate": 0.00017598408753741326, "loss": 1.0618, "step": 2960 }, { "epoch": 0.23, "grad_norm": 1.1863586902618408, "learning_rate": 0.00017596848872640487, "loss": 1.1716, "step": 2961 }, { "epoch": 0.23, "grad_norm": 1.1861345767974854, "learning_rate": 0.00017595288554297293, "loss": 1.1912, "step": 2962 }, { "epoch": 0.23, "grad_norm": 1.6396961212158203, "learning_rate": 0.00017593727798801548, "loss": 1.8278, "step": 2963 }, { "epoch": 0.23, "grad_norm": 2.0566277503967285, "learning_rate": 0.00017592166606243082, "loss": 1.9935, "step": 2964 }, { "epoch": 0.23, "grad_norm": 1.0946069955825806, "learning_rate": 0.00017590604976711754, "loss": 1.2365, "step": 2965 }, { "epoch": 0.23, "grad_norm": 2.0868618488311768, "learning_rate": 0.00017589042910297445, "loss": 1.8321, "step": 2966 }, { "epoch": 0.23, "grad_norm": 1.716712474822998, "learning_rate": 0.0001758748040709006, "loss": 1.3332, "step": 2967 }, { "epoch": 0.23, "grad_norm": 1.1272343397140503, "learning_rate": 0.00017585917467179525, "loss": 1.5294, "step": 2968 }, { "epoch": 0.23, "grad_norm": 1.2962287664413452, "learning_rate": 0.00017584354090655803, "loss": 1.3971, "step": 2969 }, { "epoch": 0.23, "grad_norm": 1.6788017749786377, "learning_rate": 0.00017582790277608873, "loss": 1.2876, "step": 2970 }, { "epoch": 0.23, "grad_norm": 1.092777967453003, "learning_rate": 0.0001758122602812874, "loss": 1.8292, "step": 2971 }, { "epoch": 0.23, "grad_norm": 1.3850321769714355, "learning_rate": 0.00017579661342305439, "loss": 1.5321, "step": 2972 }, { "epoch": 0.23, "grad_norm": 1.0173187255859375, "learning_rate": 0.0001757809622022902, "loss": 2.475, "step": 2973 }, { "epoch": 0.23, "grad_norm": 1.2427825927734375, "learning_rate": 0.00017576530661989571, "loss": 1.0984, "step": 2974 }, { "epoch": 0.23, "grad_norm": 1.3353948593139648, "learning_rate": 0.00017574964667677197, "loss": 1.8697, "step": 2975 }, { "epoch": 0.23, "grad_norm": 0.9757177233695984, "learning_rate": 
0.0001757339823738203, "loss": 1.0929, "step": 2976 }, { "epoch": 0.23, "grad_norm": 4.889957904815674, "learning_rate": 0.0001757183137119423, "loss": 2.2903, "step": 2977 }, { "epoch": 0.23, "grad_norm": 1.3565962314605713, "learning_rate": 0.00017570264069203972, "loss": 2.3548, "step": 2978 }, { "epoch": 0.23, "grad_norm": 1.5622258186340332, "learning_rate": 0.0001756869633150147, "loss": 1.201, "step": 2979 }, { "epoch": 0.23, "grad_norm": 1.4675670862197876, "learning_rate": 0.00017567128158176953, "loss": 1.4844, "step": 2980 }, { "epoch": 0.23, "grad_norm": 2.2586512565612793, "learning_rate": 0.00017565559549320679, "loss": 1.1524, "step": 2981 }, { "epoch": 0.23, "grad_norm": 1.3531272411346436, "learning_rate": 0.00017563990505022932, "loss": 2.0799, "step": 2982 }, { "epoch": 0.23, "grad_norm": 1.0898302793502808, "learning_rate": 0.00017562421025374016, "loss": 1.9852, "step": 2983 }, { "epoch": 0.23, "grad_norm": 1.298033356666565, "learning_rate": 0.00017560851110464266, "loss": 1.572, "step": 2984 }, { "epoch": 0.23, "grad_norm": 1.4280644655227661, "learning_rate": 0.0001755928076038404, "loss": 1.6292, "step": 2985 }, { "epoch": 0.23, "grad_norm": 3.4691197872161865, "learning_rate": 0.0001755770997522372, "loss": 1.644, "step": 2986 }, { "epoch": 0.23, "grad_norm": 1.349010944366455, "learning_rate": 0.00017556138755073716, "loss": 1.267, "step": 2987 }, { "epoch": 0.23, "grad_norm": 1.9371050596237183, "learning_rate": 0.00017554567100024456, "loss": 1.3005, "step": 2988 }, { "epoch": 0.23, "grad_norm": 1.2215502262115479, "learning_rate": 0.00017552995010166402, "loss": 1.9883, "step": 2989 }, { "epoch": 0.23, "grad_norm": 1.3600311279296875, "learning_rate": 0.00017551422485590032, "loss": 1.3845, "step": 2990 }, { "epoch": 0.23, "grad_norm": 1.4041953086853027, "learning_rate": 0.0001754984952638586, "loss": 1.7178, "step": 2991 }, { "epoch": 0.23, "grad_norm": 1.3883388042449951, "learning_rate": 0.00017548276132644413, "loss": 1.4646, 
"step": 2992 }, { "epoch": 0.23, "grad_norm": 1.0637996196746826, "learning_rate": 0.00017546702304456255, "loss": 1.7393, "step": 2993 }, { "epoch": 0.23, "grad_norm": 0.7977830767631531, "learning_rate": 0.00017545128041911964, "loss": 1.1106, "step": 2994 }, { "epoch": 0.23, "grad_norm": 1.4317973852157593, "learning_rate": 0.00017543553345102152, "loss": 1.6201, "step": 2995 }, { "epoch": 0.23, "grad_norm": 3.2807295322418213, "learning_rate": 0.00017541978214117445, "loss": 1.3199, "step": 2996 }, { "epoch": 0.23, "grad_norm": 1.058272361755371, "learning_rate": 0.00017540402649048506, "loss": 1.6214, "step": 2997 }, { "epoch": 0.23, "grad_norm": 1.302574634552002, "learning_rate": 0.0001753882664998602, "loss": 2.0424, "step": 2998 }, { "epoch": 0.23, "grad_norm": 1.3312197923660278, "learning_rate": 0.0001753725021702069, "loss": 1.6216, "step": 2999 }, { "epoch": 0.23, "grad_norm": 0.9094836711883545, "learning_rate": 0.00017535673350243248, "loss": 1.2893, "step": 3000 }, { "epoch": 0.23, "grad_norm": 1.6843135356903076, "learning_rate": 0.00017534096049744459, "loss": 1.413, "step": 3001 }, { "epoch": 0.23, "grad_norm": 1.6704548597335815, "learning_rate": 0.00017532518315615096, "loss": 1.6256, "step": 3002 }, { "epoch": 0.23, "grad_norm": 2.2793006896972656, "learning_rate": 0.00017530940147945977, "loss": 2.152, "step": 3003 }, { "epoch": 0.23, "grad_norm": 1.3399417400360107, "learning_rate": 0.00017529361546827924, "loss": 0.8198, "step": 3004 }, { "epoch": 0.23, "grad_norm": 1.439415693283081, "learning_rate": 0.00017527782512351804, "loss": 1.8881, "step": 3005 }, { "epoch": 0.23, "grad_norm": 1.0062503814697266, "learning_rate": 0.00017526203044608495, "loss": 1.372, "step": 3006 }, { "epoch": 0.23, "grad_norm": 0.9257791042327881, "learning_rate": 0.00017524623143688902, "loss": 1.0432, "step": 3007 }, { "epoch": 0.23, "grad_norm": 1.4297720193862915, "learning_rate": 0.00017523042809683962, "loss": 2.1207, "step": 3008 }, { "epoch": 0.23, 
"grad_norm": 1.203241229057312, "learning_rate": 0.00017521462042684633, "loss": 1.4381, "step": 3009 }, { "epoch": 0.23, "grad_norm": 1.1350959539413452, "learning_rate": 0.00017519880842781894, "loss": 1.3145, "step": 3010 }, { "epoch": 0.23, "grad_norm": 2.8747830390930176, "learning_rate": 0.00017518299210066748, "loss": 2.2718, "step": 3011 }, { "epoch": 0.23, "grad_norm": 1.5215779542922974, "learning_rate": 0.00017516717144630238, "loss": 1.5143, "step": 3012 }, { "epoch": 0.23, "grad_norm": 1.7895687818527222, "learning_rate": 0.0001751513464656341, "loss": 1.6001, "step": 3013 }, { "epoch": 0.23, "grad_norm": 1.3306849002838135, "learning_rate": 0.00017513551715957356, "loss": 1.9226, "step": 3014 }, { "epoch": 0.23, "grad_norm": 3.3006832599639893, "learning_rate": 0.00017511968352903177, "loss": 1.9407, "step": 3015 }, { "epoch": 0.23, "grad_norm": 1.0729440450668335, "learning_rate": 0.00017510384557492, "loss": 2.4353, "step": 3016 }, { "epoch": 0.23, "grad_norm": 1.1463361978530884, "learning_rate": 0.00017508800329814995, "loss": 1.7111, "step": 3017 }, { "epoch": 0.23, "grad_norm": 1.5429528951644897, "learning_rate": 0.00017507215669963327, "loss": 1.173, "step": 3018 }, { "epoch": 0.23, "grad_norm": 0.9619948267936707, "learning_rate": 0.00017505630578028216, "loss": 1.5683, "step": 3019 }, { "epoch": 0.23, "grad_norm": 1.383739709854126, "learning_rate": 0.00017504045054100886, "loss": 0.9609, "step": 3020 }, { "epoch": 0.23, "grad_norm": 0.9041039943695068, "learning_rate": 0.00017502459098272594, "loss": 1.8314, "step": 3021 }, { "epoch": 0.23, "grad_norm": 1.4361225366592407, "learning_rate": 0.00017500872710634623, "loss": 1.223, "step": 3022 }, { "epoch": 0.23, "grad_norm": 1.0937055349349976, "learning_rate": 0.00017499285891278276, "loss": 1.4014, "step": 3023 }, { "epoch": 0.23, "grad_norm": 2.033750057220459, "learning_rate": 0.00017497698640294885, "loss": 1.9695, "step": 3024 }, { "epoch": 0.23, "grad_norm": 1.5316928625106812, 
"learning_rate": 0.0001749611095777581, "loss": 1.2147, "step": 3025 }, { "epoch": 0.23, "grad_norm": 2.2391927242279053, "learning_rate": 0.0001749452284381242, "loss": 1.9812, "step": 3026 }, { "epoch": 0.23, "grad_norm": 1.0367850065231323, "learning_rate": 0.00017492934298496128, "loss": 1.4992, "step": 3027 }, { "epoch": 0.23, "grad_norm": 1.8268846273422241, "learning_rate": 0.00017491345321918363, "loss": 1.744, "step": 3028 }, { "epoch": 0.23, "grad_norm": 2.812432289123535, "learning_rate": 0.0001748975591417058, "loss": 1.4736, "step": 3029 }, { "epoch": 0.23, "grad_norm": 4.415773868560791, "learning_rate": 0.0001748816607534426, "loss": 2.0781, "step": 3030 }, { "epoch": 0.23, "grad_norm": 1.1499978303909302, "learning_rate": 0.00017486575805530902, "loss": 1.2839, "step": 3031 }, { "epoch": 0.23, "grad_norm": 1.2472364902496338, "learning_rate": 0.00017484985104822043, "loss": 1.5553, "step": 3032 }, { "epoch": 0.23, "grad_norm": 1.5135338306427002, "learning_rate": 0.00017483393973309226, "loss": 1.8304, "step": 3033 }, { "epoch": 0.23, "grad_norm": 1.0218390226364136, "learning_rate": 0.00017481802411084042, "loss": 0.9947, "step": 3034 }, { "epoch": 0.23, "grad_norm": 1.3313552141189575, "learning_rate": 0.00017480210418238085, "loss": 1.2682, "step": 3035 }, { "epoch": 0.23, "grad_norm": 2.3570332527160645, "learning_rate": 0.00017478617994862988, "loss": 0.9918, "step": 3036 }, { "epoch": 0.23, "grad_norm": 1.110066533088684, "learning_rate": 0.00017477025141050402, "loss": 1.684, "step": 3037 }, { "epoch": 0.23, "grad_norm": 1.4859238862991333, "learning_rate": 0.00017475431856892008, "loss": 1.476, "step": 3038 }, { "epoch": 0.23, "grad_norm": 2.271141767501831, "learning_rate": 0.0001747383814247951, "loss": 1.6229, "step": 3039 }, { "epoch": 0.23, "grad_norm": 1.0436115264892578, "learning_rate": 0.00017472243997904626, "loss": 1.9066, "step": 3040 }, { "epoch": 0.23, "grad_norm": 1.6100084781646729, "learning_rate": 0.0001747064942325912, 
"loss": 1.4785, "step": 3041 }, { "epoch": 0.23, "grad_norm": 2.1734235286712646, "learning_rate": 0.0001746905441863476, "loss": 1.1533, "step": 3042 }, { "epoch": 0.23, "grad_norm": 1.0695306062698364, "learning_rate": 0.00017467458984123352, "loss": 1.8151, "step": 3043 }, { "epoch": 0.23, "grad_norm": 1.8815522193908691, "learning_rate": 0.0001746586311981672, "loss": 1.5128, "step": 3044 }, { "epoch": 0.23, "grad_norm": 1.2467896938323975, "learning_rate": 0.00017464266825806718, "loss": 1.696, "step": 3045 }, { "epoch": 0.23, "grad_norm": 1.4444350004196167, "learning_rate": 0.0001746267010218522, "loss": 1.7133, "step": 3046 }, { "epoch": 0.23, "grad_norm": 0.9965133666992188, "learning_rate": 0.0001746107294904413, "loss": 1.2879, "step": 3047 }, { "epoch": 0.23, "grad_norm": 1.110505223274231, "learning_rate": 0.00017459475366475368, "loss": 1.4416, "step": 3048 }, { "epoch": 0.23, "grad_norm": 1.461817741394043, "learning_rate": 0.00017457877354570887, "loss": 1.2143, "step": 3049 }, { "epoch": 0.23, "grad_norm": 1.5394742488861084, "learning_rate": 0.00017456278913422662, "loss": 1.189, "step": 3050 }, { "epoch": 0.23, "grad_norm": 1.5221259593963623, "learning_rate": 0.00017454680043122692, "loss": 1.7064, "step": 3051 }, { "epoch": 0.23, "grad_norm": 1.6113741397857666, "learning_rate": 0.00017453080743763, "loss": 2.4138, "step": 3052 }, { "epoch": 0.23, "grad_norm": 0.9411944150924683, "learning_rate": 0.00017451481015435638, "loss": 1.0787, "step": 3053 }, { "epoch": 0.23, "grad_norm": 1.2137068510055542, "learning_rate": 0.00017449880858232675, "loss": 1.4642, "step": 3054 }, { "epoch": 0.23, "grad_norm": 1.2181295156478882, "learning_rate": 0.00017448280272246212, "loss": 1.4723, "step": 3055 }, { "epoch": 0.23, "grad_norm": 2.092337131500244, "learning_rate": 0.00017446679257568373, "loss": 1.9833, "step": 3056 }, { "epoch": 0.23, "grad_norm": 1.3259040117263794, "learning_rate": 0.00017445077814291305, "loss": 1.7512, "step": 3057 }, { "epoch": 
0.23, "grad_norm": 1.4349979162216187, "learning_rate": 0.00017443475942507174, "loss": 1.2214, "step": 3058 }, { "epoch": 0.23, "grad_norm": 1.7732903957366943, "learning_rate": 0.0001744187364230819, "loss": 1.0184, "step": 3059 }, { "epoch": 0.23, "grad_norm": 1.7030646800994873, "learning_rate": 0.00017440270913786563, "loss": 1.6288, "step": 3060 }, { "epoch": 0.23, "grad_norm": 1.4228830337524414, "learning_rate": 0.00017438667757034546, "loss": 2.2065, "step": 3061 }, { "epoch": 0.23, "grad_norm": 1.587773084640503, "learning_rate": 0.00017437064172144405, "loss": 1.3063, "step": 3062 }, { "epoch": 0.23, "grad_norm": 1.0071665048599243, "learning_rate": 0.00017435460159208435, "loss": 1.2972, "step": 3063 }, { "epoch": 0.23, "grad_norm": 1.0699859857559204, "learning_rate": 0.00017433855718318965, "loss": 1.4123, "step": 3064 }, { "epoch": 0.23, "grad_norm": 1.4007996320724487, "learning_rate": 0.0001743225084956833, "loss": 1.6809, "step": 3065 }, { "epoch": 0.23, "grad_norm": 2.360520124435425, "learning_rate": 0.00017430645553048906, "loss": 1.5087, "step": 3066 }, { "epoch": 0.23, "grad_norm": 1.2470479011535645, "learning_rate": 0.00017429039828853083, "loss": 0.9149, "step": 3067 }, { "epoch": 0.23, "grad_norm": 1.6801806688308716, "learning_rate": 0.0001742743367707328, "loss": 1.5542, "step": 3068 }, { "epoch": 0.23, "grad_norm": 3.237159252166748, "learning_rate": 0.00017425827097801943, "loss": 1.6757, "step": 3069 }, { "epoch": 0.23, "grad_norm": 1.2682416439056396, "learning_rate": 0.00017424220091131535, "loss": 1.7571, "step": 3070 }, { "epoch": 0.23, "grad_norm": 1.2290107011795044, "learning_rate": 0.00017422612657154554, "loss": 1.3496, "step": 3071 }, { "epoch": 0.23, "grad_norm": 1.1162546873092651, "learning_rate": 0.00017421004795963512, "loss": 1.7228, "step": 3072 }, { "epoch": 0.23, "grad_norm": 2.385746955871582, "learning_rate": 0.00017419396507650957, "loss": 1.5118, "step": 3073 }, { "epoch": 0.23, "grad_norm": 2.3236119747161865, 
"learning_rate": 0.0001741778779230945, "loss": 1.1364, "step": 3074 }, { "epoch": 0.23, "grad_norm": 1.1901017427444458, "learning_rate": 0.00017416178650031582, "loss": 2.0975, "step": 3075 }, { "epoch": 0.23, "grad_norm": 1.7596968412399292, "learning_rate": 0.00017414569080909974, "loss": 1.8651, "step": 3076 }, { "epoch": 0.23, "grad_norm": 1.5399742126464844, "learning_rate": 0.00017412959085037256, "loss": 1.6689, "step": 3077 }, { "epoch": 0.23, "grad_norm": 1.1819676160812378, "learning_rate": 0.000174113486625061, "loss": 1.4937, "step": 3078 }, { "epoch": 0.23, "grad_norm": 1.0963289737701416, "learning_rate": 0.00017409737813409195, "loss": 1.4202, "step": 3079 }, { "epoch": 0.24, "grad_norm": 2.0702927112579346, "learning_rate": 0.00017408126537839252, "loss": 0.9172, "step": 3080 }, { "epoch": 0.24, "grad_norm": 1.20769464969635, "learning_rate": 0.0001740651483588901, "loss": 1.3927, "step": 3081 }, { "epoch": 0.24, "grad_norm": 3.2011337280273438, "learning_rate": 0.00017404902707651232, "loss": 2.0244, "step": 3082 }, { "epoch": 0.24, "grad_norm": 1.243232250213623, "learning_rate": 0.00017403290153218705, "loss": 1.578, "step": 3083 }, { "epoch": 0.24, "grad_norm": 3.7078917026519775, "learning_rate": 0.00017401677172684243, "loss": 2.0703, "step": 3084 }, { "epoch": 0.24, "grad_norm": 1.1654183864593506, "learning_rate": 0.00017400063766140678, "loss": 1.7418, "step": 3085 }, { "epoch": 0.24, "grad_norm": 1.059012770652771, "learning_rate": 0.0001739844993368087, "loss": 1.3896, "step": 3086 }, { "epoch": 0.24, "grad_norm": 1.1354330778121948, "learning_rate": 0.00017396835675397715, "loss": 1.4008, "step": 3087 }, { "epoch": 0.24, "grad_norm": 1.2900335788726807, "learning_rate": 0.0001739522099138411, "loss": 1.4379, "step": 3088 }, { "epoch": 0.24, "grad_norm": 1.0521916151046753, "learning_rate": 0.00017393605881732996, "loss": 1.4651, "step": 3089 }, { "epoch": 0.24, "grad_norm": 4.175283908843994, "learning_rate": 0.0001739199034653733, 
"loss": 1.4375, "step": 3090 }, { "epoch": 0.24, "grad_norm": 1.008591651916504, "learning_rate": 0.00017390374385890093, "loss": 1.7967, "step": 3091 }, { "epoch": 0.24, "grad_norm": 1.0504001379013062, "learning_rate": 0.000173887579998843, "loss": 1.3426, "step": 3092 }, { "epoch": 0.24, "grad_norm": 0.943489134311676, "learning_rate": 0.00017387141188612975, "loss": 1.3664, "step": 3093 }, { "epoch": 0.24, "grad_norm": 0.9482603073120117, "learning_rate": 0.00017385523952169184, "loss": 1.57, "step": 3094 }, { "epoch": 0.24, "grad_norm": 1.155174970626831, "learning_rate": 0.00017383906290645998, "loss": 1.3236, "step": 3095 }, { "epoch": 0.24, "grad_norm": 0.6287511587142944, "learning_rate": 0.0001738228820413653, "loss": 1.1862, "step": 3096 }, { "epoch": 0.24, "grad_norm": 1.177021861076355, "learning_rate": 0.00017380669692733904, "loss": 1.3405, "step": 3097 }, { "epoch": 0.24, "grad_norm": 1.0679246187210083, "learning_rate": 0.00017379050756531283, "loss": 1.3373, "step": 3098 }, { "epoch": 0.24, "grad_norm": 0.8142823576927185, "learning_rate": 0.0001737743139562184, "loss": 2.4352, "step": 3099 }, { "epoch": 0.24, "grad_norm": 2.008788585662842, "learning_rate": 0.0001737581161009878, "loss": 1.8717, "step": 3100 }, { "epoch": 0.24, "grad_norm": 1.4965571165084839, "learning_rate": 0.00017374191400055332, "loss": 1.4899, "step": 3101 }, { "epoch": 0.24, "grad_norm": 1.654518961906433, "learning_rate": 0.00017372570765584748, "loss": 1.577, "step": 3102 }, { "epoch": 0.24, "grad_norm": 2.754359245300293, "learning_rate": 0.00017370949706780304, "loss": 1.9701, "step": 3103 }, { "epoch": 0.24, "grad_norm": 1.5933055877685547, "learning_rate": 0.000173693282237353, "loss": 1.0401, "step": 3104 }, { "epoch": 0.24, "grad_norm": 1.0167633295059204, "learning_rate": 0.00017367706316543063, "loss": 1.6855, "step": 3105 }, { "epoch": 0.24, "grad_norm": 1.213606595993042, "learning_rate": 0.00017366083985296947, "loss": 1.5485, "step": 3106 }, { "epoch": 0.24, 
"grad_norm": 1.2401992082595825, "learning_rate": 0.00017364461230090318, "loss": 1.6997, "step": 3107 }, { "epoch": 0.24, "grad_norm": 1.245445966720581, "learning_rate": 0.00017362838051016583, "loss": 1.0863, "step": 3108 }, { "epoch": 0.24, "grad_norm": 1.155835747718811, "learning_rate": 0.00017361214448169163, "loss": 1.3436, "step": 3109 }, { "epoch": 0.24, "grad_norm": 3.4447829723358154, "learning_rate": 0.00017359590421641503, "loss": 1.3649, "step": 3110 }, { "epoch": 0.24, "grad_norm": 1.2970452308654785, "learning_rate": 0.00017357965971527076, "loss": 1.5422, "step": 3111 }, { "epoch": 0.24, "grad_norm": 1.1629183292388916, "learning_rate": 0.0001735634109791938, "loss": 1.5535, "step": 3112 }, { "epoch": 0.24, "grad_norm": 3.5693182945251465, "learning_rate": 0.00017354715800911935, "loss": 2.0264, "step": 3113 }, { "epoch": 0.24, "grad_norm": 2.289480447769165, "learning_rate": 0.0001735309008059829, "loss": 1.2917, "step": 3114 }, { "epoch": 0.24, "grad_norm": 1.0440702438354492, "learning_rate": 0.00017351463937072004, "loss": 1.4228, "step": 3115 }, { "epoch": 0.24, "grad_norm": 1.609310269355774, "learning_rate": 0.00017349837370426682, "loss": 1.6215, "step": 3116 }, { "epoch": 0.24, "grad_norm": 1.0353589057922363, "learning_rate": 0.00017348210380755937, "loss": 1.3686, "step": 3117 }, { "epoch": 0.24, "grad_norm": 1.1218571662902832, "learning_rate": 0.00017346582968153412, "loss": 1.6119, "step": 3118 }, { "epoch": 0.24, "grad_norm": 3.463073492050171, "learning_rate": 0.00017344955132712778, "loss": 2.0293, "step": 3119 }, { "epoch": 0.24, "grad_norm": 1.6363188028335571, "learning_rate": 0.0001734332687452772, "loss": 1.9051, "step": 3120 }, { "epoch": 0.24, "grad_norm": 1.307468295097351, "learning_rate": 0.00017341698193691957, "loss": 1.4273, "step": 3121 }, { "epoch": 0.24, "grad_norm": 1.2605336904525757, "learning_rate": 0.00017340069090299227, "loss": 1.3075, "step": 3122 }, { "epoch": 0.24, "grad_norm": 1.8987488746643066, 
"learning_rate": 0.000173384395644433, "loss": 1.2896, "step": 3123 }, { "epoch": 0.24, "grad_norm": 1.098003625869751, "learning_rate": 0.00017336809616217956, "loss": 1.7931, "step": 3124 }, { "epoch": 0.24, "grad_norm": 1.0979524850845337, "learning_rate": 0.00017335179245717012, "loss": 1.7477, "step": 3125 }, { "epoch": 0.24, "grad_norm": 1.3954598903656006, "learning_rate": 0.00017333548453034306, "loss": 1.2139, "step": 3126 }, { "epoch": 0.24, "grad_norm": 0.8260125517845154, "learning_rate": 0.000173319172382637, "loss": 1.0363, "step": 3127 }, { "epoch": 0.24, "grad_norm": 1.7667053937911987, "learning_rate": 0.0001733028560149908, "loss": 1.9136, "step": 3128 }, { "epoch": 0.24, "grad_norm": 1.3752576112747192, "learning_rate": 0.00017328653542834354, "loss": 1.5028, "step": 3129 }, { "epoch": 0.24, "grad_norm": 1.2383195161819458, "learning_rate": 0.00017327021062363458, "loss": 1.789, "step": 3130 }, { "epoch": 0.24, "grad_norm": 2.051517963409424, "learning_rate": 0.00017325388160180347, "loss": 1.9006, "step": 3131 }, { "epoch": 0.24, "grad_norm": 0.9351773858070374, "learning_rate": 0.00017323754836379008, "loss": 0.8214, "step": 3132 }, { "epoch": 0.24, "grad_norm": 1.1452029943466187, "learning_rate": 0.00017322121091053447, "loss": 1.0891, "step": 3133 }, { "epoch": 0.24, "grad_norm": 1.3327325582504272, "learning_rate": 0.00017320486924297696, "loss": 1.2746, "step": 3134 }, { "epoch": 0.24, "grad_norm": 1.3564882278442383, "learning_rate": 0.00017318852336205807, "loss": 1.674, "step": 3135 }, { "epoch": 0.24, "grad_norm": 1.0116907358169556, "learning_rate": 0.00017317217326871868, "loss": 1.1123, "step": 3136 }, { "epoch": 0.24, "grad_norm": 1.677297592163086, "learning_rate": 0.00017315581896389977, "loss": 1.4597, "step": 3137 }, { "epoch": 0.24, "grad_norm": 1.3814013004302979, "learning_rate": 0.0001731394604485426, "loss": 1.0854, "step": 3138 }, { "epoch": 0.24, "grad_norm": 1.1428900957107544, "learning_rate": 0.00017312309772358876, 
"loss": 1.708, "step": 3139 }, { "epoch": 0.24, "grad_norm": 1.2624033689498901, "learning_rate": 0.00017310673078997998, "loss": 1.0193, "step": 3140 }, { "epoch": 0.24, "grad_norm": 1.9005012512207031, "learning_rate": 0.0001730903596486583, "loss": 1.6613, "step": 3141 }, { "epoch": 0.24, "grad_norm": 1.7738852500915527, "learning_rate": 0.00017307398430056593, "loss": 1.9092, "step": 3142 }, { "epoch": 0.24, "grad_norm": 1.1520870923995972, "learning_rate": 0.00017305760474664543, "loss": 1.1999, "step": 3143 }, { "epoch": 0.24, "grad_norm": 1.51539945602417, "learning_rate": 0.0001730412209878395, "loss": 1.8352, "step": 3144 }, { "epoch": 0.24, "grad_norm": 1.1611194610595703, "learning_rate": 0.00017302483302509108, "loss": 1.6828, "step": 3145 }, { "epoch": 0.24, "grad_norm": 1.1938645839691162, "learning_rate": 0.00017300844085934344, "loss": 1.8339, "step": 3146 }, { "epoch": 0.24, "grad_norm": 1.670752763748169, "learning_rate": 0.00017299204449154005, "loss": 1.4529, "step": 3147 }, { "epoch": 0.24, "grad_norm": 1.1932156085968018, "learning_rate": 0.00017297564392262458, "loss": 1.716, "step": 3148 }, { "epoch": 0.24, "grad_norm": 1.128617763519287, "learning_rate": 0.00017295923915354103, "loss": 2.1963, "step": 3149 }, { "epoch": 0.24, "grad_norm": 0.8721367716789246, "learning_rate": 0.0001729428301852335, "loss": 1.1343, "step": 3150 }, { "epoch": 0.24, "grad_norm": 3.8542912006378174, "learning_rate": 0.0001729264170186465, "loss": 1.5984, "step": 3151 }, { "epoch": 0.24, "grad_norm": 1.2105772495269775, "learning_rate": 0.00017290999965472473, "loss": 1.8021, "step": 3152 }, { "epoch": 0.24, "grad_norm": 3.5310451984405518, "learning_rate": 0.00017289357809441297, "loss": 1.7108, "step": 3153 }, { "epoch": 0.24, "grad_norm": 1.5826287269592285, "learning_rate": 0.00017287715233865652, "loss": 1.6791, "step": 3154 }, { "epoch": 0.24, "grad_norm": 2.508078098297119, "learning_rate": 0.00017286072238840067, "loss": 2.0825, "step": 3155 }, { "epoch": 
0.24, "grad_norm": 1.7085782289505005, "learning_rate": 0.00017284428824459113, "loss": 1.2072, "step": 3156 }, { "epoch": 0.24, "grad_norm": 1.1588331460952759, "learning_rate": 0.00017282784990817372, "loss": 2.0122, "step": 3157 }, { "epoch": 0.24, "grad_norm": 1.2257243394851685, "learning_rate": 0.00017281140738009465, "loss": 1.6477, "step": 3158 }, { "epoch": 0.24, "grad_norm": 1.4731618165969849, "learning_rate": 0.0001727949606613002, "loss": 1.3081, "step": 3159 }, { "epoch": 0.24, "grad_norm": 1.242685317993164, "learning_rate": 0.00017277850975273696, "loss": 1.4481, "step": 3160 }, { "epoch": 0.24, "grad_norm": 1.0255000591278076, "learning_rate": 0.00017276205465535186, "loss": 1.1597, "step": 3161 }, { "epoch": 0.24, "grad_norm": 0.9069308042526245, "learning_rate": 0.00017274559537009188, "loss": 1.3252, "step": 3162 }, { "epoch": 0.24, "grad_norm": 1.3259391784667969, "learning_rate": 0.00017272913189790443, "loss": 1.1066, "step": 3163 }, { "epoch": 0.24, "grad_norm": 0.9766326546669006, "learning_rate": 0.00017271266423973708, "loss": 1.4241, "step": 3164 }, { "epoch": 0.24, "grad_norm": 1.1954635381698608, "learning_rate": 0.00017269619239653757, "loss": 1.9109, "step": 3165 }, { "epoch": 0.24, "grad_norm": 0.7086601853370667, "learning_rate": 0.000172679716369254, "loss": 1.2765, "step": 3166 }, { "epoch": 0.24, "grad_norm": 0.9333212375640869, "learning_rate": 0.00017266323615883466, "loss": 1.6408, "step": 3167 }, { "epoch": 0.24, "grad_norm": 0.8512348532676697, "learning_rate": 0.00017264675176622806, "loss": 1.3743, "step": 3168 }, { "epoch": 0.24, "grad_norm": 1.24356210231781, "learning_rate": 0.00017263026319238301, "loss": 1.7897, "step": 3169 }, { "epoch": 0.24, "grad_norm": 1.249945878982544, "learning_rate": 0.00017261377043824847, "loss": 1.7836, "step": 3170 }, { "epoch": 0.24, "grad_norm": 1.1723673343658447, "learning_rate": 0.0001725972735047737, "loss": 1.0043, "step": 3171 }, { "epoch": 0.24, "grad_norm": 1.3579845428466797, 
"learning_rate": 0.00017258077239290826, "loss": 1.7335, "step": 3172 }, { "epoch": 0.24, "grad_norm": 1.0189741849899292, "learning_rate": 0.00017256426710360182, "loss": 1.76, "step": 3173 }, { "epoch": 0.24, "grad_norm": 0.9997395277023315, "learning_rate": 0.00017254775763780433, "loss": 1.3583, "step": 3174 }, { "epoch": 0.24, "grad_norm": 2.748459577560425, "learning_rate": 0.00017253124399646606, "loss": 1.5799, "step": 3175 }, { "epoch": 0.24, "grad_norm": 1.0444482564926147, "learning_rate": 0.00017251472618053746, "loss": 1.8707, "step": 3176 }, { "epoch": 0.24, "grad_norm": 1.6488447189331055, "learning_rate": 0.00017249820419096921, "loss": 1.4905, "step": 3177 }, { "epoch": 0.24, "grad_norm": 1.8772696256637573, "learning_rate": 0.00017248167802871224, "loss": 1.8615, "step": 3178 }, { "epoch": 0.24, "grad_norm": 1.274563193321228, "learning_rate": 0.00017246514769471777, "loss": 1.0338, "step": 3179 }, { "epoch": 0.24, "grad_norm": 1.125361680984497, "learning_rate": 0.00017244861318993713, "loss": 1.5619, "step": 3180 }, { "epoch": 0.24, "grad_norm": 3.9157822132110596, "learning_rate": 0.00017243207451532206, "loss": 2.5433, "step": 3181 }, { "epoch": 0.24, "grad_norm": 1.0078532695770264, "learning_rate": 0.0001724155316718244, "loss": 1.5814, "step": 3182 }, { "epoch": 0.24, "grad_norm": 1.8428738117218018, "learning_rate": 0.00017239898466039634, "loss": 1.7603, "step": 3183 }, { "epoch": 0.24, "grad_norm": 1.8325114250183105, "learning_rate": 0.00017238243348199022, "loss": 1.7662, "step": 3184 }, { "epoch": 0.24, "grad_norm": 0.8251979947090149, "learning_rate": 0.00017236587813755863, "loss": 1.3323, "step": 3185 }, { "epoch": 0.24, "grad_norm": 1.6404802799224854, "learning_rate": 0.00017234931862805447, "loss": 1.8791, "step": 3186 }, { "epoch": 0.24, "grad_norm": 1.24503755569458, "learning_rate": 0.00017233275495443081, "loss": 1.2585, "step": 3187 }, { "epoch": 0.24, "grad_norm": 1.0746095180511475, "learning_rate": 0.000172316187117641, 
"loss": 1.859, "step": 3188 }, { "epoch": 0.24, "grad_norm": 1.7009520530700684, "learning_rate": 0.00017229961511863864, "loss": 1.2592, "step": 3189 }, { "epoch": 0.24, "grad_norm": 3.452483892440796, "learning_rate": 0.00017228303895837748, "loss": 2.2286, "step": 3190 }, { "epoch": 0.24, "grad_norm": 2.629512071609497, "learning_rate": 0.00017226645863781164, "loss": 2.0715, "step": 3191 }, { "epoch": 0.24, "grad_norm": 1.221763014793396, "learning_rate": 0.00017224987415789532, "loss": 1.154, "step": 3192 }, { "epoch": 0.24, "grad_norm": 1.1059436798095703, "learning_rate": 0.00017223328551958314, "loss": 1.6077, "step": 3193 }, { "epoch": 0.24, "grad_norm": 1.4584605693817139, "learning_rate": 0.00017221669272382987, "loss": 1.2429, "step": 3194 }, { "epoch": 0.24, "grad_norm": 1.2100855112075806, "learning_rate": 0.00017220009577159049, "loss": 1.6127, "step": 3195 }, { "epoch": 0.24, "grad_norm": 1.3761703968048096, "learning_rate": 0.00017218349466382023, "loss": 1.9817, "step": 3196 }, { "epoch": 0.24, "grad_norm": 1.3668127059936523, "learning_rate": 0.0001721668894014746, "loss": 1.33, "step": 3197 }, { "epoch": 0.24, "grad_norm": 2.0518157482147217, "learning_rate": 0.00017215027998550934, "loss": 1.5909, "step": 3198 }, { "epoch": 0.24, "grad_norm": 1.0572000741958618, "learning_rate": 0.0001721336664168804, "loss": 1.3797, "step": 3199 }, { "epoch": 0.24, "grad_norm": 7.19317626953125, "learning_rate": 0.00017211704869654398, "loss": 2.3311, "step": 3200 }, { "epoch": 0.24, "grad_norm": 0.9235233664512634, "learning_rate": 0.0001721004268254566, "loss": 1.5988, "step": 3201 }, { "epoch": 0.24, "grad_norm": 1.032646656036377, "learning_rate": 0.00017208380080457485, "loss": 0.8423, "step": 3202 }, { "epoch": 0.24, "grad_norm": 3.4112188816070557, "learning_rate": 0.0001720671706348557, "loss": 2.1565, "step": 3203 }, { "epoch": 0.24, "grad_norm": 1.110274076461792, "learning_rate": 0.00017205053631725627, "loss": 1.8672, "step": 3204 }, { "epoch": 
0.24, "grad_norm": 7.727638244628906, "learning_rate": 0.000172033897852734, "loss": 2.8964, "step": 3205 }, { "epoch": 0.24, "grad_norm": 0.7486239075660706, "learning_rate": 0.00017201725524224653, "loss": 1.322, "step": 3206 }, { "epoch": 0.24, "grad_norm": 1.5090413093566895, "learning_rate": 0.00017200060848675171, "loss": 2.3393, "step": 3207 }, { "epoch": 0.24, "grad_norm": 5.990508079528809, "learning_rate": 0.00017198395758720773, "loss": 1.8789, "step": 3208 }, { "epoch": 0.24, "grad_norm": 3.045660972595215, "learning_rate": 0.00017196730254457284, "loss": 1.793, "step": 3209 }, { "epoch": 0.24, "grad_norm": 1.4722065925598145, "learning_rate": 0.0001719506433598057, "loss": 1.4772, "step": 3210 }, { "epoch": 0.25, "grad_norm": 0.9874110221862793, "learning_rate": 0.0001719339800338651, "loss": 2.0456, "step": 3211 }, { "epoch": 0.25, "grad_norm": 1.8497264385223389, "learning_rate": 0.0001719173125677102, "loss": 1.4852, "step": 3212 }, { "epoch": 0.25, "grad_norm": 2.1813409328460693, "learning_rate": 0.00017190064096230017, "loss": 1.9991, "step": 3213 }, { "epoch": 0.25, "grad_norm": 1.2503477334976196, "learning_rate": 0.00017188396521859467, "loss": 1.5083, "step": 3214 }, { "epoch": 0.25, "grad_norm": 2.749194383621216, "learning_rate": 0.00017186728533755344, "loss": 2.2352, "step": 3215 }, { "epoch": 0.25, "grad_norm": 3.391902208328247, "learning_rate": 0.0001718506013201365, "loss": 2.6507, "step": 3216 }, { "epoch": 0.25, "grad_norm": 1.04735267162323, "learning_rate": 0.00017183391316730412, "loss": 1.3118, "step": 3217 }, { "epoch": 0.25, "grad_norm": 1.728901743888855, "learning_rate": 0.0001718172208800168, "loss": 1.1638, "step": 3218 }, { "epoch": 0.25, "grad_norm": 2.828871488571167, "learning_rate": 0.0001718005244592353, "loss": 2.1189, "step": 3219 }, { "epoch": 0.25, "grad_norm": 1.634564757347107, "learning_rate": 0.00017178382390592057, "loss": 1.8813, "step": 3220 }, { "epoch": 0.25, "grad_norm": 1.1614716053009033, 
"learning_rate": 0.00017176711922103379, "loss": 0.8183, "step": 3221 }, { "epoch": 0.25, "grad_norm": 1.6767380237579346, "learning_rate": 0.0001717504104055365, "loss": 1.291, "step": 3222 }, { "epoch": 0.25, "grad_norm": 1.269092082977295, "learning_rate": 0.00017173369746039025, "loss": 1.3655, "step": 3223 }, { "epoch": 0.25, "grad_norm": 1.2901103496551514, "learning_rate": 0.00017171698038655713, "loss": 1.4321, "step": 3224 }, { "epoch": 0.25, "grad_norm": 1.3352326154708862, "learning_rate": 0.00017170025918499917, "loss": 1.6705, "step": 3225 }, { "epoch": 0.25, "grad_norm": 1.4011144638061523, "learning_rate": 0.00017168353385667884, "loss": 1.5416, "step": 3226 }, { "epoch": 0.25, "grad_norm": 1.1528393030166626, "learning_rate": 0.00017166680440255876, "loss": 1.4143, "step": 3227 }, { "epoch": 0.25, "grad_norm": 1.6141695976257324, "learning_rate": 0.00017165007082360184, "loss": 1.3641, "step": 3228 }, { "epoch": 0.25, "grad_norm": 1.6751445531845093, "learning_rate": 0.00017163333312077112, "loss": 0.7498, "step": 3229 }, { "epoch": 0.25, "grad_norm": 1.122848629951477, "learning_rate": 0.00017161659129503003, "loss": 1.9451, "step": 3230 }, { "epoch": 0.25, "grad_norm": 1.4071842432022095, "learning_rate": 0.0001715998453473421, "loss": 1.4701, "step": 3231 }, { "epoch": 0.25, "grad_norm": 1.3066792488098145, "learning_rate": 0.00017158309527867118, "loss": 1.4367, "step": 3232 }, { "epoch": 0.25, "grad_norm": 1.319726824760437, "learning_rate": 0.00017156634108998135, "loss": 1.5958, "step": 3233 }, { "epoch": 0.25, "grad_norm": 1.4929523468017578, "learning_rate": 0.00017154958278223686, "loss": 2.1879, "step": 3234 }, { "epoch": 0.25, "grad_norm": 1.02256178855896, "learning_rate": 0.0001715328203564023, "loss": 1.4548, "step": 3235 }, { "epoch": 0.25, "grad_norm": 2.9605705738067627, "learning_rate": 0.00017151605381344245, "loss": 1.2815, "step": 3236 }, { "epoch": 0.25, "grad_norm": 1.2208263874053955, "learning_rate": 0.00017149928315432226, 
"loss": 1.7256, "step": 3237 }, { "epoch": 0.25, "grad_norm": 1.156975269317627, "learning_rate": 0.00017148250838000705, "loss": 1.2882, "step": 3238 }, { "epoch": 0.25, "grad_norm": 1.8713947534561157, "learning_rate": 0.0001714657294914622, "loss": 1.8314, "step": 3239 }, { "epoch": 0.25, "grad_norm": 1.1449956893920898, "learning_rate": 0.00017144894648965354, "loss": 1.732, "step": 3240 }, { "epoch": 0.25, "grad_norm": 1.9274911880493164, "learning_rate": 0.00017143215937554697, "loss": 1.9085, "step": 3241 }, { "epoch": 0.25, "grad_norm": 1.6978737115859985, "learning_rate": 0.00017141536815010872, "loss": 1.8786, "step": 3242 }, { "epoch": 0.25, "grad_norm": 2.3768186569213867, "learning_rate": 0.00017139857281430517, "loss": 1.383, "step": 3243 }, { "epoch": 0.25, "grad_norm": 1.283390998840332, "learning_rate": 0.00017138177336910307, "loss": 1.7238, "step": 3244 }, { "epoch": 0.25, "grad_norm": 2.2438230514526367, "learning_rate": 0.00017136496981546925, "loss": 1.72, "step": 3245 }, { "epoch": 0.25, "grad_norm": 1.3512651920318604, "learning_rate": 0.0001713481621543709, "loss": 1.2914, "step": 3246 }, { "epoch": 0.25, "grad_norm": 1.0725106000900269, "learning_rate": 0.00017133135038677533, "loss": 1.4938, "step": 3247 }, { "epoch": 0.25, "grad_norm": 1.1192444562911987, "learning_rate": 0.0001713145345136502, "loss": 1.4765, "step": 3248 }, { "epoch": 0.25, "grad_norm": 0.9289127588272095, "learning_rate": 0.00017129771453596338, "loss": 0.926, "step": 3249 }, { "epoch": 0.25, "grad_norm": 1.701712727546692, "learning_rate": 0.00017128089045468294, "loss": 1.3928, "step": 3250 }, { "epoch": 0.25, "grad_norm": 1.7660646438598633, "learning_rate": 0.0001712640622707772, "loss": 1.0005, "step": 3251 }, { "epoch": 0.25, "grad_norm": 1.5398088693618774, "learning_rate": 0.00017124722998521468, "loss": 1.1956, "step": 3252 }, { "epoch": 0.25, "grad_norm": 0.8741055130958557, "learning_rate": 0.00017123039359896424, "loss": 1.4549, "step": 3253 }, { "epoch": 
0.25, "grad_norm": 1.4847639799118042, "learning_rate": 0.0001712135531129949, "loss": 1.4848, "step": 3254 }, { "epoch": 0.25, "grad_norm": 2.0330333709716797, "learning_rate": 0.00017119670852827588, "loss": 2.2391, "step": 3255 }, { "epoch": 0.25, "grad_norm": 1.3254930973052979, "learning_rate": 0.00017117985984577673, "loss": 1.1575, "step": 3256 }, { "epoch": 0.25, "grad_norm": 1.3255425691604614, "learning_rate": 0.00017116300706646715, "loss": 1.1931, "step": 3257 }, { "epoch": 0.25, "grad_norm": 1.0080981254577637, "learning_rate": 0.00017114615019131715, "loss": 2.0198, "step": 3258 }, { "epoch": 0.25, "grad_norm": 1.2628114223480225, "learning_rate": 0.00017112928922129693, "loss": 1.1287, "step": 3259 }, { "epoch": 0.25, "grad_norm": 2.7475099563598633, "learning_rate": 0.0001711124241573769, "loss": 2.456, "step": 3260 }, { "epoch": 0.25, "grad_norm": 3.8191540241241455, "learning_rate": 0.0001710955550005278, "loss": 1.3011, "step": 3261 }, { "epoch": 0.25, "grad_norm": 1.3089003562927246, "learning_rate": 0.00017107868175172052, "loss": 2.017, "step": 3262 }, { "epoch": 0.25, "grad_norm": 1.5264832973480225, "learning_rate": 0.00017106180441192622, "loss": 1.6971, "step": 3263 }, { "epoch": 0.25, "grad_norm": 1.737676739692688, "learning_rate": 0.00017104492298211627, "loss": 1.911, "step": 3264 }, { "epoch": 0.25, "grad_norm": 1.0013333559036255, "learning_rate": 0.00017102803746326227, "loss": 1.5961, "step": 3265 }, { "epoch": 0.25, "grad_norm": 4.3155975341796875, "learning_rate": 0.00017101114785633616, "loss": 1.9005, "step": 3266 }, { "epoch": 0.25, "grad_norm": 2.2570855617523193, "learning_rate": 0.00017099425416230996, "loss": 1.532, "step": 3267 }, { "epoch": 0.25, "grad_norm": 2.266875743865967, "learning_rate": 0.00017097735638215604, "loss": 1.9617, "step": 3268 }, { "epoch": 0.25, "grad_norm": 6.9060516357421875, "learning_rate": 0.00017096045451684693, "loss": 3.643, "step": 3269 }, { "epoch": 0.25, "grad_norm": 1.3236758708953857, 
"learning_rate": 0.00017094354856735547, "loss": 1.7589, "step": 3270 }, { "epoch": 0.25, "grad_norm": 0.8639273643493652, "learning_rate": 0.00017092663853465466, "loss": 1.3197, "step": 3271 }, { "epoch": 0.25, "grad_norm": 1.6511452198028564, "learning_rate": 0.0001709097244197178, "loss": 1.1051, "step": 3272 }, { "epoch": 0.25, "grad_norm": 1.3213467597961426, "learning_rate": 0.00017089280622351836, "loss": 0.9661, "step": 3273 }, { "epoch": 0.25, "grad_norm": 1.1358453035354614, "learning_rate": 0.00017087588394703012, "loss": 1.8537, "step": 3274 }, { "epoch": 0.25, "grad_norm": 1.2423231601715088, "learning_rate": 0.00017085895759122699, "loss": 1.2713, "step": 3275 }, { "epoch": 0.25, "grad_norm": 1.050451397895813, "learning_rate": 0.0001708420271570833, "loss": 1.6213, "step": 3276 }, { "epoch": 0.25, "grad_norm": 1.1322448253631592, "learning_rate": 0.0001708250926455733, "loss": 1.0472, "step": 3277 }, { "epoch": 0.25, "eval_loss": NaN, "eval_runtime": 290.4, "eval_samples_per_second": 9.504, "eval_steps_per_second": 9.504, "step": 3277 }, { "epoch": 0.25, "grad_norm": 1.2804020643234253, "learning_rate": 0.0001708081540576719, "loss": 1.8394, "step": 3278 }, { "epoch": 0.25, "grad_norm": 1.8300299644470215, "learning_rate": 0.00017079121139435382, "loss": 1.2671, "step": 3279 }, { "epoch": 0.25, "grad_norm": 2.890185594558716, "learning_rate": 0.00017077426465659433, "loss": 1.6962, "step": 3280 }, { "epoch": 0.25, "grad_norm": 2.032212495803833, "learning_rate": 0.00017075731384536873, "loss": 1.6514, "step": 3281 }, { "epoch": 0.25, "grad_norm": 0.9343369603157043, "learning_rate": 0.00017074035896165267, "loss": 1.351, "step": 3282 }, { "epoch": 0.25, "grad_norm": 1.2307392358779907, "learning_rate": 0.00017072340000642204, "loss": 1.0415, "step": 3283 }, { "epoch": 0.25, "grad_norm": 1.8095808029174805, "learning_rate": 0.00017070643698065288, "loss": 1.6991, "step": 3284 }, { "epoch": 0.25, "grad_norm": 1.5682848691940308, "learning_rate": 
0.0001706894698853215, "loss": 1.872, "step": 3285 }, { "epoch": 0.25, "grad_norm": 0.9572526216506958, "learning_rate": 0.0001706724987214045, "loss": 1.5394, "step": 3286 }, { "epoch": 0.25, "grad_norm": 1.214939832687378, "learning_rate": 0.00017065552348987864, "loss": 1.0323, "step": 3287 }, { "epoch": 0.25, "grad_norm": 3.3364109992980957, "learning_rate": 0.00017063854419172093, "loss": 2.1057, "step": 3288 }, { "epoch": 0.25, "grad_norm": 1.2834614515304565, "learning_rate": 0.00017062156082790864, "loss": 1.5317, "step": 3289 }, { "epoch": 0.25, "grad_norm": 1.127657175064087, "learning_rate": 0.00017060457339941926, "loss": 1.5269, "step": 3290 }, { "epoch": 0.25, "grad_norm": 1.1004339456558228, "learning_rate": 0.0001705875819072305, "loss": 1.6671, "step": 3291 }, { "epoch": 0.25, "grad_norm": 2.7010769844055176, "learning_rate": 0.00017057058635232036, "loss": 3.1236, "step": 3292 }, { "epoch": 0.25, "grad_norm": 1.4517911672592163, "learning_rate": 0.00017055358673566702, "loss": 1.9065, "step": 3293 }, { "epoch": 0.25, "grad_norm": 1.3614928722381592, "learning_rate": 0.00017053658305824887, "loss": 1.4839, "step": 3294 }, { "epoch": 0.25, "grad_norm": 1.6939901113510132, "learning_rate": 0.0001705195753210446, "loss": 1.2795, "step": 3295 }, { "epoch": 0.25, "grad_norm": 1.1790231466293335, "learning_rate": 0.00017050256352503306, "loss": 1.3833, "step": 3296 }, { "epoch": 0.25, "grad_norm": 1.382116675376892, "learning_rate": 0.00017048554767119342, "loss": 0.9847, "step": 3297 }, { "epoch": 0.25, "grad_norm": 1.6134663820266724, "learning_rate": 0.00017046852776050507, "loss": 1.226, "step": 3298 }, { "epoch": 0.25, "grad_norm": 1.1933883428573608, "learning_rate": 0.00017045150379394752, "loss": 1.6757, "step": 3299 }, { "epoch": 0.25, "grad_norm": 1.2701945304870605, "learning_rate": 0.00017043447577250068, "loss": 1.5813, "step": 3300 }, { "epoch": 0.25, "grad_norm": 1.2125165462493896, "learning_rate": 0.00017041744369714453, "loss": 1.541, 
"step": 3301 }, { "epoch": 0.25, "grad_norm": 1.763401746749878, "learning_rate": 0.00017040040756885943, "loss": 1.6237, "step": 3302 }, { "epoch": 0.25, "grad_norm": 0.9295063018798828, "learning_rate": 0.00017038336738862587, "loss": 1.3843, "step": 3303 }, { "epoch": 0.25, "grad_norm": 1.1394075155258179, "learning_rate": 0.00017036632315742462, "loss": 1.6819, "step": 3304 }, { "epoch": 0.25, "grad_norm": 0.9762579202651978, "learning_rate": 0.0001703492748762367, "loss": 1.7898, "step": 3305 }, { "epoch": 0.25, "grad_norm": 1.3910648822784424, "learning_rate": 0.00017033222254604332, "loss": 1.945, "step": 3306 }, { "epoch": 0.25, "grad_norm": 0.9067685604095459, "learning_rate": 0.0001703151661678259, "loss": 1.3843, "step": 3307 }, { "epoch": 0.25, "grad_norm": 1.039344072341919, "learning_rate": 0.0001702981057425662, "loss": 1.3229, "step": 3308 }, { "epoch": 0.25, "grad_norm": 1.3668038845062256, "learning_rate": 0.00017028104127124607, "loss": 1.8564, "step": 3309 }, { "epoch": 0.25, "grad_norm": 0.9577736854553223, "learning_rate": 0.00017026397275484773, "loss": 0.7808, "step": 3310 }, { "epoch": 0.25, "grad_norm": 1.6363779306411743, "learning_rate": 0.00017024690019435356, "loss": 1.6364, "step": 3311 }, { "epoch": 0.25, "grad_norm": 1.250922441482544, "learning_rate": 0.00017022982359074618, "loss": 1.7477, "step": 3312 }, { "epoch": 0.25, "grad_norm": 1.4770711660385132, "learning_rate": 0.00017021274294500843, "loss": 2.0, "step": 3313 }, { "epoch": 0.25, "grad_norm": 1.0837905406951904, "learning_rate": 0.0001701956582581234, "loss": 2.0512, "step": 3314 }, { "epoch": 0.25, "grad_norm": 1.1321505308151245, "learning_rate": 0.00017017856953107442, "loss": 1.6975, "step": 3315 }, { "epoch": 0.25, "grad_norm": 1.4284919500350952, "learning_rate": 0.00017016147676484508, "loss": 1.6255, "step": 3316 }, { "epoch": 0.25, "grad_norm": 0.9374521970748901, "learning_rate": 0.00017014437996041914, "loss": 1.2785, "step": 3317 }, { "epoch": 0.25, 
"grad_norm": 1.4307385683059692, "learning_rate": 0.00017012727911878058, "loss": 1.1331, "step": 3318 }, { "epoch": 0.25, "grad_norm": 2.2073283195495605, "learning_rate": 0.0001701101742409137, "loss": 2.6454, "step": 3319 }, { "epoch": 0.25, "grad_norm": 1.144713044166565, "learning_rate": 0.00017009306532780297, "loss": 1.5475, "step": 3320 }, { "epoch": 0.25, "grad_norm": 1.010377287864685, "learning_rate": 0.00017007595238043312, "loss": 1.4672, "step": 3321 }, { "epoch": 0.25, "grad_norm": 1.8146411180496216, "learning_rate": 0.0001700588353997891, "loss": 1.7344, "step": 3322 }, { "epoch": 0.25, "grad_norm": 1.2910068035125732, "learning_rate": 0.00017004171438685602, "loss": 1.5891, "step": 3323 }, { "epoch": 0.25, "grad_norm": 0.9824674725532532, "learning_rate": 0.00017002458934261937, "loss": 1.3318, "step": 3324 }, { "epoch": 0.25, "grad_norm": 0.9380732774734497, "learning_rate": 0.0001700074602680648, "loss": 1.385, "step": 3325 }, { "epoch": 0.25, "grad_norm": 1.0014727115631104, "learning_rate": 0.00016999032716417814, "loss": 1.2321, "step": 3326 }, { "epoch": 0.25, "grad_norm": 1.5269891023635864, "learning_rate": 0.00016997319003194555, "loss": 1.4492, "step": 3327 }, { "epoch": 0.25, "grad_norm": 1.4578783512115479, "learning_rate": 0.0001699560488723533, "loss": 1.1062, "step": 3328 }, { "epoch": 0.25, "grad_norm": 1.9401919841766357, "learning_rate": 0.000169938903686388, "loss": 1.1879, "step": 3329 }, { "epoch": 0.25, "grad_norm": 0.9485424160957336, "learning_rate": 0.0001699217544750365, "loss": 2.0115, "step": 3330 }, { "epoch": 0.25, "grad_norm": 0.9186760783195496, "learning_rate": 0.00016990460123928575, "loss": 1.0222, "step": 3331 }, { "epoch": 0.25, "grad_norm": 1.0409334897994995, "learning_rate": 0.00016988744398012306, "loss": 1.045, "step": 3332 }, { "epoch": 0.25, "grad_norm": 1.848015546798706, "learning_rate": 0.00016987028269853597, "loss": 1.7338, "step": 3333 }, { "epoch": 0.25, "grad_norm": 1.5895001888275146, 
"learning_rate": 0.00016985311739551212, "loss": 1.6043, "step": 3334 }, { "epoch": 0.25, "grad_norm": 1.9438313245773315, "learning_rate": 0.00016983594807203957, "loss": 1.5789, "step": 3335 }, { "epoch": 0.25, "grad_norm": 2.8740501403808594, "learning_rate": 0.0001698187747291064, "loss": 1.6852, "step": 3336 }, { "epoch": 0.25, "grad_norm": 1.2209748029708862, "learning_rate": 0.00016980159736770117, "loss": 1.8404, "step": 3337 }, { "epoch": 0.25, "grad_norm": 1.3053667545318604, "learning_rate": 0.0001697844159888124, "loss": 1.2424, "step": 3338 }, { "epoch": 0.25, "grad_norm": 2.2673110961914062, "learning_rate": 0.0001697672305934291, "loss": 1.6148, "step": 3339 }, { "epoch": 0.25, "grad_norm": 1.7233154773712158, "learning_rate": 0.0001697500411825403, "loss": 1.1788, "step": 3340 }, { "epoch": 0.25, "grad_norm": 1.6609960794448853, "learning_rate": 0.00016973284775713537, "loss": 1.5368, "step": 3341 }, { "epoch": 0.26, "grad_norm": 1.785298466682434, "learning_rate": 0.0001697156503182039, "loss": 1.956, "step": 3342 }, { "epoch": 0.26, "grad_norm": 1.5873109102249146, "learning_rate": 0.00016969844886673575, "loss": 1.7359, "step": 3343 }, { "epoch": 0.26, "grad_norm": 0.9765797853469849, "learning_rate": 0.00016968124340372085, "loss": 1.3551, "step": 3344 }, { "epoch": 0.26, "grad_norm": 1.590441346168518, "learning_rate": 0.00016966403393014958, "loss": 1.6835, "step": 3345 }, { "epoch": 0.26, "grad_norm": 1.0181374549865723, "learning_rate": 0.00016964682044701238, "loss": 1.6035, "step": 3346 }, { "epoch": 0.26, "grad_norm": 2.2603812217712402, "learning_rate": 0.00016962960295530003, "loss": 1.3637, "step": 3347 }, { "epoch": 0.26, "grad_norm": 1.5083367824554443, "learning_rate": 0.00016961238145600345, "loss": 2.0505, "step": 3348 }, { "epoch": 0.26, "grad_norm": 2.028398275375366, "learning_rate": 0.00016959515595011388, "loss": 0.6465, "step": 3349 }, { "epoch": 0.26, "grad_norm": 1.6357065439224243, "learning_rate": 0.0001695779264386227, 
"loss": 1.23, "step": 3350 }, { "epoch": 0.26, "grad_norm": 1.445113182067871, "learning_rate": 0.0001695606929225216, "loss": 1.7872, "step": 3351 }, { "epoch": 0.26, "grad_norm": 1.4801217317581177, "learning_rate": 0.0001695434554028025, "loss": 2.1189, "step": 3352 }, { "epoch": 0.26, "grad_norm": 1.2091350555419922, "learning_rate": 0.00016952621388045743, "loss": 1.1531, "step": 3353 }, { "epoch": 0.26, "grad_norm": 1.008834958076477, "learning_rate": 0.00016950896835647882, "loss": 1.4947, "step": 3354 }, { "epoch": 0.26, "grad_norm": 1.0043843984603882, "learning_rate": 0.00016949171883185918, "loss": 1.2478, "step": 3355 }, { "epoch": 0.26, "grad_norm": 2.110344409942627, "learning_rate": 0.0001694744653075914, "loss": 1.6122, "step": 3356 }, { "epoch": 0.26, "grad_norm": 1.6664526462554932, "learning_rate": 0.00016945720778466846, "loss": 1.7903, "step": 3357 }, { "epoch": 0.26, "grad_norm": 0.8982508182525635, "learning_rate": 0.00016943994626408363, "loss": 1.5008, "step": 3358 }, { "epoch": 0.26, "grad_norm": 1.1391743421554565, "learning_rate": 0.00016942268074683043, "loss": 1.0368, "step": 3359 }, { "epoch": 0.26, "grad_norm": 0.9887419939041138, "learning_rate": 0.0001694054112339026, "loss": 1.7964, "step": 3360 }, { "epoch": 0.26, "grad_norm": 1.2466521263122559, "learning_rate": 0.00016938813772629413, "loss": 1.5419, "step": 3361 }, { "epoch": 0.26, "grad_norm": 1.1982687711715698, "learning_rate": 0.00016937086022499907, "loss": 1.4044, "step": 3362 }, { "epoch": 0.26, "grad_norm": 1.5245871543884277, "learning_rate": 0.000169353578731012, "loss": 1.451, "step": 3363 }, { "epoch": 0.26, "grad_norm": 1.5185128450393677, "learning_rate": 0.00016933629324532748, "loss": 1.4115, "step": 3364 }, { "epoch": 0.26, "grad_norm": 0.9863423109054565, "learning_rate": 0.00016931900376894042, "loss": 1.018, "step": 3365 }, { "epoch": 0.26, "grad_norm": 0.9782361388206482, "learning_rate": 0.00016930171030284593, "loss": 1.4341, "step": 3366 }, { "epoch": 
0.26, "grad_norm": 1.299201250076294, "learning_rate": 0.00016928441284803935, "loss": 1.6497, "step": 3367 }, { "epoch": 0.26, "grad_norm": 1.659353256225586, "learning_rate": 0.0001692671114055162, "loss": 2.166, "step": 3368 }, { "epoch": 0.26, "grad_norm": 2.073413610458374, "learning_rate": 0.00016924980597627238, "loss": 2.4347, "step": 3369 }, { "epoch": 0.26, "grad_norm": 1.574845314025879, "learning_rate": 0.0001692324965613038, "loss": 1.6321, "step": 3370 }, { "epoch": 0.26, "grad_norm": 4.295227527618408, "learning_rate": 0.00016921518316160677, "loss": 1.9354, "step": 3371 }, { "epoch": 0.26, "grad_norm": 1.112055778503418, "learning_rate": 0.00016919786577817783, "loss": 2.0206, "step": 3372 }, { "epoch": 0.26, "grad_norm": 0.8712009191513062, "learning_rate": 0.00016918054441201357, "loss": 1.6769, "step": 3373 }, { "epoch": 0.26, "grad_norm": 1.3104976415634155, "learning_rate": 0.00016916321906411104, "loss": 1.3039, "step": 3374 }, { "epoch": 0.26, "grad_norm": 1.206186294555664, "learning_rate": 0.0001691458897354674, "loss": 1.531, "step": 3375 }, { "epoch": 0.26, "grad_norm": 1.18299400806427, "learning_rate": 0.00016912855642708, "loss": 1.9831, "step": 3376 }, { "epoch": 0.26, "grad_norm": 1.2597404718399048, "learning_rate": 0.00016911121913994653, "loss": 1.8309, "step": 3377 }, { "epoch": 0.26, "grad_norm": 1.092761754989624, "learning_rate": 0.00016909387787506479, "loss": 1.0577, "step": 3378 }, { "epoch": 0.26, "grad_norm": 3.8251240253448486, "learning_rate": 0.00016907653263343293, "loss": 1.8165, "step": 3379 }, { "epoch": 0.26, "grad_norm": 3.600578784942627, "learning_rate": 0.00016905918341604922, "loss": 3.0649, "step": 3380 }, { "epoch": 0.26, "grad_norm": 5.0617780685424805, "learning_rate": 0.00016904183022391222, "loss": 2.2203, "step": 3381 }, { "epoch": 0.26, "grad_norm": 3.2971460819244385, "learning_rate": 0.00016902447305802075, "loss": 1.5594, "step": 3382 }, { "epoch": 0.26, "grad_norm": 1.2128441333770752, 
"learning_rate": 0.00016900711191937371, "loss": 1.0065, "step": 3383 }, { "epoch": 0.26, "grad_norm": 2.4101219177246094, "learning_rate": 0.00016898974680897052, "loss": 1.5364, "step": 3384 }, { "epoch": 0.26, "grad_norm": 2.4242682456970215, "learning_rate": 0.00016897237772781044, "loss": 1.7217, "step": 3385 }, { "epoch": 0.26, "grad_norm": 1.1318906545639038, "learning_rate": 0.00016895500467689324, "loss": 1.4972, "step": 3386 }, { "epoch": 0.26, "grad_norm": 2.9570276737213135, "learning_rate": 0.0001689376276572189, "loss": 1.7386, "step": 3387 }, { "epoch": 0.26, "grad_norm": 2.8036510944366455, "learning_rate": 0.00016892024666978746, "loss": 1.2071, "step": 3388 }, { "epoch": 0.26, "grad_norm": 1.5716853141784668, "learning_rate": 0.00016890286171559937, "loss": 1.5492, "step": 3389 }, { "epoch": 0.26, "grad_norm": 1.6776238679885864, "learning_rate": 0.0001688854727956552, "loss": 1.8651, "step": 3390 }, { "epoch": 0.26, "grad_norm": 1.9100931882858276, "learning_rate": 0.0001688680799109558, "loss": 1.6005, "step": 3391 }, { "epoch": 0.26, "grad_norm": 1.560004472732544, "learning_rate": 0.00016885068306250224, "loss": 1.3681, "step": 3392 }, { "epoch": 0.26, "grad_norm": 1.7669737339019775, "learning_rate": 0.00016883328225129578, "loss": 1.4375, "step": 3393 }, { "epoch": 0.26, "grad_norm": 1.6707173585891724, "learning_rate": 0.00016881587747833793, "loss": 1.7827, "step": 3394 }, { "epoch": 0.26, "grad_norm": 1.2719504833221436, "learning_rate": 0.0001687984687446305, "loss": 1.0714, "step": 3395 }, { "epoch": 0.26, "grad_norm": 1.4141573905944824, "learning_rate": 0.0001687810560511754, "loss": 1.3382, "step": 3396 }, { "epoch": 0.26, "grad_norm": 1.4905544519424438, "learning_rate": 0.00016876363939897484, "loss": 1.7875, "step": 3397 }, { "epoch": 0.26, "grad_norm": 1.460671305656433, "learning_rate": 0.00016874621878903127, "loss": 1.4922, "step": 3398 }, { "epoch": 0.26, "grad_norm": 1.6392245292663574, "learning_rate": 
0.0001687287942223473, "loss": 1.7702, "step": 3399 }, { "epoch": 0.26, "grad_norm": 1.208905577659607, "learning_rate": 0.00016871136569992587, "loss": 1.7913, "step": 3400 }, { "epoch": 0.26, "grad_norm": 1.2503423690795898, "learning_rate": 0.00016869393322277006, "loss": 2.1161, "step": 3401 }, { "epoch": 0.26, "grad_norm": 1.110331654548645, "learning_rate": 0.00016867649679188325, "loss": 1.7119, "step": 3402 }, { "epoch": 0.26, "grad_norm": 1.2512001991271973, "learning_rate": 0.00016865905640826894, "loss": 1.3912, "step": 3403 }, { "epoch": 0.26, "grad_norm": 1.0948933362960815, "learning_rate": 0.00016864161207293097, "loss": 1.6299, "step": 3404 }, { "epoch": 0.26, "grad_norm": 1.5465006828308105, "learning_rate": 0.0001686241637868734, "loss": 1.187, "step": 3405 }, { "epoch": 0.26, "grad_norm": 1.8818347454071045, "learning_rate": 0.00016860671155110034, "loss": 2.1738, "step": 3406 }, { "epoch": 0.26, "grad_norm": 1.2189950942993164, "learning_rate": 0.0001685892553666164, "loss": 2.0251, "step": 3407 }, { "epoch": 0.26, "grad_norm": 1.2494311332702637, "learning_rate": 0.00016857179523442625, "loss": 1.1826, "step": 3408 }, { "epoch": 0.26, "grad_norm": 1.1997917890548706, "learning_rate": 0.0001685543311555348, "loss": 1.6797, "step": 3409 }, { "epoch": 0.26, "grad_norm": 1.8103384971618652, "learning_rate": 0.00016853686313094722, "loss": 1.9471, "step": 3410 }, { "epoch": 0.26, "grad_norm": 1.3230997323989868, "learning_rate": 0.00016851939116166892, "loss": 0.9559, "step": 3411 }, { "epoch": 0.26, "grad_norm": 2.0478084087371826, "learning_rate": 0.00016850191524870546, "loss": 1.2151, "step": 3412 }, { "epoch": 0.26, "grad_norm": 1.295527696609497, "learning_rate": 0.00016848443539306274, "loss": 1.1566, "step": 3413 }, { "epoch": 0.26, "grad_norm": 1.2115347385406494, "learning_rate": 0.00016846695159574676, "loss": 1.8344, "step": 3414 }, { "epoch": 0.26, "grad_norm": 1.2891559600830078, "learning_rate": 0.00016844946385776384, "loss": 1.0533, 
"step": 3415 }, { "epoch": 0.26, "grad_norm": 3.0851922035217285, "learning_rate": 0.00016843197218012053, "loss": 1.5724, "step": 3416 }, { "epoch": 0.26, "grad_norm": 1.4213348627090454, "learning_rate": 0.0001684144765638236, "loss": 1.0663, "step": 3417 }, { "epoch": 0.26, "grad_norm": 1.940964698791504, "learning_rate": 0.0001683969770098799, "loss": 1.4721, "step": 3418 }, { "epoch": 0.26, "grad_norm": 1.4511553049087524, "learning_rate": 0.00016837947351929674, "loss": 2.1713, "step": 3419 }, { "epoch": 0.26, "grad_norm": 1.2085249423980713, "learning_rate": 0.0001683619660930815, "loss": 1.5817, "step": 3420 }, { "epoch": 0.26, "grad_norm": 1.742965579032898, "learning_rate": 0.00016834445473224184, "loss": 2.1874, "step": 3421 }, { "epoch": 0.26, "grad_norm": 1.1664948463439941, "learning_rate": 0.00016832693943778565, "loss": 1.9385, "step": 3422 }, { "epoch": 0.26, "grad_norm": 1.9117121696472168, "learning_rate": 0.00016830942021072104, "loss": 1.6621, "step": 3423 }, { "epoch": 0.26, "grad_norm": 1.240443229675293, "learning_rate": 0.00016829189705205635, "loss": 1.3381, "step": 3424 }, { "epoch": 0.26, "grad_norm": 4.141904354095459, "learning_rate": 0.00016827436996280008, "loss": 2.5965, "step": 3425 }, { "epoch": 0.26, "grad_norm": 1.4643594026565552, "learning_rate": 0.0001682568389439611, "loss": 1.3881, "step": 3426 }, { "epoch": 0.26, "grad_norm": 3.56811785697937, "learning_rate": 0.00016823930399654838, "loss": 4.1507, "step": 3427 }, { "epoch": 0.26, "grad_norm": 1.1132149696350098, "learning_rate": 0.00016822176512157114, "loss": 1.8953, "step": 3428 }, { "epoch": 0.26, "grad_norm": 1.1443995237350464, "learning_rate": 0.00016820422232003885, "loss": 1.3353, "step": 3429 }, { "epoch": 0.26, "grad_norm": 1.43739652633667, "learning_rate": 0.0001681866755929612, "loss": 1.655, "step": 3430 }, { "epoch": 0.26, "grad_norm": 1.7312259674072266, "learning_rate": 0.00016816912494134814, "loss": 1.5133, "step": 3431 }, { "epoch": 0.26, "grad_norm": 
1.107632040977478, "learning_rate": 0.00016815157036620976, "loss": 1.2304, "step": 3432 }, { "epoch": 0.26, "grad_norm": 1.8381803035736084, "learning_rate": 0.00016813401186855647, "loss": 2.2526, "step": 3433 }, { "epoch": 0.26, "grad_norm": 1.0570344924926758, "learning_rate": 0.00016811644944939884, "loss": 1.6941, "step": 3434 }, { "epoch": 0.26, "grad_norm": 1.549635410308838, "learning_rate": 0.00016809888310974767, "loss": 1.7807, "step": 3435 }, { "epoch": 0.26, "grad_norm": 3.5106966495513916, "learning_rate": 0.00016808131285061406, "loss": 2.2386, "step": 3436 }, { "epoch": 0.26, "grad_norm": 4.401878356933594, "learning_rate": 0.00016806373867300924, "loss": 2.3683, "step": 3437 }, { "epoch": 0.26, "grad_norm": 1.1786879301071167, "learning_rate": 0.0001680461605779447, "loss": 2.0409, "step": 3438 }, { "epoch": 0.26, "grad_norm": 1.2691142559051514, "learning_rate": 0.00016802857856643215, "loss": 1.5395, "step": 3439 }, { "epoch": 0.26, "grad_norm": 0.8905774354934692, "learning_rate": 0.0001680109926394836, "loss": 1.5983, "step": 3440 }, { "epoch": 0.26, "grad_norm": 0.8704274892807007, "learning_rate": 0.00016799340279811115, "loss": 0.9089, "step": 3441 }, { "epoch": 0.26, "grad_norm": 1.0321638584136963, "learning_rate": 0.0001679758090433272, "loss": 1.7623, "step": 3442 }, { "epoch": 0.26, "grad_norm": 1.2211638689041138, "learning_rate": 0.00016795821137614443, "loss": 1.0976, "step": 3443 }, { "epoch": 0.26, "grad_norm": 1.3726553916931152, "learning_rate": 0.00016794060979757566, "loss": 2.029, "step": 3444 }, { "epoch": 0.26, "grad_norm": 1.9267628192901611, "learning_rate": 0.00016792300430863396, "loss": 1.1508, "step": 3445 }, { "epoch": 0.26, "grad_norm": 1.4429035186767578, "learning_rate": 0.00016790539491033258, "loss": 1.7346, "step": 3446 }, { "epoch": 0.26, "grad_norm": 1.9637951850891113, "learning_rate": 0.0001678877816036851, "loss": 1.4848, "step": 3447 }, { "epoch": 0.26, "grad_norm": 1.306792974472046, "learning_rate": 
0.00016787016438970528, "loss": 2.0091, "step": 3448 }, { "epoch": 0.26, "grad_norm": 1.1906203031539917, "learning_rate": 0.00016785254326940702, "loss": 1.3492, "step": 3449 }, { "epoch": 0.26, "grad_norm": 2.064347743988037, "learning_rate": 0.0001678349182438046, "loss": 1.4329, "step": 3450 }, { "epoch": 0.26, "grad_norm": 1.3644301891326904, "learning_rate": 0.00016781728931391236, "loss": 1.6073, "step": 3451 }, { "epoch": 0.26, "grad_norm": 1.8066524267196655, "learning_rate": 0.00016779965648074506, "loss": 1.9811, "step": 3452 }, { "epoch": 0.26, "grad_norm": 1.568904161453247, "learning_rate": 0.00016778201974531744, "loss": 1.4375, "step": 3453 }, { "epoch": 0.26, "grad_norm": 2.199591875076294, "learning_rate": 0.00016776437910864464, "loss": 1.6771, "step": 3454 }, { "epoch": 0.26, "grad_norm": 1.8195091485977173, "learning_rate": 0.00016774673457174206, "loss": 1.1941, "step": 3455 }, { "epoch": 0.26, "grad_norm": 1.8004591464996338, "learning_rate": 0.00016772908613562511, "loss": 1.5038, "step": 3456 }, { "epoch": 0.26, "grad_norm": 1.0398424863815308, "learning_rate": 0.00016771143380130967, "loss": 1.5306, "step": 3457 }, { "epoch": 0.26, "grad_norm": 1.305745005607605, "learning_rate": 0.00016769377756981172, "loss": 1.7455, "step": 3458 }, { "epoch": 0.26, "grad_norm": 1.4318927526474, "learning_rate": 0.00016767611744214738, "loss": 1.406, "step": 3459 }, { "epoch": 0.26, "grad_norm": 2.2291882038116455, "learning_rate": 0.0001676584534193332, "loss": 1.1147, "step": 3460 }, { "epoch": 0.26, "grad_norm": 1.0466910600662231, "learning_rate": 0.00016764078550238583, "loss": 1.0695, "step": 3461 }, { "epoch": 0.26, "grad_norm": 1.3385140895843506, "learning_rate": 0.0001676231136923221, "loss": 2.1542, "step": 3462 }, { "epoch": 0.26, "grad_norm": 1.4174200296401978, "learning_rate": 0.00016760543799015917, "loss": 1.4001, "step": 3463 }, { "epoch": 0.26, "grad_norm": 1.5793951749801636, "learning_rate": 0.00016758775839691438, "loss": 1.8638, 
"step": 3464 }, { "epoch": 0.26, "grad_norm": 1.407059669494629, "learning_rate": 0.0001675700749136053, "loss": 1.6226, "step": 3465 }, { "epoch": 0.26, "grad_norm": 1.105878233909607, "learning_rate": 0.00016755238754124965, "loss": 1.5035, "step": 3466 }, { "epoch": 0.26, "grad_norm": 1.7290772199630737, "learning_rate": 0.00016753469628086554, "loss": 1.7971, "step": 3467 }, { "epoch": 0.26, "grad_norm": 1.4918105602264404, "learning_rate": 0.0001675170011334711, "loss": 1.76, "step": 3468 }, { "epoch": 0.26, "grad_norm": 1.6732120513916016, "learning_rate": 0.00016749930210008487, "loss": 1.6448, "step": 3469 }, { "epoch": 0.26, "grad_norm": 1.2819446325302124, "learning_rate": 0.00016748159918172553, "loss": 1.8149, "step": 3470 }, { "epoch": 0.26, "grad_norm": 1.404583215713501, "learning_rate": 0.00016746389237941192, "loss": 2.131, "step": 3471 }, { "epoch": 0.26, "grad_norm": 1.0628360509872437, "learning_rate": 0.0001674461816941632, "loss": 1.6128, "step": 3472 }, { "epoch": 0.26, "grad_norm": 1.0888179540634155, "learning_rate": 0.00016742846712699877, "loss": 1.0703, "step": 3473 }, { "epoch": 0.27, "grad_norm": 2.6563665866851807, "learning_rate": 0.00016741074867893812, "loss": 2.1577, "step": 3474 }, { "epoch": 0.27, "grad_norm": 1.132419228553772, "learning_rate": 0.00016739302635100108, "loss": 1.4553, "step": 3475 }, { "epoch": 0.27, "grad_norm": 1.1480751037597656, "learning_rate": 0.00016737530014420774, "loss": 1.3673, "step": 3476 }, { "epoch": 0.27, "grad_norm": 0.6433425545692444, "learning_rate": 0.00016735757005957824, "loss": 1.4747, "step": 3477 }, { "epoch": 0.27, "grad_norm": 3.2380518913269043, "learning_rate": 0.00016733983609813313, "loss": 2.3841, "step": 3478 }, { "epoch": 0.27, "grad_norm": 1.3644828796386719, "learning_rate": 0.00016732209826089304, "loss": 1.024, "step": 3479 }, { "epoch": 0.27, "grad_norm": 1.7980259656906128, "learning_rate": 0.0001673043565488789, "loss": 1.8315, "step": 3480 }, { "epoch": 0.27, 
"grad_norm": 3.5259711742401123, "learning_rate": 0.0001672866109631119, "loss": 2.5433, "step": 3481 }, { "epoch": 0.27, "grad_norm": 0.8921871185302734, "learning_rate": 0.00016726886150461332, "loss": 1.6333, "step": 3482 }, { "epoch": 0.27, "grad_norm": 1.2909505367279053, "learning_rate": 0.0001672511081744048, "loss": 1.4446, "step": 3483 }, { "epoch": 0.27, "grad_norm": 2.78010892868042, "learning_rate": 0.00016723335097350815, "loss": 2.3338, "step": 3484 }, { "epoch": 0.27, "grad_norm": 1.1423916816711426, "learning_rate": 0.00016721558990294536, "loss": 1.562, "step": 3485 }, { "epoch": 0.27, "grad_norm": 1.3733054399490356, "learning_rate": 0.0001671978249637387, "loss": 1.4474, "step": 3486 }, { "epoch": 0.27, "grad_norm": 1.4393484592437744, "learning_rate": 0.00016718005615691065, "loss": 1.6599, "step": 3487 }, { "epoch": 0.27, "grad_norm": 1.1845935583114624, "learning_rate": 0.00016716228348348392, "loss": 1.1124, "step": 3488 }, { "epoch": 0.27, "grad_norm": 1.0935050249099731, "learning_rate": 0.00016714450694448139, "loss": 1.328, "step": 3489 }, { "epoch": 0.27, "grad_norm": 1.257674217224121, "learning_rate": 0.00016712672654092622, "loss": 1.568, "step": 3490 }, { "epoch": 0.27, "grad_norm": 1.3557566404342651, "learning_rate": 0.00016710894227384178, "loss": 1.4095, "step": 3491 }, { "epoch": 0.27, "grad_norm": 1.2019203901290894, "learning_rate": 0.00016709115414425166, "loss": 1.759, "step": 3492 }, { "epoch": 0.27, "grad_norm": 1.18543541431427, "learning_rate": 0.00016707336215317968, "loss": 0.7343, "step": 3493 }, { "epoch": 0.27, "grad_norm": 1.2848823070526123, "learning_rate": 0.00016705556630164985, "loss": 0.9662, "step": 3494 }, { "epoch": 0.27, "grad_norm": 2.061563491821289, "learning_rate": 0.00016703776659068642, "loss": 2.6853, "step": 3495 }, { "epoch": 0.27, "grad_norm": 3.253312110900879, "learning_rate": 0.0001670199630213139, "loss": 1.8141, "step": 3496 }, { "epoch": 0.27, "grad_norm": 1.19853937625885, 
"learning_rate": 0.00016700215559455693, "loss": 1.2275, "step": 3497 }, { "epoch": 0.27, "grad_norm": 1.15877103805542, "learning_rate": 0.00016698434431144046, "loss": 1.1747, "step": 3498 }, { "epoch": 0.27, "grad_norm": 1.226472020149231, "learning_rate": 0.00016696652917298966, "loss": 1.0031, "step": 3499 }, { "epoch": 0.27, "grad_norm": 1.259232521057129, "learning_rate": 0.00016694871018022985, "loss": 1.2933, "step": 3500 }, { "epoch": 0.27, "grad_norm": 1.496620774269104, "learning_rate": 0.00016693088733418663, "loss": 1.0425, "step": 3501 }, { "epoch": 0.27, "grad_norm": 1.247049331665039, "learning_rate": 0.00016691306063588583, "loss": 0.715, "step": 3502 }, { "epoch": 0.27, "grad_norm": 1.737622857093811, "learning_rate": 0.00016689523008635345, "loss": 1.708, "step": 3503 }, { "epoch": 0.27, "grad_norm": 1.5525676012039185, "learning_rate": 0.00016687739568661575, "loss": 1.2464, "step": 3504 }, { "epoch": 0.27, "grad_norm": 1.2296339273452759, "learning_rate": 0.0001668595574376992, "loss": 1.4303, "step": 3505 }, { "epoch": 0.27, "grad_norm": 1.0917637348175049, "learning_rate": 0.00016684171534063055, "loss": 1.1017, "step": 3506 }, { "epoch": 0.27, "grad_norm": 0.9725021123886108, "learning_rate": 0.0001668238693964366, "loss": 1.514, "step": 3507 }, { "epoch": 0.27, "grad_norm": 1.6701240539550781, "learning_rate": 0.00016680601960614456, "loss": 1.382, "step": 3508 }, { "epoch": 0.27, "grad_norm": 1.2493995428085327, "learning_rate": 0.0001667881659707818, "loss": 1.6325, "step": 3509 }, { "epoch": 0.27, "grad_norm": 0.9721093773841858, "learning_rate": 0.00016677030849137584, "loss": 1.7984, "step": 3510 }, { "epoch": 0.27, "grad_norm": 1.4787979125976562, "learning_rate": 0.00016675244716895455, "loss": 1.9826, "step": 3511 }, { "epoch": 0.27, "grad_norm": 1.4166840314865112, "learning_rate": 0.00016673458200454594, "loss": 2.624, "step": 3512 }, { "epoch": 0.27, "grad_norm": 1.0569026470184326, "learning_rate": 0.0001667167129991782, 
"loss": 1.5883, "step": 3513 }, { "epoch": 0.27, "grad_norm": 1.5277348756790161, "learning_rate": 0.00016669884015387982, "loss": 1.3942, "step": 3514 }, { "epoch": 0.27, "grad_norm": 1.6683844327926636, "learning_rate": 0.0001666809634696795, "loss": 2.154, "step": 3515 }, { "epoch": 0.27, "grad_norm": 1.2294747829437256, "learning_rate": 0.00016666308294760614, "loss": 1.5221, "step": 3516 }, { "epoch": 0.27, "grad_norm": 1.5990139245986938, "learning_rate": 0.00016664519858868886, "loss": 1.8586, "step": 3517 }, { "epoch": 0.27, "grad_norm": 1.2184851169586182, "learning_rate": 0.000166627310393957, "loss": 1.781, "step": 3518 }, { "epoch": 0.27, "grad_norm": 1.5168328285217285, "learning_rate": 0.00016660941836444017, "loss": 1.2061, "step": 3519 }, { "epoch": 0.27, "grad_norm": 1.013925552368164, "learning_rate": 0.00016659152250116812, "loss": 1.1493, "step": 3520 }, { "epoch": 0.27, "grad_norm": 2.19065523147583, "learning_rate": 0.00016657362280517087, "loss": 1.2363, "step": 3521 }, { "epoch": 0.27, "grad_norm": 1.4278645515441895, "learning_rate": 0.00016655571927747865, "loss": 1.4682, "step": 3522 }, { "epoch": 0.27, "grad_norm": 2.4070138931274414, "learning_rate": 0.0001665378119191219, "loss": 1.7928, "step": 3523 }, { "epoch": 0.27, "grad_norm": 2.020989418029785, "learning_rate": 0.00016651990073113132, "loss": 1.8565, "step": 3524 }, { "epoch": 0.27, "grad_norm": 1.8912936449050903, "learning_rate": 0.0001665019857145378, "loss": 1.6488, "step": 3525 }, { "epoch": 0.27, "grad_norm": 1.2764129638671875, "learning_rate": 0.00016648406687037239, "loss": 1.9101, "step": 3526 }, { "epoch": 0.27, "grad_norm": 1.0206023454666138, "learning_rate": 0.0001664661441996665, "loss": 0.7385, "step": 3527 }, { "epoch": 0.27, "grad_norm": 3.081341505050659, "learning_rate": 0.00016644821770345165, "loss": 1.8102, "step": 3528 }, { "epoch": 0.27, "grad_norm": 1.1643894910812378, "learning_rate": 0.0001664302873827596, "loss": 1.7741, "step": 3529 }, { "epoch": 
0.27, "grad_norm": 1.530444860458374, "learning_rate": 0.00016641235323862236, "loss": 2.0288, "step": 3530 }, { "epoch": 0.27, "grad_norm": 1.9109500646591187, "learning_rate": 0.00016639441527207215, "loss": 1.941, "step": 3531 }, { "epoch": 0.27, "grad_norm": 1.3053821325302124, "learning_rate": 0.00016637647348414141, "loss": 1.425, "step": 3532 }, { "epoch": 0.27, "grad_norm": 1.456804633140564, "learning_rate": 0.00016635852787586275, "loss": 1.5911, "step": 3533 }, { "epoch": 0.27, "grad_norm": 1.0148301124572754, "learning_rate": 0.00016634057844826908, "loss": 1.7422, "step": 3534 }, { "epoch": 0.27, "grad_norm": 2.425110340118408, "learning_rate": 0.0001663226252023935, "loss": 1.6381, "step": 3535 }, { "epoch": 0.27, "grad_norm": 1.2853714227676392, "learning_rate": 0.0001663046681392693, "loss": 1.5076, "step": 3536 }, { "epoch": 0.27, "grad_norm": 1.2679414749145508, "learning_rate": 0.00016628670725993004, "loss": 1.9923, "step": 3537 }, { "epoch": 0.27, "grad_norm": 1.285263180732727, "learning_rate": 0.00016626874256540938, "loss": 2.0534, "step": 3538 }, { "epoch": 0.27, "grad_norm": 1.091828465461731, "learning_rate": 0.00016625077405674142, "loss": 1.6727, "step": 3539 }, { "epoch": 0.27, "grad_norm": 0.9725737571716309, "learning_rate": 0.00016623280173496028, "loss": 1.2958, "step": 3540 }, { "epoch": 0.27, "grad_norm": 3.2721707820892334, "learning_rate": 0.00016621482560110037, "loss": 2.0567, "step": 3541 }, { "epoch": 0.27, "grad_norm": 1.208581566810608, "learning_rate": 0.00016619684565619635, "loss": 2.1978, "step": 3542 }, { "epoch": 0.27, "grad_norm": 1.230804204940796, "learning_rate": 0.00016617886190128304, "loss": 1.7116, "step": 3543 }, { "epoch": 0.27, "grad_norm": 1.6186707019805908, "learning_rate": 0.00016616087433739556, "loss": 1.5199, "step": 3544 }, { "epoch": 0.27, "grad_norm": 1.8769662380218506, "learning_rate": 0.00016614288296556913, "loss": 1.5157, "step": 3545 }, { "epoch": 0.27, "grad_norm": 3.12418532371521, 
"learning_rate": 0.00016612488778683927, "loss": 1.5125, "step": 3546 }, { "epoch": 0.27, "grad_norm": 1.1796780824661255, "learning_rate": 0.00016610688880224178, "loss": 0.9581, "step": 3547 }, { "epoch": 0.27, "grad_norm": 1.1264082193374634, "learning_rate": 0.00016608888601281246, "loss": 1.7636, "step": 3548 }, { "epoch": 0.27, "grad_norm": 1.199976921081543, "learning_rate": 0.00016607087941958762, "loss": 1.7307, "step": 3549 }, { "epoch": 0.27, "grad_norm": 0.9956873059272766, "learning_rate": 0.00016605286902360357, "loss": 1.4466, "step": 3550 }, { "epoch": 0.27, "grad_norm": 2.888396739959717, "learning_rate": 0.00016603485482589693, "loss": 1.2573, "step": 3551 }, { "epoch": 0.27, "grad_norm": 0.9800689220428467, "learning_rate": 0.00016601683682750454, "loss": 1.3095, "step": 3552 }, { "epoch": 0.27, "grad_norm": 1.095541000366211, "learning_rate": 0.00016599881502946337, "loss": 0.668, "step": 3553 }, { "epoch": 0.27, "grad_norm": 0.8587194681167603, "learning_rate": 0.00016598078943281074, "loss": 1.4477, "step": 3554 }, { "epoch": 0.27, "grad_norm": 1.4205392599105835, "learning_rate": 0.00016596276003858412, "loss": 1.7314, "step": 3555 }, { "epoch": 0.27, "grad_norm": 1.1053285598754883, "learning_rate": 0.0001659447268478212, "loss": 1.4849, "step": 3556 }, { "epoch": 0.27, "grad_norm": 1.1246569156646729, "learning_rate": 0.00016592668986155986, "loss": 1.0872, "step": 3557 }, { "epoch": 0.27, "grad_norm": 3.2110629081726074, "learning_rate": 0.00016590864908083832, "loss": 1.5382, "step": 3558 }, { "epoch": 0.27, "grad_norm": 3.896745443344116, "learning_rate": 0.00016589060450669484, "loss": 2.4218, "step": 3559 }, { "epoch": 0.27, "grad_norm": 1.0164488554000854, "learning_rate": 0.000165872556140168, "loss": 1.5623, "step": 3560 }, { "epoch": 0.27, "grad_norm": 1.2718003988265991, "learning_rate": 0.00016585450398229665, "loss": 2.107, "step": 3561 }, { "epoch": 0.27, "grad_norm": 1.0786608457565308, "learning_rate": 0.00016583644803411972, 
"loss": 1.3488, "step": 3562 }, { "epoch": 0.27, "grad_norm": 1.0115480422973633, "learning_rate": 0.0001658183882966765, "loss": 1.5094, "step": 3563 }, { "epoch": 0.27, "grad_norm": 1.3075059652328491, "learning_rate": 0.0001658003247710064, "loss": 1.44, "step": 3564 }, { "epoch": 0.27, "grad_norm": 1.6104919910430908, "learning_rate": 0.00016578225745814907, "loss": 1.4711, "step": 3565 }, { "epoch": 0.27, "grad_norm": 1.085065245628357, "learning_rate": 0.0001657641863591444, "loss": 1.7465, "step": 3566 }, { "epoch": 0.27, "grad_norm": 1.6448043584823608, "learning_rate": 0.00016574611147503247, "loss": 0.9788, "step": 3567 }, { "epoch": 0.27, "grad_norm": 3.667344570159912, "learning_rate": 0.00016572803280685364, "loss": 1.6941, "step": 3568 }, { "epoch": 0.27, "grad_norm": 1.2794103622436523, "learning_rate": 0.0001657099503556484, "loss": 0.8074, "step": 3569 }, { "epoch": 0.27, "grad_norm": 1.3102208375930786, "learning_rate": 0.0001656918641224575, "loss": 1.3829, "step": 3570 }, { "epoch": 0.27, "grad_norm": 3.968256950378418, "learning_rate": 0.0001656737741083219, "loss": 3.2954, "step": 3571 }, { "epoch": 0.27, "grad_norm": 1.2103959321975708, "learning_rate": 0.0001656556803142828, "loss": 1.5356, "step": 3572 }, { "epoch": 0.27, "grad_norm": 2.765622138977051, "learning_rate": 0.00016563758274138163, "loss": 2.0479, "step": 3573 }, { "epoch": 0.27, "grad_norm": 1.6127322912216187, "learning_rate": 0.00016561948139065996, "loss": 1.5483, "step": 3574 }, { "epoch": 0.27, "grad_norm": 1.2168922424316406, "learning_rate": 0.00016560137626315963, "loss": 1.9328, "step": 3575 }, { "epoch": 0.27, "grad_norm": 1.2439016103744507, "learning_rate": 0.00016558326735992277, "loss": 1.6277, "step": 3576 }, { "epoch": 0.27, "grad_norm": 1.4229280948638916, "learning_rate": 0.00016556515468199153, "loss": 1.4036, "step": 3577 }, { "epoch": 0.27, "grad_norm": 1.6809947490692139, "learning_rate": 0.00016554703823040852, "loss": 1.6692, "step": 3578 }, { "epoch": 
0.27, "grad_norm": 1.8660104274749756, "learning_rate": 0.00016552891800621634, "loss": 1.1906, "step": 3579 }, { "epoch": 0.27, "grad_norm": 0.9809156060218811, "learning_rate": 0.000165510794010458, "loss": 1.5536, "step": 3580 }, { "epoch": 0.27, "grad_norm": 0.9612847566604614, "learning_rate": 0.00016549266624417658, "loss": 1.2279, "step": 3581 }, { "epoch": 0.27, "grad_norm": 1.2454755306243896, "learning_rate": 0.00016547453470841544, "loss": 1.0522, "step": 3582 }, { "epoch": 0.27, "grad_norm": 1.7049826383590698, "learning_rate": 0.0001654563994042182, "loss": 1.7504, "step": 3583 }, { "epoch": 0.27, "grad_norm": 0.884405791759491, "learning_rate": 0.00016543826033262862, "loss": 1.0657, "step": 3584 }, { "epoch": 0.27, "grad_norm": 1.8622539043426514, "learning_rate": 0.0001654201174946907, "loss": 2.0441, "step": 3585 }, { "epoch": 0.27, "grad_norm": 1.2274762392044067, "learning_rate": 0.00016540197089144872, "loss": 2.0785, "step": 3586 }, { "epoch": 0.27, "grad_norm": 1.4468961954116821, "learning_rate": 0.00016538382052394703, "loss": 1.1512, "step": 3587 }, { "epoch": 0.27, "grad_norm": 2.0616977214813232, "learning_rate": 0.00016536566639323034, "loss": 1.4211, "step": 3588 }, { "epoch": 0.27, "grad_norm": 1.2600970268249512, "learning_rate": 0.00016534750850034355, "loss": 1.2021, "step": 3589 }, { "epoch": 0.27, "grad_norm": 0.8361465930938721, "learning_rate": 0.0001653293468463317, "loss": 1.5733, "step": 3590 }, { "epoch": 0.27, "grad_norm": 1.0575408935546875, "learning_rate": 0.00016531118143224013, "loss": 1.8844, "step": 3591 }, { "epoch": 0.27, "grad_norm": 1.9113876819610596, "learning_rate": 0.0001652930122591143, "loss": 1.1428, "step": 3592 }, { "epoch": 0.27, "grad_norm": 1.6571323871612549, "learning_rate": 0.0001652748393280001, "loss": 1.351, "step": 3593 }, { "epoch": 0.27, "grad_norm": 1.4220998287200928, "learning_rate": 0.00016525666263994334, "loss": 1.4349, "step": 3594 }, { "epoch": 0.27, "grad_norm": 1.282011866569519, 
"learning_rate": 0.00016523848219599023, "loss": 1.3755, "step": 3595 }, { "epoch": 0.27, "grad_norm": 1.1277203559875488, "learning_rate": 0.00016522029799718722, "loss": 1.5086, "step": 3596 }, { "epoch": 0.27, "grad_norm": 3.067221164703369, "learning_rate": 0.00016520211004458082, "loss": 2.2768, "step": 3597 }, { "epoch": 0.27, "grad_norm": 2.863600015640259, "learning_rate": 0.0001651839183392179, "loss": 1.2696, "step": 3598 }, { "epoch": 0.27, "grad_norm": 1.051105260848999, "learning_rate": 0.00016516572288214552, "loss": 0.5624, "step": 3599 }, { "epoch": 0.27, "grad_norm": 1.144826889038086, "learning_rate": 0.00016514752367441092, "loss": 1.4689, "step": 3600 }, { "epoch": 0.27, "grad_norm": 1.3666446208953857, "learning_rate": 0.00016512932071706152, "loss": 1.3208, "step": 3601 }, { "epoch": 0.27, "grad_norm": 1.3931186199188232, "learning_rate": 0.00016511111401114507, "loss": 1.7247, "step": 3602 }, { "epoch": 0.27, "grad_norm": 1.3053462505340576, "learning_rate": 0.00016509290355770949, "loss": 1.5136, "step": 3603 }, { "epoch": 0.27, "grad_norm": 1.7515814304351807, "learning_rate": 0.00016507468935780277, "loss": 1.7435, "step": 3604 }, { "epoch": 0.28, "grad_norm": 2.7283639907836914, "learning_rate": 0.0001650564714124734, "loss": 2.0002, "step": 3605 }, { "epoch": 0.28, "grad_norm": 2.362043857574463, "learning_rate": 0.00016503824972276978, "loss": 1.5485, "step": 3606 }, { "epoch": 0.28, "grad_norm": 6.259393692016602, "learning_rate": 0.00016502002428974078, "loss": 3.3644, "step": 3607 }, { "epoch": 0.28, "grad_norm": 1.7502259016036987, "learning_rate": 0.00016500179511443533, "loss": 1.4477, "step": 3608 }, { "epoch": 0.28, "grad_norm": 2.1980724334716797, "learning_rate": 0.00016498356219790265, "loss": 0.928, "step": 3609 }, { "epoch": 0.28, "grad_norm": 1.5308185815811157, "learning_rate": 0.00016496532554119214, "loss": 1.4712, "step": 3610 }, { "epoch": 0.28, "grad_norm": 2.17706561088562, "learning_rate": 0.0001649470851453534, 
"loss": 2.2437, "step": 3611 }, { "epoch": 0.28, "grad_norm": 1.2241299152374268, "learning_rate": 0.00016492884101143628, "loss": 1.3838, "step": 3612 }, { "epoch": 0.28, "grad_norm": 1.3271805047988892, "learning_rate": 0.00016491059314049084, "loss": 1.2529, "step": 3613 }, { "epoch": 0.28, "grad_norm": 1.209463357925415, "learning_rate": 0.00016489234153356738, "loss": 1.4628, "step": 3614 }, { "epoch": 0.28, "grad_norm": 1.4015623331069946, "learning_rate": 0.00016487408619171638, "loss": 1.5915, "step": 3615 }, { "epoch": 0.28, "grad_norm": 1.2055639028549194, "learning_rate": 0.00016485582711598849, "loss": 2.148, "step": 3616 }, { "epoch": 0.28, "grad_norm": 2.2195043563842773, "learning_rate": 0.00016483756430743466, "loss": 1.8408, "step": 3617 }, { "epoch": 0.28, "grad_norm": 1.0749518871307373, "learning_rate": 0.000164819297767106, "loss": 1.507, "step": 3618 }, { "epoch": 0.28, "grad_norm": 1.2054356336593628, "learning_rate": 0.00016480102749605392, "loss": 1.4828, "step": 3619 }, { "epoch": 0.28, "grad_norm": 1.3980706930160522, "learning_rate": 0.00016478275349532986, "loss": 1.3985, "step": 3620 }, { "epoch": 0.28, "grad_norm": 1.4394692182540894, "learning_rate": 0.0001647644757659857, "loss": 0.9456, "step": 3621 }, { "epoch": 0.28, "grad_norm": 1.5178263187408447, "learning_rate": 0.00016474619430907337, "loss": 1.1005, "step": 3622 }, { "epoch": 0.28, "grad_norm": 2.2285826206207275, "learning_rate": 0.0001647279091256451, "loss": 1.8702, "step": 3623 }, { "epoch": 0.28, "grad_norm": 1.8310621976852417, "learning_rate": 0.00016470962021675333, "loss": 2.0712, "step": 3624 }, { "epoch": 0.28, "grad_norm": 1.955743432044983, "learning_rate": 0.0001646913275834506, "loss": 1.6137, "step": 3625 }, { "epoch": 0.28, "grad_norm": 1.34674870967865, "learning_rate": 0.00016467303122678987, "loss": 1.139, "step": 3626 }, { "epoch": 0.28, "grad_norm": 3.3014118671417236, "learning_rate": 0.00016465473114782414, "loss": 2.5358, "step": 3627 }, { "epoch": 
0.28, "grad_norm": 1.1185303926467896, "learning_rate": 0.0001646364273476067, "loss": 1.4704, "step": 3628 }, { "epoch": 0.28, "grad_norm": 1.3088083267211914, "learning_rate": 0.000164618119827191, "loss": 1.8808, "step": 3629 }, { "epoch": 0.28, "grad_norm": 1.2236626148223877, "learning_rate": 0.0001645998085876308, "loss": 1.1295, "step": 3630 }, { "epoch": 0.28, "grad_norm": 1.478433609008789, "learning_rate": 0.00016458149362998, "loss": 2.0695, "step": 3631 }, { "epoch": 0.28, "grad_norm": 1.2971872091293335, "learning_rate": 0.00016456317495529272, "loss": 1.4982, "step": 3632 }, { "epoch": 0.28, "grad_norm": 1.5275726318359375, "learning_rate": 0.00016454485256462329, "loss": 1.791, "step": 3633 }, { "epoch": 0.28, "grad_norm": 1.532941222190857, "learning_rate": 0.00016452652645902632, "loss": 1.8252, "step": 3634 }, { "epoch": 0.28, "grad_norm": 1.26966392993927, "learning_rate": 0.00016450819663955653, "loss": 2.0531, "step": 3635 }, { "epoch": 0.28, "grad_norm": 0.9819982647895813, "learning_rate": 0.00016448986310726893, "loss": 1.0337, "step": 3636 }, { "epoch": 0.28, "grad_norm": 1.369581699371338, "learning_rate": 0.0001644715258632187, "loss": 1.2455, "step": 3637 }, { "epoch": 0.28, "grad_norm": 1.5232219696044922, "learning_rate": 0.00016445318490846132, "loss": 1.4102, "step": 3638 }, { "epoch": 0.28, "grad_norm": 1.9087026119232178, "learning_rate": 0.00016443484024405235, "loss": 2.0647, "step": 3639 }, { "epoch": 0.28, "grad_norm": 1.0001798868179321, "learning_rate": 0.00016441649187104763, "loss": 1.6522, "step": 3640 }, { "epoch": 0.28, "grad_norm": 1.0598695278167725, "learning_rate": 0.00016439813979050326, "loss": 1.6729, "step": 3641 }, { "epoch": 0.28, "grad_norm": 2.0676381587982178, "learning_rate": 0.00016437978400347545, "loss": 1.4599, "step": 3642 }, { "epoch": 0.28, "grad_norm": 3.0705060958862305, "learning_rate": 0.00016436142451102076, "loss": 2.4526, "step": 3643 }, { "epoch": 0.28, "grad_norm": 1.922707200050354, 
"learning_rate": 0.0001643430613141958, "loss": 2.0285, "step": 3644 }, { "epoch": 0.28, "grad_norm": 1.910861611366272, "learning_rate": 0.00016432469441405754, "loss": 1.6991, "step": 3645 }, { "epoch": 0.28, "grad_norm": 2.4053752422332764, "learning_rate": 0.00016430632381166305, "loss": 1.9267, "step": 3646 }, { "epoch": 0.28, "grad_norm": 1.6070095300674438, "learning_rate": 0.0001642879495080697, "loss": 1.7862, "step": 3647 }, { "epoch": 0.28, "grad_norm": 1.4420164823532104, "learning_rate": 0.00016426957150433505, "loss": 1.4045, "step": 3648 }, { "epoch": 0.28, "grad_norm": 1.2086913585662842, "learning_rate": 0.0001642511898015168, "loss": 1.7435, "step": 3649 }, { "epoch": 0.28, "grad_norm": 1.073549509048462, "learning_rate": 0.00016423280440067302, "loss": 1.4898, "step": 3650 }, { "epoch": 0.28, "grad_norm": 1.5949207544326782, "learning_rate": 0.0001642144153028618, "loss": 1.595, "step": 3651 }, { "epoch": 0.28, "grad_norm": 1.13427734375, "learning_rate": 0.00016419602250914155, "loss": 1.3779, "step": 3652 }, { "epoch": 0.28, "grad_norm": 1.385571002960205, "learning_rate": 0.0001641776260205709, "loss": 1.2069, "step": 3653 }, { "epoch": 0.28, "grad_norm": 4.049564838409424, "learning_rate": 0.00016415922583820874, "loss": 2.5492, "step": 3654 }, { "epoch": 0.28, "grad_norm": 1.0337285995483398, "learning_rate": 0.000164140821963114, "loss": 0.8626, "step": 3655 }, { "epoch": 0.28, "grad_norm": 1.154450535774231, "learning_rate": 0.00016412241439634599, "loss": 1.3707, "step": 3656 }, { "epoch": 0.28, "grad_norm": 1.3707160949707031, "learning_rate": 0.00016410400313896415, "loss": 2.3831, "step": 3657 }, { "epoch": 0.28, "grad_norm": 7.032108783721924, "learning_rate": 0.00016408558819202816, "loss": 1.7553, "step": 3658 }, { "epoch": 0.28, "grad_norm": 1.3815702199935913, "learning_rate": 0.0001640671695565979, "loss": 1.3238, "step": 3659 }, { "epoch": 0.28, "grad_norm": 0.966362714767456, "learning_rate": 0.00016404874723373347, "loss": 
1.05, "step": 3660 }, { "epoch": 0.28, "grad_norm": 1.1384726762771606, "learning_rate": 0.00016403032122449518, "loss": 2.8774, "step": 3661 }, { "epoch": 0.28, "grad_norm": 1.1167383193969727, "learning_rate": 0.0001640118915299436, "loss": 1.2333, "step": 3662 }, { "epoch": 0.28, "grad_norm": 1.4671510457992554, "learning_rate": 0.00016399345815113937, "loss": 1.829, "step": 3663 }, { "epoch": 0.28, "grad_norm": 1.3414260149002075, "learning_rate": 0.00016397502108914352, "loss": 1.5553, "step": 3664 }, { "epoch": 0.28, "grad_norm": 1.25763738155365, "learning_rate": 0.00016395658034501717, "loss": 1.2516, "step": 3665 }, { "epoch": 0.28, "grad_norm": 1.3809459209442139, "learning_rate": 0.00016393813591982168, "loss": 1.7027, "step": 3666 }, { "epoch": 0.28, "grad_norm": 1.208499550819397, "learning_rate": 0.00016391968781461868, "loss": 1.6388, "step": 3667 }, { "epoch": 0.28, "grad_norm": 1.0055158138275146, "learning_rate": 0.00016390123603046992, "loss": 0.8112, "step": 3668 }, { "epoch": 0.28, "grad_norm": 1.5207643508911133, "learning_rate": 0.00016388278056843746, "loss": 1.88, "step": 3669 }, { "epoch": 0.28, "grad_norm": 1.7870233058929443, "learning_rate": 0.00016386432142958342, "loss": 1.1075, "step": 3670 }, { "epoch": 0.28, "grad_norm": 1.154591679573059, "learning_rate": 0.00016384585861497036, "loss": 1.8122, "step": 3671 }, { "epoch": 0.28, "grad_norm": 1.1249926090240479, "learning_rate": 0.0001638273921256608, "loss": 1.5982, "step": 3672 }, { "epoch": 0.28, "grad_norm": 2.235715627670288, "learning_rate": 0.00016380892196271764, "loss": 1.9134, "step": 3673 }, { "epoch": 0.28, "grad_norm": 3.7641308307647705, "learning_rate": 0.00016379044812720397, "loss": 1.7322, "step": 3674 }, { "epoch": 0.28, "grad_norm": 1.459430456161499, "learning_rate": 0.00016377197062018302, "loss": 1.1806, "step": 3675 }, { "epoch": 0.28, "grad_norm": 0.8656846284866333, "learning_rate": 0.0001637534894427183, "loss": 1.3236, "step": 3676 }, { "epoch": 0.28, 
"grad_norm": 1.3089499473571777, "learning_rate": 0.00016373500459587357, "loss": 2.3906, "step": 3677 }, { "epoch": 0.28, "grad_norm": 2.1681230068206787, "learning_rate": 0.00016371651608071262, "loss": 1.626, "step": 3678 }, { "epoch": 0.28, "grad_norm": 0.991364061832428, "learning_rate": 0.00016369802389829964, "loss": 1.3135, "step": 3679 }, { "epoch": 0.28, "grad_norm": 0.9806962013244629, "learning_rate": 0.00016367952804969895, "loss": 1.2737, "step": 3680 }, { "epoch": 0.28, "grad_norm": 0.9167351126670837, "learning_rate": 0.0001636610285359751, "loss": 1.354, "step": 3681 }, { "epoch": 0.28, "grad_norm": 0.9674419164657593, "learning_rate": 0.00016364252535819282, "loss": 1.3233, "step": 3682 }, { "epoch": 0.28, "grad_norm": 1.6444637775421143, "learning_rate": 0.0001636240185174171, "loss": 1.8971, "step": 3683 }, { "epoch": 0.28, "grad_norm": 2.341381788253784, "learning_rate": 0.0001636055080147131, "loss": 1.6675, "step": 3684 }, { "epoch": 0.28, "grad_norm": 1.1423031091690063, "learning_rate": 0.00016358699385114625, "loss": 1.4336, "step": 3685 }, { "epoch": 0.28, "grad_norm": 1.0954114198684692, "learning_rate": 0.00016356847602778206, "loss": 1.6487, "step": 3686 }, { "epoch": 0.28, "grad_norm": 1.1858983039855957, "learning_rate": 0.0001635499545456864, "loss": 1.3475, "step": 3687 }, { "epoch": 0.28, "grad_norm": 1.6162761449813843, "learning_rate": 0.00016353142940592528, "loss": 1.6487, "step": 3688 }, { "epoch": 0.28, "grad_norm": 2.032231569290161, "learning_rate": 0.00016351290060956491, "loss": 1.3675, "step": 3689 }, { "epoch": 0.28, "grad_norm": 1.3108528852462769, "learning_rate": 0.00016349436815767175, "loss": 1.5855, "step": 3690 }, { "epoch": 0.28, "grad_norm": 1.0092530250549316, "learning_rate": 0.00016347583205131243, "loss": 1.2136, "step": 3691 }, { "epoch": 0.28, "grad_norm": 3.0084609985351562, "learning_rate": 0.00016345729229155383, "loss": 1.6923, "step": 3692 }, { "epoch": 0.28, "grad_norm": 1.0721940994262695, 
"learning_rate": 0.000163438748879463, "loss": 1.4154, "step": 3693 }, { "epoch": 0.28, "grad_norm": 1.3605051040649414, "learning_rate": 0.00016342020181610723, "loss": 1.7937, "step": 3694 }, { "epoch": 0.28, "grad_norm": 2.1848185062408447, "learning_rate": 0.000163401651102554, "loss": 1.4551, "step": 3695 }, { "epoch": 0.28, "grad_norm": 1.2492495775222778, "learning_rate": 0.00016338309673987101, "loss": 1.1468, "step": 3696 }, { "epoch": 0.28, "grad_norm": 1.715075969696045, "learning_rate": 0.0001633645387291262, "loss": 1.6262, "step": 3697 }, { "epoch": 0.28, "grad_norm": 1.5560506582260132, "learning_rate": 0.0001633459770713877, "loss": 1.3362, "step": 3698 }, { "epoch": 0.28, "grad_norm": 2.2646591663360596, "learning_rate": 0.00016332741176772376, "loss": 1.4633, "step": 3699 }, { "epoch": 0.28, "grad_norm": 2.4492292404174805, "learning_rate": 0.000163308842819203, "loss": 2.2509, "step": 3700 }, { "epoch": 0.28, "grad_norm": 1.9158378839492798, "learning_rate": 0.00016329027022689413, "loss": 1.7113, "step": 3701 }, { "epoch": 0.28, "grad_norm": 5.724803447723389, "learning_rate": 0.0001632716939918661, "loss": 1.7547, "step": 3702 }, { "epoch": 0.28, "grad_norm": 1.6858603954315186, "learning_rate": 0.00016325311411518814, "loss": 0.6355, "step": 3703 }, { "epoch": 0.28, "grad_norm": 1.5665241479873657, "learning_rate": 0.00016323453059792958, "loss": 1.219, "step": 3704 }, { "epoch": 0.28, "grad_norm": 1.9346531629562378, "learning_rate": 0.00016321594344115997, "loss": 1.441, "step": 3705 }, { "epoch": 0.28, "grad_norm": 1.462802529335022, "learning_rate": 0.0001631973526459492, "loss": 1.6436, "step": 3706 }, { "epoch": 0.28, "grad_norm": 0.911217212677002, "learning_rate": 0.00016317875821336722, "loss": 1.2148, "step": 3707 }, { "epoch": 0.28, "grad_norm": 1.6306421756744385, "learning_rate": 0.00016316016014448424, "loss": 1.5315, "step": 3708 }, { "epoch": 0.28, "grad_norm": 1.6315785646438599, "learning_rate": 0.00016314155844037074, 
"loss": 1.6754, "step": 3709 }, { "epoch": 0.28, "grad_norm": 1.1254684925079346, "learning_rate": 0.00016312295310209727, "loss": 1.5061, "step": 3710 }, { "epoch": 0.28, "grad_norm": 1.6225942373275757, "learning_rate": 0.00016310434413073475, "loss": 2.2776, "step": 3711 }, { "epoch": 0.28, "grad_norm": 1.6960636377334595, "learning_rate": 0.00016308573152735417, "loss": 1.2624, "step": 3712 }, { "epoch": 0.28, "grad_norm": 1.1643285751342773, "learning_rate": 0.0001630671152930269, "loss": 1.2494, "step": 3713 }, { "epoch": 0.28, "grad_norm": 1.8979394435882568, "learning_rate": 0.00016304849542882427, "loss": 1.3981, "step": 3714 }, { "epoch": 0.28, "grad_norm": 2.747098207473755, "learning_rate": 0.00016302987193581806, "loss": 1.9611, "step": 3715 }, { "epoch": 0.28, "grad_norm": 1.4111007452011108, "learning_rate": 0.00016301124481508014, "loss": 1.2088, "step": 3716 }, { "epoch": 0.28, "grad_norm": 1.2505525350570679, "learning_rate": 0.00016299261406768257, "loss": 1.7888, "step": 3717 }, { "epoch": 0.28, "grad_norm": 1.2838854789733887, "learning_rate": 0.00016297397969469772, "loss": 1.7686, "step": 3718 }, { "epoch": 0.28, "grad_norm": 0.8625898957252502, "learning_rate": 0.00016295534169719807, "loss": 1.0266, "step": 3719 }, { "epoch": 0.28, "grad_norm": 3.074798107147217, "learning_rate": 0.00016293670007625627, "loss": 2.0614, "step": 3720 }, { "epoch": 0.28, "grad_norm": 0.9957729578018188, "learning_rate": 0.0001629180548329454, "loss": 1.821, "step": 3721 }, { "epoch": 0.28, "grad_norm": 1.259995698928833, "learning_rate": 0.0001628994059683385, "loss": 1.5673, "step": 3722 }, { "epoch": 0.28, "grad_norm": 1.9132055044174194, "learning_rate": 0.00016288075348350894, "loss": 1.6474, "step": 3723 }, { "epoch": 0.28, "grad_norm": 1.1755367517471313, "learning_rate": 0.00016286209737953028, "loss": 1.32, "step": 3724 }, { "epoch": 0.28, "grad_norm": 2.6464052200317383, "learning_rate": 0.0001628434376574763, "loss": 1.8236, "step": 3725 }, { 
"epoch": 0.28, "grad_norm": 1.25032639503479, "learning_rate": 0.00016282477431842098, "loss": 1.5298, "step": 3726 }, { "epoch": 0.28, "grad_norm": 1.1353816986083984, "learning_rate": 0.00016280610736343847, "loss": 1.2861, "step": 3727 }, { "epoch": 0.28, "grad_norm": 1.3955641984939575, "learning_rate": 0.00016278743679360317, "loss": 1.8069, "step": 3728 }, { "epoch": 0.28, "grad_norm": 1.3720319271087646, "learning_rate": 0.00016276876260998974, "loss": 1.6788, "step": 3729 }, { "epoch": 0.28, "grad_norm": 1.8312371969223022, "learning_rate": 0.00016275008481367287, "loss": 0.7721, "step": 3730 }, { "epoch": 0.28, "grad_norm": 1.6194770336151123, "learning_rate": 0.00016273140340572763, "loss": 1.2232, "step": 3731 }, { "epoch": 0.28, "grad_norm": 1.7861683368682861, "learning_rate": 0.0001627127183872293, "loss": 2.0891, "step": 3732 }, { "epoch": 0.28, "grad_norm": 1.1116703748703003, "learning_rate": 0.00016269402975925323, "loss": 1.0174, "step": 3733 }, { "epoch": 0.28, "grad_norm": 0.9947541356086731, "learning_rate": 0.00016267533752287508, "loss": 2.0269, "step": 3734 }, { "epoch": 0.28, "grad_norm": 1.3259527683258057, "learning_rate": 0.00016265664167917075, "loss": 1.7791, "step": 3735 }, { "epoch": 0.29, "grad_norm": 1.733161449432373, "learning_rate": 0.0001626379422292162, "loss": 1.4429, "step": 3736 }, { "epoch": 0.29, "grad_norm": 4.213027477264404, "learning_rate": 0.00016261923917408776, "loss": 2.2551, "step": 3737 }, { "epoch": 0.29, "grad_norm": 2.6167073249816895, "learning_rate": 0.00016260053251486187, "loss": 1.8212, "step": 3738 }, { "epoch": 0.29, "grad_norm": 1.204943060874939, "learning_rate": 0.0001625818222526152, "loss": 1.938, "step": 3739 }, { "epoch": 0.29, "grad_norm": 2.1885287761688232, "learning_rate": 0.00016256310838842465, "loss": 0.9919, "step": 3740 }, { "epoch": 0.29, "grad_norm": 0.9856343865394592, "learning_rate": 0.00016254439092336736, "loss": 1.3913, "step": 3741 }, { "epoch": 0.29, "grad_norm": 
2.1881065368652344, "learning_rate": 0.00016252566985852052, "loss": 1.7519, "step": 3742 }, { "epoch": 0.29, "grad_norm": 1.0697648525238037, "learning_rate": 0.0001625069451949617, "loss": 1.501, "step": 3743 }, { "epoch": 0.29, "grad_norm": 1.4154447317123413, "learning_rate": 0.00016248821693376858, "loss": 1.3514, "step": 3744 }, { "epoch": 0.29, "grad_norm": 0.987034022808075, "learning_rate": 0.00016246948507601914, "loss": 1.3776, "step": 3745 }, { "epoch": 0.29, "grad_norm": 3.3201472759246826, "learning_rate": 0.00016245074962279143, "loss": 2.1929, "step": 3746 }, { "epoch": 0.29, "grad_norm": 2.063941240310669, "learning_rate": 0.00016243201057516385, "loss": 2.2087, "step": 3747 }, { "epoch": 0.29, "grad_norm": 1.738653302192688, "learning_rate": 0.00016241326793421489, "loss": 1.8246, "step": 3748 }, { "epoch": 0.29, "grad_norm": 1.1630146503448486, "learning_rate": 0.00016239452170102331, "loss": 1.3829, "step": 3749 }, { "epoch": 0.29, "grad_norm": 1.3128950595855713, "learning_rate": 0.00016237577187666808, "loss": 1.1133, "step": 3750 }, { "epoch": 0.29, "grad_norm": 2.709554672241211, "learning_rate": 0.00016235701846222837, "loss": 1.8357, "step": 3751 }, { "epoch": 0.29, "grad_norm": 4.0583624839782715, "learning_rate": 0.00016233826145878351, "loss": 1.7864, "step": 3752 }, { "epoch": 0.29, "grad_norm": 1.0819939374923706, "learning_rate": 0.0001623195008674131, "loss": 1.3589, "step": 3753 }, { "epoch": 0.29, "grad_norm": 2.4409947395324707, "learning_rate": 0.0001623007366891969, "loss": 1.2961, "step": 3754 }, { "epoch": 0.29, "grad_norm": 2.0245416164398193, "learning_rate": 0.0001622819689252149, "loss": 0.836, "step": 3755 }, { "epoch": 0.29, "grad_norm": 1.04897141456604, "learning_rate": 0.0001622631975765473, "loss": 1.9314, "step": 3756 }, { "epoch": 0.29, "grad_norm": 1.3081769943237305, "learning_rate": 0.00016224442264427453, "loss": 1.6889, "step": 3757 }, { "epoch": 0.29, "grad_norm": 1.2381503582000732, "learning_rate": 
0.0001622256441294771, "loss": 1.0053, "step": 3758 }, { "epoch": 0.29, "grad_norm": 1.5563077926635742, "learning_rate": 0.00016220686203323597, "loss": 1.2696, "step": 3759 }, { "epoch": 0.29, "grad_norm": 1.5130962133407593, "learning_rate": 0.00016218807635663202, "loss": 1.7619, "step": 3760 }, { "epoch": 0.29, "grad_norm": 1.251133918762207, "learning_rate": 0.00016216928710074654, "loss": 1.0612, "step": 3761 }, { "epoch": 0.29, "grad_norm": 2.2084338665008545, "learning_rate": 0.00016215049426666095, "loss": 1.5152, "step": 3762 }, { "epoch": 0.29, "grad_norm": 1.892307996749878, "learning_rate": 0.0001621316978554569, "loss": 1.4605, "step": 3763 }, { "epoch": 0.29, "grad_norm": 1.4179401397705078, "learning_rate": 0.00016211289786821615, "loss": 1.105, "step": 3764 }, { "epoch": 0.29, "grad_norm": 1.1297919750213623, "learning_rate": 0.00016209409430602086, "loss": 1.6098, "step": 3765 }, { "epoch": 0.29, "grad_norm": 1.6645734310150146, "learning_rate": 0.00016207528716995323, "loss": 1.3485, "step": 3766 }, { "epoch": 0.29, "grad_norm": 2.044870615005493, "learning_rate": 0.00016205647646109572, "loss": 1.8077, "step": 3767 }, { "epoch": 0.29, "grad_norm": 2.6422994136810303, "learning_rate": 0.00016203766218053102, "loss": 1.1626, "step": 3768 }, { "epoch": 0.29, "grad_norm": 1.2768864631652832, "learning_rate": 0.00016201884432934195, "loss": 1.5666, "step": 3769 }, { "epoch": 0.29, "grad_norm": 1.6429927349090576, "learning_rate": 0.0001620000229086116, "loss": 1.7643, "step": 3770 }, { "epoch": 0.29, "grad_norm": 1.5012739896774292, "learning_rate": 0.00016198119791942332, "loss": 1.4328, "step": 3771 }, { "epoch": 0.29, "grad_norm": 1.4621524810791016, "learning_rate": 0.0001619623693628605, "loss": 1.9197, "step": 3772 }, { "epoch": 0.29, "grad_norm": 1.679966688156128, "learning_rate": 0.00016194353724000688, "loss": 0.8944, "step": 3773 }, { "epoch": 0.29, "grad_norm": 1.0721510648727417, "learning_rate": 0.00016192470155194634, "loss": 1.2318, 
"step": 3774 }, { "epoch": 0.29, "grad_norm": 1.2104551792144775, "learning_rate": 0.00016190586229976304, "loss": 1.718, "step": 3775 }, { "epoch": 0.29, "grad_norm": 1.3598008155822754, "learning_rate": 0.00016188701948454117, "loss": 1.0321, "step": 3776 }, { "epoch": 0.29, "grad_norm": 1.1788725852966309, "learning_rate": 0.00016186817310736537, "loss": 1.7054, "step": 3777 }, { "epoch": 0.29, "grad_norm": 1.7000430822372437, "learning_rate": 0.00016184932316932026, "loss": 1.3692, "step": 3778 }, { "epoch": 0.29, "grad_norm": 3.1730761528015137, "learning_rate": 0.00016183046967149081, "loss": 1.5849, "step": 3779 }, { "epoch": 0.29, "grad_norm": 1.516176700592041, "learning_rate": 0.00016181161261496216, "loss": 1.3168, "step": 3780 }, { "epoch": 0.29, "grad_norm": 1.1628777980804443, "learning_rate": 0.0001617927520008196, "loss": 1.3982, "step": 3781 }, { "epoch": 0.29, "grad_norm": 1.8187378644943237, "learning_rate": 0.00016177388783014868, "loss": 1.6396, "step": 3782 }, { "epoch": 0.29, "grad_norm": 2.26145601272583, "learning_rate": 0.0001617550201040352, "loss": 1.6582, "step": 3783 }, { "epoch": 0.29, "grad_norm": 1.043799638748169, "learning_rate": 0.00016173614882356501, "loss": 1.5676, "step": 3784 }, { "epoch": 0.29, "grad_norm": 1.0383738279342651, "learning_rate": 0.00016171727398982432, "loss": 1.1164, "step": 3785 }, { "epoch": 0.29, "grad_norm": 2.5944223403930664, "learning_rate": 0.00016169839560389946, "loss": 1.893, "step": 3786 }, { "epoch": 0.29, "grad_norm": 1.5303335189819336, "learning_rate": 0.00016167951366687704, "loss": 1.3981, "step": 3787 }, { "epoch": 0.29, "grad_norm": 1.1808058023452759, "learning_rate": 0.00016166062817984372, "loss": 1.4027, "step": 3788 }, { "epoch": 0.29, "grad_norm": 1.9584152698516846, "learning_rate": 0.00016164173914388658, "loss": 1.8189, "step": 3789 }, { "epoch": 0.29, "grad_norm": 3.637157917022705, "learning_rate": 0.00016162284656009274, "loss": 1.7127, "step": 3790 }, { "epoch": 0.29, 
"grad_norm": 1.7554274797439575, "learning_rate": 0.0001616039504295496, "loss": 1.4456, "step": 3791 }, { "epoch": 0.29, "grad_norm": 1.8778538703918457, "learning_rate": 0.0001615850507533447, "loss": 1.7607, "step": 3792 }, { "epoch": 0.29, "grad_norm": 1.4257892370224, "learning_rate": 0.0001615661475325658, "loss": 1.3836, "step": 3793 }, { "epoch": 0.29, "grad_norm": 1.1029869318008423, "learning_rate": 0.00016154724076830102, "loss": 1.4319, "step": 3794 }, { "epoch": 0.29, "grad_norm": 1.4747872352600098, "learning_rate": 0.00016152833046163844, "loss": 0.7435, "step": 3795 }, { "epoch": 0.29, "grad_norm": 1.8607186079025269, "learning_rate": 0.0001615094166136665, "loss": 1.5896, "step": 3796 }, { "epoch": 0.29, "grad_norm": 1.4276692867279053, "learning_rate": 0.00016149049922547379, "loss": 1.715, "step": 3797 }, { "epoch": 0.29, "grad_norm": 1.4770888090133667, "learning_rate": 0.0001614715782981491, "loss": 1.5115, "step": 3798 }, { "epoch": 0.29, "grad_norm": 1.6543824672698975, "learning_rate": 0.00016145265383278145, "loss": 1.1391, "step": 3799 }, { "epoch": 0.29, "grad_norm": 1.679032564163208, "learning_rate": 0.00016143372583046008, "loss": 0.9181, "step": 3800 }, { "epoch": 0.29, "grad_norm": 1.6913995742797852, "learning_rate": 0.00016141479429227436, "loss": 1.547, "step": 3801 }, { "epoch": 0.29, "grad_norm": 1.272648572921753, "learning_rate": 0.00016139585921931394, "loss": 1.3362, "step": 3802 }, { "epoch": 0.29, "grad_norm": 1.6976673603057861, "learning_rate": 0.0001613769206126686, "loss": 2.1349, "step": 3803 }, { "epoch": 0.29, "grad_norm": 1.5311371088027954, "learning_rate": 0.00016135797847342844, "loss": 1.6921, "step": 3804 }, { "epoch": 0.29, "grad_norm": 2.642866611480713, "learning_rate": 0.00016133903280268362, "loss": 1.8189, "step": 3805 }, { "epoch": 0.29, "grad_norm": 2.6635615825653076, "learning_rate": 0.0001613200836015246, "loss": 2.4381, "step": 3806 }, { "epoch": 0.29, "grad_norm": 2.0231759548187256, 
"learning_rate": 0.00016130113087104204, "loss": 1.6947, "step": 3807 }, { "epoch": 0.29, "grad_norm": 1.3100545406341553, "learning_rate": 0.00016128217461232672, "loss": 1.4385, "step": 3808 }, { "epoch": 0.29, "grad_norm": 0.9052218198776245, "learning_rate": 0.00016126321482646971, "loss": 1.651, "step": 3809 }, { "epoch": 0.29, "grad_norm": 2.1582894325256348, "learning_rate": 0.00016124425151456227, "loss": 1.4279, "step": 3810 }, { "epoch": 0.29, "grad_norm": 1.4125858545303345, "learning_rate": 0.00016122528467769583, "loss": 1.7194, "step": 3811 }, { "epoch": 0.29, "grad_norm": 1.3901968002319336, "learning_rate": 0.00016120631431696208, "loss": 1.5275, "step": 3812 }, { "epoch": 0.29, "grad_norm": 1.4288692474365234, "learning_rate": 0.00016118734043345283, "loss": 1.6315, "step": 3813 }, { "epoch": 0.29, "grad_norm": 0.9005610942840576, "learning_rate": 0.0001611683630282601, "loss": 1.5567, "step": 3814 }, { "epoch": 0.29, "grad_norm": 1.3775055408477783, "learning_rate": 0.00016114938210247623, "loss": 1.2584, "step": 3815 }, { "epoch": 0.29, "grad_norm": 1.0851072072982788, "learning_rate": 0.00016113039765719366, "loss": 1.4261, "step": 3816 }, { "epoch": 0.29, "grad_norm": 2.157731771469116, "learning_rate": 0.00016111140969350503, "loss": 1.7388, "step": 3817 }, { "epoch": 0.29, "grad_norm": 5.816032886505127, "learning_rate": 0.0001610924182125032, "loss": 2.149, "step": 3818 }, { "epoch": 0.29, "grad_norm": 1.9704949855804443, "learning_rate": 0.00016107342321528128, "loss": 1.0661, "step": 3819 }, { "epoch": 0.29, "grad_norm": 1.4428822994232178, "learning_rate": 0.0001610544247029325, "loss": 1.3889, "step": 3820 }, { "epoch": 0.29, "grad_norm": 1.8427586555480957, "learning_rate": 0.00016103542267655033, "loss": 0.9149, "step": 3821 }, { "epoch": 0.29, "grad_norm": 2.6088428497314453, "learning_rate": 0.00016101641713722856, "loss": 1.4448, "step": 3822 }, { "epoch": 0.29, "grad_norm": 1.1035187244415283, "learning_rate": 
0.00016099740808606086, "loss": 1.8601, "step": 3823 }, { "epoch": 0.29, "grad_norm": 1.8267629146575928, "learning_rate": 0.0001609783955241415, "loss": 1.8747, "step": 3824 }, { "epoch": 0.29, "grad_norm": 0.8746976852416992, "learning_rate": 0.00016095937945256465, "loss": 1.4317, "step": 3825 }, { "epoch": 0.29, "grad_norm": 0.8443641662597656, "learning_rate": 0.00016094035987242484, "loss": 0.8352, "step": 3826 }, { "epoch": 0.29, "grad_norm": 1.5230557918548584, "learning_rate": 0.00016092133678481676, "loss": 1.4607, "step": 3827 }, { "epoch": 0.29, "grad_norm": 2.6284191608428955, "learning_rate": 0.0001609023101908353, "loss": 1.4968, "step": 3828 }, { "epoch": 0.29, "grad_norm": 1.9373236894607544, "learning_rate": 0.00016088328009157551, "loss": 1.4653, "step": 3829 }, { "epoch": 0.29, "grad_norm": 1.2231576442718506, "learning_rate": 0.00016086424648813273, "loss": 0.9289, "step": 3830 }, { "epoch": 0.29, "grad_norm": 1.038938283920288, "learning_rate": 0.00016084520938160242, "loss": 1.6914, "step": 3831 }, { "epoch": 0.29, "grad_norm": 1.2351700067520142, "learning_rate": 0.00016082616877308026, "loss": 1.5747, "step": 3832 }, { "epoch": 0.29, "grad_norm": 1.6899502277374268, "learning_rate": 0.0001608071246636622, "loss": 1.3009, "step": 3833 }, { "epoch": 0.29, "grad_norm": 1.8374736309051514, "learning_rate": 0.00016078807705444432, "loss": 1.8575, "step": 3834 }, { "epoch": 0.29, "grad_norm": 1.3218406438827515, "learning_rate": 0.0001607690259465229, "loss": 1.5245, "step": 3835 }, { "epoch": 0.29, "grad_norm": 1.4725397825241089, "learning_rate": 0.00016074997134099444, "loss": 1.2229, "step": 3836 }, { "epoch": 0.29, "grad_norm": 1.2250932455062866, "learning_rate": 0.0001607309132389557, "loss": 1.6389, "step": 3837 }, { "epoch": 0.29, "grad_norm": 1.2845009565353394, "learning_rate": 0.00016071185164150348, "loss": 2.027, "step": 3838 }, { "epoch": 0.29, "grad_norm": 1.2224713563919067, "learning_rate": 0.00016069278654973497, "loss": 
1.5057, "step": 3839 }, { "epoch": 0.29, "grad_norm": 1.4202356338500977, "learning_rate": 0.00016067371796474743, "loss": 1.7179, "step": 3840 }, { "epoch": 0.29, "grad_norm": 2.2300195693969727, "learning_rate": 0.0001606546458876384, "loss": 1.405, "step": 3841 }, { "epoch": 0.29, "grad_norm": 1.620659589767456, "learning_rate": 0.00016063557031950558, "loss": 1.7056, "step": 3842 }, { "epoch": 0.29, "grad_norm": 0.9695338606834412, "learning_rate": 0.00016061649126144687, "loss": 1.3283, "step": 3843 }, { "epoch": 0.29, "grad_norm": 1.5077906847000122, "learning_rate": 0.00016059740871456036, "loss": 1.9136, "step": 3844 }, { "epoch": 0.29, "grad_norm": 1.401787519454956, "learning_rate": 0.0001605783226799444, "loss": 1.2341, "step": 3845 }, { "epoch": 0.29, "grad_norm": 1.9464763402938843, "learning_rate": 0.00016055923315869746, "loss": 1.0135, "step": 3846 }, { "epoch": 0.29, "grad_norm": 4.330577373504639, "learning_rate": 0.0001605401401519183, "loss": 2.1647, "step": 3847 }, { "epoch": 0.29, "grad_norm": 1.6036375761032104, "learning_rate": 0.00016052104366070578, "loss": 1.5509, "step": 3848 }, { "epoch": 0.29, "grad_norm": 1.1346461772918701, "learning_rate": 0.00016050194368615904, "loss": 1.3554, "step": 3849 }, { "epoch": 0.29, "grad_norm": 1.9325615167617798, "learning_rate": 0.0001604828402293774, "loss": 2.2385, "step": 3850 }, { "epoch": 0.29, "grad_norm": 1.2355527877807617, "learning_rate": 0.0001604637332914604, "loss": 2.0459, "step": 3851 }, { "epoch": 0.29, "grad_norm": 2.107098340988159, "learning_rate": 0.00016044462287350767, "loss": 1.629, "step": 3852 }, { "epoch": 0.29, "grad_norm": 0.9822937250137329, "learning_rate": 0.0001604255089766192, "loss": 1.9039, "step": 3853 }, { "epoch": 0.29, "grad_norm": 1.720059871673584, "learning_rate": 0.00016040639160189504, "loss": 1.6236, "step": 3854 }, { "epoch": 0.29, "grad_norm": 1.9412903785705566, "learning_rate": 0.00016038727075043562, "loss": 1.4839, "step": 3855 }, { "epoch": 0.29, 
"grad_norm": 3.004899501800537, "learning_rate": 0.0001603681464233413, "loss": 2.1445, "step": 3856 }, { "epoch": 0.29, "grad_norm": 1.219973087310791, "learning_rate": 0.00016034901862171293, "loss": 1.3126, "step": 3857 }, { "epoch": 0.29, "grad_norm": 1.4479748010635376, "learning_rate": 0.00016032988734665136, "loss": 1.8038, "step": 3858 }, { "epoch": 0.29, "grad_norm": 1.067011833190918, "learning_rate": 0.0001603107525992577, "loss": 1.5603, "step": 3859 }, { "epoch": 0.29, "grad_norm": 1.055162787437439, "learning_rate": 0.00016029161438063327, "loss": 1.6065, "step": 3860 }, { "epoch": 0.29, "grad_norm": 2.413299083709717, "learning_rate": 0.00016027247269187963, "loss": 1.5931, "step": 3861 }, { "epoch": 0.29, "grad_norm": 1.7792036533355713, "learning_rate": 0.0001602533275340984, "loss": 1.8202, "step": 3862 }, { "epoch": 0.29, "grad_norm": 1.551669716835022, "learning_rate": 0.0001602341789083916, "loss": 1.3756, "step": 3863 }, { "epoch": 0.29, "grad_norm": 1.265211820602417, "learning_rate": 0.00016021502681586128, "loss": 1.0811, "step": 3864 }, { "epoch": 0.29, "grad_norm": 0.8954821825027466, "learning_rate": 0.00016019587125760978, "loss": 1.0016, "step": 3865 }, { "epoch": 0.29, "grad_norm": 1.4046013355255127, "learning_rate": 0.0001601767122347396, "loss": 1.791, "step": 3866 }, { "epoch": 0.3, "grad_norm": 1.2298250198364258, "learning_rate": 0.00016015754974835344, "loss": 1.6095, "step": 3867 }, { "epoch": 0.3, "grad_norm": 0.9545756578445435, "learning_rate": 0.00016013838379955422, "loss": 1.4073, "step": 3868 }, { "epoch": 0.3, "grad_norm": 1.00017511844635, "learning_rate": 0.00016011921438944507, "loss": 0.9373, "step": 3869 }, { "epoch": 0.3, "grad_norm": 1.0618467330932617, "learning_rate": 0.00016010004151912927, "loss": 1.3358, "step": 3870 }, { "epoch": 0.3, "grad_norm": 1.1973522901535034, "learning_rate": 0.00016008086518971037, "loss": 1.7435, "step": 3871 }, { "epoch": 0.3, "grad_norm": 1.132187843322754, "learning_rate": 
0.00016006168540229205, "loss": 1.2885, "step": 3872 }, { "epoch": 0.3, "grad_norm": 1.7296195030212402, "learning_rate": 0.00016004250215797817, "loss": 0.7609, "step": 3873 }, { "epoch": 0.3, "grad_norm": 1.308681845664978, "learning_rate": 0.00016002331545787293, "loss": 1.9071, "step": 3874 }, { "epoch": 0.3, "grad_norm": 1.204052448272705, "learning_rate": 0.00016000412530308057, "loss": 1.1992, "step": 3875 }, { "epoch": 0.3, "grad_norm": 1.505396842956543, "learning_rate": 0.00015998493169470563, "loss": 1.5531, "step": 3876 }, { "epoch": 0.3, "grad_norm": 0.9717725515365601, "learning_rate": 0.00015996573463385278, "loss": 1.578, "step": 3877 }, { "epoch": 0.3, "grad_norm": 1.3662941455841064, "learning_rate": 0.000159946534121627, "loss": 1.3376, "step": 3878 }, { "epoch": 0.3, "grad_norm": 1.2252181768417358, "learning_rate": 0.00015992733015913324, "loss": 1.3999, "step": 3879 }, { "epoch": 0.3, "grad_norm": 1.147063970565796, "learning_rate": 0.00015990812274747692, "loss": 1.3381, "step": 3880 }, { "epoch": 0.3, "grad_norm": 1.4846415519714355, "learning_rate": 0.00015988891188776353, "loss": 1.9209, "step": 3881 }, { "epoch": 0.3, "grad_norm": 1.2965478897094727, "learning_rate": 0.00015986969758109872, "loss": 1.2003, "step": 3882 }, { "epoch": 0.3, "grad_norm": 1.4687979221343994, "learning_rate": 0.00015985047982858838, "loss": 1.7032, "step": 3883 }, { "epoch": 0.3, "grad_norm": 1.1224658489227295, "learning_rate": 0.00015983125863133863, "loss": 1.664, "step": 3884 }, { "epoch": 0.3, "grad_norm": 1.489944338798523, "learning_rate": 0.00015981203399045578, "loss": 1.8277, "step": 3885 }, { "epoch": 0.3, "grad_norm": 1.1769884824752808, "learning_rate": 0.00015979280590704627, "loss": 1.5216, "step": 3886 }, { "epoch": 0.3, "grad_norm": 2.0935990810394287, "learning_rate": 0.00015977357438221683, "loss": 1.0983, "step": 3887 }, { "epoch": 0.3, "grad_norm": 3.519109010696411, "learning_rate": 0.00015975433941707432, "loss": 1.2188, "step": 3888 }, { 
"epoch": 0.3, "grad_norm": 2.105968713760376, "learning_rate": 0.00015973510101272581, "loss": 1.2785, "step": 3889 }, { "epoch": 0.3, "grad_norm": 1.284377098083496, "learning_rate": 0.00015971585917027862, "loss": 1.7624, "step": 3890 }, { "epoch": 0.3, "grad_norm": 1.8035017251968384, "learning_rate": 0.0001596966138908402, "loss": 2.3131, "step": 3891 }, { "epoch": 0.3, "grad_norm": 2.131723403930664, "learning_rate": 0.00015967736517551827, "loss": 1.6058, "step": 3892 }, { "epoch": 0.3, "grad_norm": 1.7131173610687256, "learning_rate": 0.00015965811302542061, "loss": 1.6132, "step": 3893 }, { "epoch": 0.3, "grad_norm": 2.2219080924987793, "learning_rate": 0.0001596388574416554, "loss": 2.0485, "step": 3894 }, { "epoch": 0.3, "grad_norm": 1.5427892208099365, "learning_rate": 0.00015961959842533083, "loss": 0.9657, "step": 3895 }, { "epoch": 0.3, "grad_norm": 2.529392719268799, "learning_rate": 0.0001596003359775554, "loss": 2.3353, "step": 3896 }, { "epoch": 0.3, "grad_norm": 1.0293124914169312, "learning_rate": 0.00015958107009943779, "loss": 1.5666, "step": 3897 }, { "epoch": 0.3, "grad_norm": 1.0747177600860596, "learning_rate": 0.00015956180079208682, "loss": 0.9308, "step": 3898 }, { "epoch": 0.3, "grad_norm": 1.3440934419631958, "learning_rate": 0.00015954252805661158, "loss": 1.3026, "step": 3899 }, { "epoch": 0.3, "grad_norm": 1.401954174041748, "learning_rate": 0.00015952325189412135, "loss": 1.9758, "step": 3900 }, { "epoch": 0.3, "grad_norm": 2.0386810302734375, "learning_rate": 0.00015950397230572554, "loss": 1.7186, "step": 3901 }, { "epoch": 0.3, "grad_norm": 1.2151648998260498, "learning_rate": 0.00015948468929253382, "loss": 1.6859, "step": 3902 }, { "epoch": 0.3, "grad_norm": 1.554107427597046, "learning_rate": 0.000159465402855656, "loss": 1.827, "step": 3903 }, { "epoch": 0.3, "grad_norm": 1.296499490737915, "learning_rate": 0.0001594461129962022, "loss": 1.4439, "step": 3904 }, { "epoch": 0.3, "grad_norm": 1.4734090566635132, 
"learning_rate": 0.0001594268197152826, "loss": 1.525, "step": 3905 }, { "epoch": 0.3, "grad_norm": 2.0726540088653564, "learning_rate": 0.00015940752301400765, "loss": 1.9378, "step": 3906 }, { "epoch": 0.3, "grad_norm": 1.18789541721344, "learning_rate": 0.00015938822289348804, "loss": 1.2423, "step": 3907 }, { "epoch": 0.3, "grad_norm": 0.891474187374115, "learning_rate": 0.00015936891935483454, "loss": 0.8184, "step": 3908 }, { "epoch": 0.3, "grad_norm": 3.488506555557251, "learning_rate": 0.00015934961239915817, "loss": 1.1939, "step": 3909 }, { "epoch": 0.3, "grad_norm": 2.29972767829895, "learning_rate": 0.00015933030202757022, "loss": 1.1801, "step": 3910 }, { "epoch": 0.3, "grad_norm": 2.7303318977355957, "learning_rate": 0.00015931098824118207, "loss": 1.6751, "step": 3911 }, { "epoch": 0.3, "grad_norm": 0.7793150544166565, "learning_rate": 0.00015929167104110535, "loss": 0.7839, "step": 3912 }, { "epoch": 0.3, "grad_norm": 1.1679553985595703, "learning_rate": 0.0001592723504284519, "loss": 1.3867, "step": 3913 }, { "epoch": 0.3, "grad_norm": 1.474737286567688, "learning_rate": 0.00015925302640433365, "loss": 1.5055, "step": 3914 }, { "epoch": 0.3, "grad_norm": 1.4126379489898682, "learning_rate": 0.00015923369896986294, "loss": 1.6276, "step": 3915 }, { "epoch": 0.3, "grad_norm": 1.172542691230774, "learning_rate": 0.00015921436812615204, "loss": 1.3947, "step": 3916 }, { "epoch": 0.3, "grad_norm": 1.6660956144332886, "learning_rate": 0.00015919503387431364, "loss": 0.96, "step": 3917 }, { "epoch": 0.3, "grad_norm": 3.326629161834717, "learning_rate": 0.0001591756962154605, "loss": 1.2558, "step": 3918 }, { "epoch": 0.3, "grad_norm": 3.470954418182373, "learning_rate": 0.00015915635515070564, "loss": 2.2969, "step": 3919 }, { "epoch": 0.3, "grad_norm": 1.233787178993225, "learning_rate": 0.00015913701068116223, "loss": 1.594, "step": 3920 }, { "epoch": 0.3, "grad_norm": 1.5052798986434937, "learning_rate": 0.00015911766280794364, "loss": 2.2493, "step": 
3921 }, { "epoch": 0.3, "grad_norm": 1.3612877130508423, "learning_rate": 0.00015909831153216347, "loss": 1.235, "step": 3922 }, { "epoch": 0.3, "grad_norm": 0.6744251847267151, "learning_rate": 0.00015907895685493553, "loss": 0.9464, "step": 3923 }, { "epoch": 0.3, "grad_norm": 1.446648359298706, "learning_rate": 0.0001590595987773737, "loss": 1.6084, "step": 3924 }, { "epoch": 0.3, "grad_norm": 0.9405522346496582, "learning_rate": 0.00015904023730059228, "loss": 0.9318, "step": 3925 }, { "epoch": 0.3, "grad_norm": 1.0810449123382568, "learning_rate": 0.00015902087242570552, "loss": 1.0576, "step": 3926 }, { "epoch": 0.3, "grad_norm": 1.3492615222930908, "learning_rate": 0.00015900150415382804, "loss": 1.3703, "step": 3927 }, { "epoch": 0.3, "grad_norm": 2.5969834327697754, "learning_rate": 0.00015898213248607458, "loss": 1.7818, "step": 3928 }, { "epoch": 0.3, "grad_norm": 1.3107248544692993, "learning_rate": 0.00015896275742356008, "loss": 1.1707, "step": 3929 }, { "epoch": 0.3, "grad_norm": 1.2916381359100342, "learning_rate": 0.0001589433789673997, "loss": 1.528, "step": 3930 }, { "epoch": 0.3, "grad_norm": 1.1446717977523804, "learning_rate": 0.00015892399711870878, "loss": 1.7058, "step": 3931 }, { "epoch": 0.3, "grad_norm": 1.297234296798706, "learning_rate": 0.00015890461187860285, "loss": 1.5315, "step": 3932 }, { "epoch": 0.3, "grad_norm": 1.6453083753585815, "learning_rate": 0.00015888522324819765, "loss": 1.6731, "step": 3933 }, { "epoch": 0.3, "grad_norm": 2.0398616790771484, "learning_rate": 0.0001588658312286091, "loss": 1.6033, "step": 3934 }, { "epoch": 0.3, "grad_norm": 1.139634132385254, "learning_rate": 0.00015884643582095333, "loss": 1.7413, "step": 3935 }, { "epoch": 0.3, "grad_norm": 1.944981575012207, "learning_rate": 0.00015882703702634662, "loss": 1.2838, "step": 3936 }, { "epoch": 0.3, "grad_norm": 1.358798861503601, "learning_rate": 0.00015880763484590557, "loss": 1.0163, "step": 3937 }, { "epoch": 0.3, "grad_norm": 1.5673645734786987, 
"learning_rate": 0.00015878822928074683, "loss": 1.4402, "step": 3938 }, { "epoch": 0.3, "grad_norm": 1.3477014303207397, "learning_rate": 0.00015876882033198726, "loss": 1.8273, "step": 3939 }, { "epoch": 0.3, "grad_norm": 1.4370341300964355, "learning_rate": 0.00015874940800074402, "loss": 1.8627, "step": 3940 }, { "epoch": 0.3, "grad_norm": 1.2673227787017822, "learning_rate": 0.00015872999228813442, "loss": 1.224, "step": 3941 }, { "epoch": 0.3, "grad_norm": 2.112769365310669, "learning_rate": 0.00015871057319527586, "loss": 1.5334, "step": 3942 }, { "epoch": 0.3, "grad_norm": 2.6814358234405518, "learning_rate": 0.0001586911507232861, "loss": 2.2473, "step": 3943 }, { "epoch": 0.3, "grad_norm": 1.2061277627944946, "learning_rate": 0.00015867172487328294, "loss": 1.9901, "step": 3944 }, { "epoch": 0.3, "grad_norm": 1.0631474256515503, "learning_rate": 0.00015865229564638453, "loss": 1.5243, "step": 3945 }, { "epoch": 0.3, "grad_norm": 4.893720626831055, "learning_rate": 0.0001586328630437091, "loss": 1.9236, "step": 3946 }, { "epoch": 0.3, "grad_norm": 1.3380192518234253, "learning_rate": 0.0001586134270663751, "loss": 1.477, "step": 3947 }, { "epoch": 0.3, "grad_norm": 1.4696624279022217, "learning_rate": 0.0001585939877155012, "loss": 1.4764, "step": 3948 }, { "epoch": 0.3, "grad_norm": 3.0969746112823486, "learning_rate": 0.0001585745449922062, "loss": 1.3824, "step": 3949 }, { "epoch": 0.3, "grad_norm": 1.4517844915390015, "learning_rate": 0.00015855509889760918, "loss": 1.1016, "step": 3950 }, { "epoch": 0.3, "grad_norm": 3.712597131729126, "learning_rate": 0.00015853564943282939, "loss": 2.0262, "step": 3951 }, { "epoch": 0.3, "grad_norm": 1.6418923139572144, "learning_rate": 0.00015851619659898623, "loss": 1.4332, "step": 3952 }, { "epoch": 0.3, "grad_norm": 1.4010775089263916, "learning_rate": 0.00015849674039719933, "loss": 1.2496, "step": 3953 }, { "epoch": 0.3, "grad_norm": 1.2332268953323364, "learning_rate": 0.0001584772808285885, "loss": 1.5264, 
"step": 3954 }, { "epoch": 0.3, "grad_norm": 1.6616755723953247, "learning_rate": 0.00015845781789427377, "loss": 1.4965, "step": 3955 }, { "epoch": 0.3, "grad_norm": 1.0852241516113281, "learning_rate": 0.00015843835159537532, "loss": 1.2083, "step": 3956 }, { "epoch": 0.3, "grad_norm": 2.8280673027038574, "learning_rate": 0.00015841888193301358, "loss": 1.7426, "step": 3957 }, { "epoch": 0.3, "grad_norm": 1.138126254081726, "learning_rate": 0.0001583994089083091, "loss": 1.5725, "step": 3958 }, { "epoch": 0.3, "grad_norm": 1.5112636089324951, "learning_rate": 0.0001583799325223827, "loss": 1.7385, "step": 3959 }, { "epoch": 0.3, "grad_norm": 1.3589348793029785, "learning_rate": 0.00015836045277635533, "loss": 1.5169, "step": 3960 }, { "epoch": 0.3, "grad_norm": 1.3930798768997192, "learning_rate": 0.00015834096967134816, "loss": 1.6811, "step": 3961 }, { "epoch": 0.3, "grad_norm": 2.9656102657318115, "learning_rate": 0.0001583214832084826, "loss": 1.8212, "step": 3962 }, { "epoch": 0.3, "grad_norm": 1.5371625423431396, "learning_rate": 0.00015830199338888012, "loss": 1.2701, "step": 3963 }, { "epoch": 0.3, "grad_norm": 1.4511140584945679, "learning_rate": 0.00015828250021366258, "loss": 1.7008, "step": 3964 }, { "epoch": 0.3, "grad_norm": 1.4485794305801392, "learning_rate": 0.00015826300368395186, "loss": 1.2395, "step": 3965 }, { "epoch": 0.3, "grad_norm": 1.0641224384307861, "learning_rate": 0.0001582435038008701, "loss": 1.5863, "step": 3966 }, { "epoch": 0.3, "grad_norm": 1.214815616607666, "learning_rate": 0.00015822400056553963, "loss": 1.482, "step": 3967 }, { "epoch": 0.3, "grad_norm": 1.257849931716919, "learning_rate": 0.00015820449397908299, "loss": 1.7921, "step": 3968 }, { "epoch": 0.3, "grad_norm": 2.1053626537323, "learning_rate": 0.00015818498404262293, "loss": 1.5394, "step": 3969 }, { "epoch": 0.3, "grad_norm": 1.1240147352218628, "learning_rate": 0.00015816547075728226, "loss": 1.8833, "step": 3970 }, { "epoch": 0.3, "grad_norm": 
1.1349568367004395, "learning_rate": 0.00015814595412418417, "loss": 1.4549, "step": 3971 }, { "epoch": 0.3, "grad_norm": 0.9543851017951965, "learning_rate": 0.00015812643414445194, "loss": 1.502, "step": 3972 }, { "epoch": 0.3, "grad_norm": 1.1909139156341553, "learning_rate": 0.00015810691081920902, "loss": 1.5588, "step": 3973 }, { "epoch": 0.3, "grad_norm": 0.9517868161201477, "learning_rate": 0.0001580873841495791, "loss": 1.6659, "step": 3974 }, { "epoch": 0.3, "grad_norm": 1.780868649482727, "learning_rate": 0.00015806785413668606, "loss": 1.5799, "step": 3975 }, { "epoch": 0.3, "grad_norm": 1.4539562463760376, "learning_rate": 0.00015804832078165402, "loss": 1.5931, "step": 3976 }, { "epoch": 0.3, "grad_norm": 1.077173113822937, "learning_rate": 0.00015802878408560712, "loss": 1.1436, "step": 3977 }, { "epoch": 0.3, "grad_norm": 1.8055979013442993, "learning_rate": 0.00015800924404966993, "loss": 2.1837, "step": 3978 }, { "epoch": 0.3, "grad_norm": 1.7141029834747314, "learning_rate": 0.000157989700674967, "loss": 1.4191, "step": 3979 }, { "epoch": 0.3, "grad_norm": 3.882847785949707, "learning_rate": 0.0001579701539626232, "loss": 1.9388, "step": 3980 }, { "epoch": 0.3, "grad_norm": 1.5761678218841553, "learning_rate": 0.00015795060391376358, "loss": 1.6002, "step": 3981 }, { "epoch": 0.3, "grad_norm": 1.5931499004364014, "learning_rate": 0.00015793105052951334, "loss": 2.0358, "step": 3982 }, { "epoch": 0.3, "grad_norm": 1.0150607824325562, "learning_rate": 0.00015791149381099784, "loss": 1.3029, "step": 3983 }, { "epoch": 0.3, "grad_norm": 1.3129829168319702, "learning_rate": 0.00015789193375934275, "loss": 1.376, "step": 3984 }, { "epoch": 0.3, "grad_norm": 2.289917469024658, "learning_rate": 0.00015787237037567385, "loss": 1.2665, "step": 3985 }, { "epoch": 0.3, "grad_norm": 2.245530366897583, "learning_rate": 0.00015785280366111708, "loss": 0.8767, "step": 3986 }, { "epoch": 0.3, "grad_norm": 2.0232126712799072, "learning_rate": 
0.00015783323361679864, "loss": 1.9368, "step": 3987 }, { "epoch": 0.3, "grad_norm": 1.0550415515899658, "learning_rate": 0.00015781366024384495, "loss": 1.3903, "step": 3988 }, { "epoch": 0.3, "grad_norm": 2.0097477436065674, "learning_rate": 0.00015779408354338253, "loss": 2.0747, "step": 3989 }, { "epoch": 0.3, "grad_norm": 4.042416095733643, "learning_rate": 0.0001577745035165381, "loss": 1.3057, "step": 3990 }, { "epoch": 0.3, "grad_norm": 1.4672178030014038, "learning_rate": 0.00015775492016443867, "loss": 1.7896, "step": 3991 }, { "epoch": 0.3, "grad_norm": 1.3133282661437988, "learning_rate": 0.00015773533348821132, "loss": 1.2484, "step": 3992 }, { "epoch": 0.3, "grad_norm": 1.1747599840164185, "learning_rate": 0.00015771574348898338, "loss": 1.4847, "step": 3993 }, { "epoch": 0.3, "grad_norm": 3.0188369750976562, "learning_rate": 0.00015769615016788242, "loss": 2.4185, "step": 3994 }, { "epoch": 0.3, "grad_norm": 1.3418927192687988, "learning_rate": 0.00015767655352603608, "loss": 1.5244, "step": 3995 }, { "epoch": 0.3, "grad_norm": 2.5242671966552734, "learning_rate": 0.0001576569535645723, "loss": 1.4033, "step": 3996 }, { "epoch": 0.3, "grad_norm": 1.2058335542678833, "learning_rate": 0.00015763735028461915, "loss": 1.4182, "step": 3997 }, { "epoch": 0.31, "grad_norm": 2.7437403202056885, "learning_rate": 0.00015761774368730494, "loss": 1.9066, "step": 3998 }, { "epoch": 0.31, "grad_norm": 1.7787702083587646, "learning_rate": 0.00015759813377375808, "loss": 1.5848, "step": 3999 }, { "epoch": 0.31, "grad_norm": 1.2778657674789429, "learning_rate": 0.0001575785205451073, "loss": 1.713, "step": 4000 }, { "epoch": 0.31, "grad_norm": 1.4619585275650024, "learning_rate": 0.00015755890400248146, "loss": 1.4614, "step": 4001 }, { "epoch": 0.31, "grad_norm": 3.400244951248169, "learning_rate": 0.00015753928414700955, "loss": 1.2921, "step": 4002 }, { "epoch": 0.31, "grad_norm": 1.5238988399505615, "learning_rate": 0.00015751966097982082, "loss": 1.6172, "step": 
4003 }, { "epoch": 0.31, "grad_norm": 1.7320818901062012, "learning_rate": 0.00015750003450204473, "loss": 1.904, "step": 4004 }, { "epoch": 0.31, "grad_norm": 0.8910314440727234, "learning_rate": 0.0001574804047148109, "loss": 1.3557, "step": 4005 }, { "epoch": 0.31, "grad_norm": 2.053166627883911, "learning_rate": 0.00015746077161924905, "loss": 1.7528, "step": 4006 }, { "epoch": 0.31, "grad_norm": 1.339915156364441, "learning_rate": 0.0001574411352164893, "loss": 1.8567, "step": 4007 }, { "epoch": 0.31, "grad_norm": 1.748549461364746, "learning_rate": 0.00015742149550766174, "loss": 1.8719, "step": 4008 }, { "epoch": 0.31, "grad_norm": 2.253838300704956, "learning_rate": 0.0001574018524938968, "loss": 2.3526, "step": 4009 }, { "epoch": 0.31, "grad_norm": 2.436687707901001, "learning_rate": 0.00015738220617632503, "loss": 1.3173, "step": 4010 }, { "epoch": 0.31, "grad_norm": 1.6250596046447754, "learning_rate": 0.0001573625565560772, "loss": 1.0248, "step": 4011 }, { "epoch": 0.31, "grad_norm": 2.276930093765259, "learning_rate": 0.00015734290363428422, "loss": 1.9899, "step": 4012 }, { "epoch": 0.31, "grad_norm": 1.2748504877090454, "learning_rate": 0.0001573232474120773, "loss": 1.4803, "step": 4013 }, { "epoch": 0.31, "grad_norm": 1.0843052864074707, "learning_rate": 0.0001573035878905877, "loss": 1.6317, "step": 4014 }, { "epoch": 0.31, "grad_norm": 1.5202258825302124, "learning_rate": 0.000157283925070947, "loss": 1.6089, "step": 4015 }, { "epoch": 0.31, "grad_norm": 0.8917831778526306, "learning_rate": 0.00015726425895428684, "loss": 1.6801, "step": 4016 }, { "epoch": 0.31, "grad_norm": 1.1503384113311768, "learning_rate": 0.00015724458954173917, "loss": 1.5628, "step": 4017 }, { "epoch": 0.31, "grad_norm": 3.4904539585113525, "learning_rate": 0.00015722491683443606, "loss": 1.4912, "step": 4018 }, { "epoch": 0.31, "grad_norm": 1.9754858016967773, "learning_rate": 0.00015720524083350982, "loss": 1.4804, "step": 4019 }, { "epoch": 0.31, "grad_norm": 
1.1833487749099731, "learning_rate": 0.00015718556154009283, "loss": 1.6981, "step": 4020 }, { "epoch": 0.31, "grad_norm": 1.3675105571746826, "learning_rate": 0.00015716587895531783, "loss": 1.8636, "step": 4021 }, { "epoch": 0.31, "grad_norm": 1.7266604900360107, "learning_rate": 0.00015714619308031765, "loss": 1.6282, "step": 4022 }, { "epoch": 0.31, "grad_norm": 0.7691731452941895, "learning_rate": 0.0001571265039162253, "loss": 1.54, "step": 4023 }, { "epoch": 0.31, "grad_norm": 1.2607325315475464, "learning_rate": 0.000157106811464174, "loss": 1.6959, "step": 4024 }, { "epoch": 0.31, "grad_norm": 0.9828615784645081, "learning_rate": 0.0001570871157252972, "loss": 0.8948, "step": 4025 }, { "epoch": 0.31, "grad_norm": 2.0164618492126465, "learning_rate": 0.0001570674167007285, "loss": 1.525, "step": 4026 }, { "epoch": 0.31, "grad_norm": 1.1341583728790283, "learning_rate": 0.00015704771439160165, "loss": 1.5967, "step": 4027 }, { "epoch": 0.31, "grad_norm": 1.6227061748504639, "learning_rate": 0.00015702800879905068, "loss": 2.0191, "step": 4028 }, { "epoch": 0.31, "grad_norm": 1.3228517770767212, "learning_rate": 0.00015700829992420975, "loss": 2.1278, "step": 4029 }, { "epoch": 0.31, "grad_norm": 1.2315956354141235, "learning_rate": 0.0001569885877682132, "loss": 0.8344, "step": 4030 }, { "epoch": 0.31, "grad_norm": 1.9033571481704712, "learning_rate": 0.0001569688723321956, "loss": 1.5514, "step": 4031 }, { "epoch": 0.31, "grad_norm": 1.16065514087677, "learning_rate": 0.00015694915361729163, "loss": 1.4827, "step": 4032 }, { "epoch": 0.31, "grad_norm": 1.2907543182373047, "learning_rate": 0.00015692943162463628, "loss": 1.5788, "step": 4033 }, { "epoch": 0.31, "grad_norm": 2.1742682456970215, "learning_rate": 0.00015690970635536466, "loss": 1.883, "step": 4034 }, { "epoch": 0.31, "grad_norm": 1.2588188648223877, "learning_rate": 0.00015688997781061203, "loss": 0.7635, "step": 4035 }, { "epoch": 0.31, "grad_norm": 2.848487615585327, "learning_rate": 
0.00015687024599151392, "loss": 3.174, "step": 4036 }, { "epoch": 0.31, "grad_norm": 2.3616507053375244, "learning_rate": 0.000156850510899206, "loss": 1.9144, "step": 4037 }, { "epoch": 0.31, "grad_norm": 1.3106921911239624, "learning_rate": 0.00015683077253482414, "loss": 1.9594, "step": 4038 }, { "epoch": 0.31, "grad_norm": 1.5258063077926636, "learning_rate": 0.00015681103089950436, "loss": 1.4924, "step": 4039 }, { "epoch": 0.31, "grad_norm": 1.5364445447921753, "learning_rate": 0.00015679128599438297, "loss": 1.2366, "step": 4040 }, { "epoch": 0.31, "grad_norm": 1.3445065021514893, "learning_rate": 0.00015677153782059635, "loss": 1.2496, "step": 4041 }, { "epoch": 0.31, "grad_norm": 2.5080325603485107, "learning_rate": 0.00015675178637928114, "loss": 1.734, "step": 4042 }, { "epoch": 0.31, "grad_norm": 1.112967848777771, "learning_rate": 0.00015673203167157417, "loss": 1.5188, "step": 4043 }, { "epoch": 0.31, "grad_norm": 3.224763870239258, "learning_rate": 0.0001567122736986124, "loss": 1.466, "step": 4044 }, { "epoch": 0.31, "grad_norm": 3.104982852935791, "learning_rate": 0.000156692512461533, "loss": 1.1511, "step": 4045 }, { "epoch": 0.31, "grad_norm": 1.3550864458084106, "learning_rate": 0.0001566727479614734, "loss": 1.6372, "step": 4046 }, { "epoch": 0.31, "grad_norm": 1.7799453735351562, "learning_rate": 0.00015665298019957116, "loss": 1.2873, "step": 4047 }, { "epoch": 0.31, "grad_norm": 1.3379591703414917, "learning_rate": 0.00015663320917696396, "loss": 1.2071, "step": 4048 }, { "epoch": 0.31, "grad_norm": 1.8570858240127563, "learning_rate": 0.0001566134348947898, "loss": 1.3774, "step": 4049 }, { "epoch": 0.31, "grad_norm": 1.1747987270355225, "learning_rate": 0.0001565936573541868, "loss": 1.0589, "step": 4050 }, { "epoch": 0.31, "grad_norm": 1.5776042938232422, "learning_rate": 0.00015657387655629323, "loss": 1.8013, "step": 4051 }, { "epoch": 0.31, "grad_norm": 1.6979206800460815, "learning_rate": 0.0001565540925022476, "loss": 1.6858, 
"step": 4052 }, { "epoch": 0.31, "grad_norm": 1.048593521118164, "learning_rate": 0.00015653430519318862, "loss": 1.097, "step": 4053 }, { "epoch": 0.31, "grad_norm": 1.3610543012619019, "learning_rate": 0.00015651451463025517, "loss": 1.175, "step": 4054 }, { "epoch": 0.31, "grad_norm": 0.9309492707252502, "learning_rate": 0.0001564947208145863, "loss": 1.0213, "step": 4055 }, { "epoch": 0.31, "grad_norm": 1.84390389919281, "learning_rate": 0.0001564749237473212, "loss": 1.7034, "step": 4056 }, { "epoch": 0.31, "grad_norm": 1.2530114650726318, "learning_rate": 0.0001564551234295994, "loss": 1.975, "step": 4057 }, { "epoch": 0.31, "grad_norm": 2.4501588344573975, "learning_rate": 0.00015643531986256047, "loss": 1.8485, "step": 4058 }, { "epoch": 0.31, "grad_norm": 1.5094013214111328, "learning_rate": 0.00015641551304734422, "loss": 1.5199, "step": 4059 }, { "epoch": 0.31, "grad_norm": 1.094417691230774, "learning_rate": 0.00015639570298509064, "loss": 2.2153, "step": 4060 }, { "epoch": 0.31, "grad_norm": 1.6084221601486206, "learning_rate": 0.00015637588967693997, "loss": 1.6607, "step": 4061 }, { "epoch": 0.31, "grad_norm": 1.3005759716033936, "learning_rate": 0.0001563560731240325, "loss": 1.2576, "step": 4062 }, { "epoch": 0.31, "grad_norm": 1.2994794845581055, "learning_rate": 0.00015633625332750882, "loss": 1.5522, "step": 4063 }, { "epoch": 0.31, "grad_norm": 1.880218744277954, "learning_rate": 0.0001563164302885097, "loss": 1.842, "step": 4064 }, { "epoch": 0.31, "grad_norm": 1.0380518436431885, "learning_rate": 0.00015629660400817604, "loss": 1.6216, "step": 4065 }, { "epoch": 0.31, "grad_norm": 1.3852065801620483, "learning_rate": 0.00015627677448764896, "loss": 1.524, "step": 4066 }, { "epoch": 0.31, "grad_norm": 2.613908052444458, "learning_rate": 0.00015625694172806976, "loss": 2.0098, "step": 4067 }, { "epoch": 0.31, "grad_norm": 1.9857579469680786, "learning_rate": 0.00015623710573057996, "loss": 1.5212, "step": 4068 }, { "epoch": 0.31, "grad_norm": 
0.8196505904197693, "learning_rate": 0.00015621726649632116, "loss": 1.0208, "step": 4069 }, { "epoch": 0.31, "grad_norm": 0.8179872035980225, "learning_rate": 0.0001561974240264353, "loss": 1.0184, "step": 4070 }, { "epoch": 0.31, "grad_norm": 1.619863510131836, "learning_rate": 0.00015617757832206438, "loss": 0.8391, "step": 4071 }, { "epoch": 0.31, "grad_norm": 1.0666165351867676, "learning_rate": 0.0001561577293843507, "loss": 1.1724, "step": 4072 }, { "epoch": 0.31, "grad_norm": 1.3793888092041016, "learning_rate": 0.00015613787721443663, "loss": 2.1229, "step": 4073 }, { "epoch": 0.31, "grad_norm": 1.5787466764450073, "learning_rate": 0.00015611802181346476, "loss": 1.4332, "step": 4074 }, { "epoch": 0.31, "grad_norm": 1.746543049812317, "learning_rate": 0.00015609816318257788, "loss": 1.5612, "step": 4075 }, { "epoch": 0.31, "grad_norm": 1.4334269762039185, "learning_rate": 0.00015607830132291904, "loss": 1.7791, "step": 4076 }, { "epoch": 0.31, "grad_norm": 1.6243151426315308, "learning_rate": 0.00015605843623563135, "loss": 1.8735, "step": 4077 }, { "epoch": 0.31, "grad_norm": 1.143315315246582, "learning_rate": 0.00015603856792185817, "loss": 1.3479, "step": 4078 }, { "epoch": 0.31, "grad_norm": 4.3892621994018555, "learning_rate": 0.000156018696382743, "loss": 1.4043, "step": 4079 }, { "epoch": 0.31, "grad_norm": 2.235616683959961, "learning_rate": 0.00015599882161942966, "loss": 1.5662, "step": 4080 }, { "epoch": 0.31, "grad_norm": 1.5948801040649414, "learning_rate": 0.0001559789436330619, "loss": 1.9283, "step": 4081 }, { "epoch": 0.31, "grad_norm": 1.215663194656372, "learning_rate": 0.00015595906242478399, "loss": 1.6745, "step": 4082 }, { "epoch": 0.31, "grad_norm": 1.795485496520996, "learning_rate": 0.00015593917799574007, "loss": 1.9466, "step": 4083 }, { "epoch": 0.31, "grad_norm": 1.101409673690796, "learning_rate": 0.0001559192903470747, "loss": 1.7135, "step": 4084 }, { "epoch": 0.31, "grad_norm": 1.1422795057296753, "learning_rate": 
0.00015589939947993246, "loss": 1.8041, "step": 4085 }, { "epoch": 0.31, "grad_norm": 1.247352123260498, "learning_rate": 0.0001558795053954582, "loss": 1.5809, "step": 4086 }, { "epoch": 0.31, "grad_norm": 0.941565752029419, "learning_rate": 0.00015585960809479696, "loss": 0.5544, "step": 4087 }, { "epoch": 0.31, "grad_norm": 1.1898820400238037, "learning_rate": 0.0001558397075790939, "loss": 1.4759, "step": 4088 }, { "epoch": 0.31, "grad_norm": 1.5005676746368408, "learning_rate": 0.0001558198038494945, "loss": 1.3871, "step": 4089 }, { "epoch": 0.31, "grad_norm": 1.4076660871505737, "learning_rate": 0.00015579989690714423, "loss": 1.2878, "step": 4090 }, { "epoch": 0.31, "grad_norm": 1.6019365787506104, "learning_rate": 0.0001557799867531889, "loss": 1.5888, "step": 4091 }, { "epoch": 0.31, "grad_norm": 1.1255342960357666, "learning_rate": 0.00015576007338877448, "loss": 1.5513, "step": 4092 }, { "epoch": 0.31, "grad_norm": 2.131397247314453, "learning_rate": 0.00015574015681504703, "loss": 0.9781, "step": 4093 }, { "epoch": 0.31, "grad_norm": 2.2815465927124023, "learning_rate": 0.0001557202370331529, "loss": 1.2563, "step": 4094 }, { "epoch": 0.31, "grad_norm": 1.502588152885437, "learning_rate": 0.0001557003140442386, "loss": 2.1834, "step": 4095 }, { "epoch": 0.31, "grad_norm": 2.28338360786438, "learning_rate": 0.00015568038784945077, "loss": 2.0672, "step": 4096 }, { "epoch": 0.31, "grad_norm": 1.0000617504119873, "learning_rate": 0.00015566045844993636, "loss": 1.3105, "step": 4097 }, { "epoch": 0.31, "grad_norm": 2.9594290256500244, "learning_rate": 0.00015564052584684235, "loss": 2.4313, "step": 4098 }, { "epoch": 0.31, "grad_norm": 1.0612542629241943, "learning_rate": 0.000155620590041316, "loss": 1.4098, "step": 4099 }, { "epoch": 0.31, "grad_norm": 0.8951611518859863, "learning_rate": 0.00015560065103450472, "loss": 1.3417, "step": 4100 }, { "epoch": 0.31, "grad_norm": 1.5202577114105225, "learning_rate": 0.00015558070882755608, "loss": 1.6044, 
"step": 4101 }, { "epoch": 0.31, "grad_norm": 1.0494600534439087, "learning_rate": 0.00015556076342161795, "loss": 1.3811, "step": 4102 }, { "epoch": 0.31, "grad_norm": 2.3223514556884766, "learning_rate": 0.00015554081481783827, "loss": 1.5725, "step": 4103 }, { "epoch": 0.31, "grad_norm": 1.771416187286377, "learning_rate": 0.00015552086301736515, "loss": 1.689, "step": 4104 }, { "epoch": 0.31, "grad_norm": 1.2513636350631714, "learning_rate": 0.000155500908021347, "loss": 1.9223, "step": 4105 }, { "epoch": 0.31, "grad_norm": 1.0203574895858765, "learning_rate": 0.0001554809498309323, "loss": 1.3442, "step": 4106 }, { "epoch": 0.31, "grad_norm": 2.1013503074645996, "learning_rate": 0.00015546098844726979, "loss": 1.8298, "step": 4107 }, { "epoch": 0.31, "grad_norm": 1.3684417009353638, "learning_rate": 0.00015544102387150832, "loss": 1.4631, "step": 4108 }, { "epoch": 0.31, "grad_norm": 1.2036659717559814, "learning_rate": 0.000155421056104797, "loss": 0.5773, "step": 4109 }, { "epoch": 0.31, "grad_norm": 1.5284160375595093, "learning_rate": 0.0001554010851482851, "loss": 1.5228, "step": 4110 }, { "epoch": 0.31, "grad_norm": 1.7704992294311523, "learning_rate": 0.000155381111003122, "loss": 1.2478, "step": 4111 }, { "epoch": 0.31, "grad_norm": 1.6086316108703613, "learning_rate": 0.00015536113367045743, "loss": 1.359, "step": 4112 }, { "epoch": 0.31, "grad_norm": 1.2677024602890015, "learning_rate": 0.00015534115315144113, "loss": 1.4411, "step": 4113 }, { "epoch": 0.31, "grad_norm": 1.9112883806228638, "learning_rate": 0.00015532116944722308, "loss": 1.5933, "step": 4114 }, { "epoch": 0.31, "grad_norm": 3.2705650329589844, "learning_rate": 0.0001553011825589535, "loss": 1.8645, "step": 4115 }, { "epoch": 0.31, "grad_norm": 1.3911248445510864, "learning_rate": 0.00015528119248778272, "loss": 1.8438, "step": 4116 }, { "epoch": 0.31, "grad_norm": 1.283413290977478, "learning_rate": 0.00015526119923486132, "loss": 1.1675, "step": 4117 }, { "epoch": 0.31, 
"grad_norm": 1.5204799175262451, "learning_rate": 0.00015524120280133998, "loss": 1.4299, "step": 4118 }, { "epoch": 0.31, "grad_norm": 1.0682450532913208, "learning_rate": 0.00015522120318836962, "loss": 1.5021, "step": 4119 }, { "epoch": 0.31, "grad_norm": 1.6098397970199585, "learning_rate": 0.00015520120039710139, "loss": 1.7695, "step": 4120 }, { "epoch": 0.31, "grad_norm": 2.2295291423797607, "learning_rate": 0.0001551811944286865, "loss": 1.5106, "step": 4121 }, { "epoch": 0.31, "grad_norm": 1.0375710725784302, "learning_rate": 0.00015516118528427643, "loss": 1.4628, "step": 4122 }, { "epoch": 0.31, "grad_norm": 1.6233023405075073, "learning_rate": 0.0001551411729650228, "loss": 1.7969, "step": 4123 }, { "epoch": 0.31, "grad_norm": 1.5603013038635254, "learning_rate": 0.00015512115747207749, "loss": 2.2574, "step": 4124 }, { "epoch": 0.31, "grad_norm": 1.9263139963150024, "learning_rate": 0.00015510113880659246, "loss": 1.964, "step": 4125 }, { "epoch": 0.31, "grad_norm": 2.1850385665893555, "learning_rate": 0.0001550811169697199, "loss": 1.2615, "step": 4126 }, { "epoch": 0.31, "grad_norm": 5.944307804107666, "learning_rate": 0.00015506109196261223, "loss": 1.8363, "step": 4127 }, { "epoch": 0.31, "grad_norm": 1.2131215333938599, "learning_rate": 0.00015504106378642194, "loss": 1.5044, "step": 4128 }, { "epoch": 0.32, "grad_norm": 1.233285665512085, "learning_rate": 0.0001550210324423018, "loss": 1.3792, "step": 4129 }, { "epoch": 0.32, "grad_norm": 1.1556248664855957, "learning_rate": 0.00015500099793140475, "loss": 1.7399, "step": 4130 }, { "epoch": 0.32, "grad_norm": 1.3123124837875366, "learning_rate": 0.0001549809602548838, "loss": 1.415, "step": 4131 }, { "epoch": 0.32, "grad_norm": 1.4996397495269775, "learning_rate": 0.00015496091941389234, "loss": 0.7918, "step": 4132 }, { "epoch": 0.32, "grad_norm": 2.011246681213379, "learning_rate": 0.0001549408754095838, "loss": 2.1293, "step": 4133 }, { "epoch": 0.32, "grad_norm": 1.5494531393051147, 
"learning_rate": 0.00015492082824311183, "loss": 1.2924, "step": 4134 }, { "epoch": 0.32, "grad_norm": 1.111364483833313, "learning_rate": 0.0001549007779156302, "loss": 1.3129, "step": 4135 }, { "epoch": 0.32, "grad_norm": 2.790956497192383, "learning_rate": 0.00015488072442829305, "loss": 1.4828, "step": 4136 }, { "epoch": 0.32, "grad_norm": 1.4124730825424194, "learning_rate": 0.00015486066778225445, "loss": 1.5037, "step": 4137 }, { "epoch": 0.32, "grad_norm": 2.7111315727233887, "learning_rate": 0.00015484060797866885, "loss": 1.8355, "step": 4138 }, { "epoch": 0.32, "grad_norm": 1.3722468614578247, "learning_rate": 0.00015482054501869075, "loss": 1.8685, "step": 4139 }, { "epoch": 0.32, "grad_norm": 2.3979604244232178, "learning_rate": 0.00015480047890347492, "loss": 2.6869, "step": 4140 }, { "epoch": 0.32, "grad_norm": 2.228682041168213, "learning_rate": 0.0001547804096341763, "loss": 2.0281, "step": 4141 }, { "epoch": 0.32, "grad_norm": 1.0409401655197144, "learning_rate": 0.00015476033721194995, "loss": 1.4291, "step": 4142 }, { "epoch": 0.32, "grad_norm": 1.179781436920166, "learning_rate": 0.00015474026163795119, "loss": 1.4627, "step": 4143 }, { "epoch": 0.32, "grad_norm": 1.1429835557937622, "learning_rate": 0.00015472018291333547, "loss": 1.4494, "step": 4144 }, { "epoch": 0.32, "grad_norm": 1.8057186603546143, "learning_rate": 0.00015470010103925841, "loss": 1.8741, "step": 4145 }, { "epoch": 0.32, "grad_norm": 1.403982162475586, "learning_rate": 0.00015468001601687593, "loss": 1.2786, "step": 4146 }, { "epoch": 0.32, "grad_norm": 1.0550892353057861, "learning_rate": 0.0001546599278473439, "loss": 1.9462, "step": 4147 }, { "epoch": 0.32, "grad_norm": 1.5329058170318604, "learning_rate": 0.00015463983653181862, "loss": 1.5537, "step": 4148 }, { "epoch": 0.32, "grad_norm": 1.0739796161651611, "learning_rate": 0.00015461974207145638, "loss": 0.9528, "step": 4149 }, { "epoch": 0.32, "grad_norm": 1.9111593961715698, "learning_rate": 
0.00015459964446741382, "loss": 2.622, "step": 4150 }, { "epoch": 0.32, "grad_norm": 2.2230265140533447, "learning_rate": 0.00015457954372084763, "loss": 1.6038, "step": 4151 }, { "epoch": 0.32, "grad_norm": 1.3594377040863037, "learning_rate": 0.00015455943983291469, "loss": 1.5672, "step": 4152 }, { "epoch": 0.32, "grad_norm": 2.2976338863372803, "learning_rate": 0.00015453933280477214, "loss": 1.6686, "step": 4153 }, { "epoch": 0.32, "grad_norm": 2.6970274448394775, "learning_rate": 0.0001545192226375772, "loss": 1.7467, "step": 4154 }, { "epoch": 0.32, "grad_norm": 1.4039156436920166, "learning_rate": 0.00015449910933248743, "loss": 1.3212, "step": 4155 }, { "epoch": 0.32, "grad_norm": 1.148066520690918, "learning_rate": 0.00015447899289066034, "loss": 1.7759, "step": 4156 }, { "epoch": 0.32, "grad_norm": 1.3226417303085327, "learning_rate": 0.00015445887331325383, "loss": 0.8857, "step": 4157 }, { "epoch": 0.32, "grad_norm": 3.52237606048584, "learning_rate": 0.0001544387506014259, "loss": 1.4878, "step": 4158 }, { "epoch": 0.32, "grad_norm": 1.4006102085113525, "learning_rate": 0.0001544186247563347, "loss": 1.6052, "step": 4159 }, { "epoch": 0.32, "grad_norm": 2.43133544921875, "learning_rate": 0.00015439849577913856, "loss": 1.5378, "step": 4160 }, { "epoch": 0.32, "grad_norm": 0.9550149440765381, "learning_rate": 0.0001543783636709961, "loss": 0.8933, "step": 4161 }, { "epoch": 0.32, "grad_norm": 1.8766151666641235, "learning_rate": 0.00015435822843306592, "loss": 1.2398, "step": 4162 }, { "epoch": 0.32, "grad_norm": 1.1206552982330322, "learning_rate": 0.00015433809006650703, "loss": 1.1496, "step": 4163 }, { "epoch": 0.32, "grad_norm": 1.0961745977401733, "learning_rate": 0.00015431794857247847, "loss": 0.7993, "step": 4164 }, { "epoch": 0.32, "grad_norm": 2.769824504852295, "learning_rate": 0.0001542978039521395, "loss": 1.4123, "step": 4165 }, { "epoch": 0.32, "grad_norm": 1.1291941404342651, "learning_rate": 0.0001542776562066495, "loss": 1.1175, 
"step": 4166 }, { "epoch": 0.32, "grad_norm": 2.085787296295166, "learning_rate": 0.0001542575053371682, "loss": 1.2525, "step": 4167 }, { "epoch": 0.32, "grad_norm": 3.1958365440368652, "learning_rate": 0.00015423735134485536, "loss": 1.5302, "step": 4168 }, { "epoch": 0.32, "grad_norm": 0.9692180752754211, "learning_rate": 0.0001542171942308709, "loss": 1.7709, "step": 4169 }, { "epoch": 0.32, "grad_norm": 1.806141972541809, "learning_rate": 0.000154197033996375, "loss": 1.3439, "step": 4170 }, { "epoch": 0.32, "grad_norm": 3.33609676361084, "learning_rate": 0.00015417687064252805, "loss": 2.0443, "step": 4171 }, { "epoch": 0.32, "grad_norm": 1.2324546575546265, "learning_rate": 0.00015415670417049052, "loss": 1.8095, "step": 4172 }, { "epoch": 0.32, "grad_norm": 1.1998465061187744, "learning_rate": 0.00015413653458142313, "loss": 1.6961, "step": 4173 }, { "epoch": 0.32, "grad_norm": 0.9540925621986389, "learning_rate": 0.00015411636187648674, "loss": 1.6289, "step": 4174 }, { "epoch": 0.32, "grad_norm": 1.2378922700881958, "learning_rate": 0.0001540961860568424, "loss": 1.2487, "step": 4175 }, { "epoch": 0.32, "grad_norm": 2.576397657394409, "learning_rate": 0.00015407600712365134, "loss": 2.4807, "step": 4176 }, { "epoch": 0.32, "grad_norm": 1.9190679788589478, "learning_rate": 0.00015405582507807504, "loss": 1.4934, "step": 4177 }, { "epoch": 0.32, "grad_norm": 1.4671469926834106, "learning_rate": 0.00015403563992127497, "loss": 1.9188, "step": 4178 }, { "epoch": 0.32, "grad_norm": 0.9988290071487427, "learning_rate": 0.000154015451654413, "loss": 1.4721, "step": 4179 }, { "epoch": 0.32, "grad_norm": 2.5827176570892334, "learning_rate": 0.00015399526027865107, "loss": 0.9939, "step": 4180 }, { "epoch": 0.32, "grad_norm": 1.8529517650604248, "learning_rate": 0.0001539750657951513, "loss": 1.5728, "step": 4181 }, { "epoch": 0.32, "grad_norm": 2.080397129058838, "learning_rate": 0.00015395486820507598, "loss": 1.7607, "step": 4182 }, { "epoch": 0.32, "grad_norm": 
1.154158592224121, "learning_rate": 0.0001539346675095876, "loss": 1.5007, "step": 4183 }, { "epoch": 0.32, "grad_norm": 1.6012204885482788, "learning_rate": 0.00015391446370984884, "loss": 1.2152, "step": 4184 }, { "epoch": 0.32, "grad_norm": 2.1034135818481445, "learning_rate": 0.00015389425680702257, "loss": 1.6183, "step": 4185 }, { "epoch": 0.32, "grad_norm": 0.9097841382026672, "learning_rate": 0.00015387404680227175, "loss": 0.9633, "step": 4186 }, { "epoch": 0.32, "grad_norm": 1.20456862449646, "learning_rate": 0.0001538538336967596, "loss": 1.4395, "step": 4187 }, { "epoch": 0.32, "grad_norm": 1.1379979848861694, "learning_rate": 0.00015383361749164953, "loss": 1.7023, "step": 4188 }, { "epoch": 0.32, "grad_norm": 1.243886947631836, "learning_rate": 0.0001538133981881051, "loss": 0.8999, "step": 4189 }, { "epoch": 0.32, "grad_norm": 1.789825439453125, "learning_rate": 0.00015379317578729005, "loss": 1.6249, "step": 4190 }, { "epoch": 0.32, "grad_norm": 4.8166913986206055, "learning_rate": 0.00015377295029036825, "loss": 1.8845, "step": 4191 }, { "epoch": 0.32, "grad_norm": 1.3521908521652222, "learning_rate": 0.00015375272169850383, "loss": 1.1732, "step": 4192 }, { "epoch": 0.32, "grad_norm": 1.819221019744873, "learning_rate": 0.00015373249001286103, "loss": 1.607, "step": 4193 }, { "epoch": 0.32, "grad_norm": 1.4962987899780273, "learning_rate": 0.00015371225523460433, "loss": 1.1245, "step": 4194 }, { "epoch": 0.32, "grad_norm": 1.737860918045044, "learning_rate": 0.0001536920173648984, "loss": 1.7559, "step": 4195 }, { "epoch": 0.32, "grad_norm": 2.5538790225982666, "learning_rate": 0.00015367177640490795, "loss": 1.2859, "step": 4196 }, { "epoch": 0.32, "grad_norm": 1.0334223508834839, "learning_rate": 0.00015365153235579802, "loss": 0.7285, "step": 4197 }, { "epoch": 0.32, "grad_norm": 1.5521998405456543, "learning_rate": 0.00015363128521873377, "loss": 1.3023, "step": 4198 }, { "epoch": 0.32, "grad_norm": 1.8502744436264038, "learning_rate": 
0.00015361103499488053, "loss": 1.5032, "step": 4199 }, { "epoch": 0.32, "grad_norm": 2.3506996631622314, "learning_rate": 0.00015359078168540382, "loss": 1.9226, "step": 4200 }, { "epoch": 0.32, "grad_norm": 2.0195200443267822, "learning_rate": 0.00015357052529146935, "loss": 1.3395, "step": 4201 }, { "epoch": 0.32, "grad_norm": 1.165113091468811, "learning_rate": 0.00015355026581424292, "loss": 1.5456, "step": 4202 }, { "epoch": 0.32, "grad_norm": 1.44685959815979, "learning_rate": 0.00015353000325489066, "loss": 1.6098, "step": 4203 }, { "epoch": 0.32, "grad_norm": 1.5379867553710938, "learning_rate": 0.00015350973761457884, "loss": 1.2661, "step": 4204 }, { "epoch": 0.32, "grad_norm": 1.195510745048523, "learning_rate": 0.00015348946889447368, "loss": 1.4804, "step": 4205 }, { "epoch": 0.32, "grad_norm": 1.967592716217041, "learning_rate": 0.00015346919709574195, "loss": 1.5954, "step": 4206 }, { "epoch": 0.32, "grad_norm": 2.4399478435516357, "learning_rate": 0.00015344892221955033, "loss": 1.6866, "step": 4207 }, { "epoch": 0.32, "grad_norm": 1.0804866552352905, "learning_rate": 0.00015342864426706574, "loss": 1.3279, "step": 4208 }, { "epoch": 0.32, "grad_norm": 1.281765103340149, "learning_rate": 0.00015340836323945537, "loss": 1.1983, "step": 4209 }, { "epoch": 0.32, "grad_norm": 1.9866077899932861, "learning_rate": 0.00015338807913788636, "loss": 1.2137, "step": 4210 }, { "epoch": 0.32, "grad_norm": 1.6729042530059814, "learning_rate": 0.00015336779196352632, "loss": 1.0829, "step": 4211 }, { "epoch": 0.32, "grad_norm": 0.8971192836761475, "learning_rate": 0.00015334750171754283, "loss": 1.5221, "step": 4212 }, { "epoch": 0.32, "grad_norm": 1.0506041049957275, "learning_rate": 0.00015332720840110375, "loss": 1.7382, "step": 4213 }, { "epoch": 0.32, "grad_norm": 1.9004542827606201, "learning_rate": 0.00015330691201537706, "loss": 1.6622, "step": 4214 }, { "epoch": 0.32, "grad_norm": 1.4731677770614624, "learning_rate": 0.00015328661256153091, "loss": 
1.7589, "step": 4215 }, { "epoch": 0.32, "grad_norm": 1.4193482398986816, "learning_rate": 0.00015326631004073364, "loss": 1.5324, "step": 4216 }, { "epoch": 0.32, "grad_norm": 2.8266592025756836, "learning_rate": 0.00015324600445415382, "loss": 1.7591, "step": 4217 }, { "epoch": 0.32, "grad_norm": 0.9532005786895752, "learning_rate": 0.00015322569580296017, "loss": 1.0426, "step": 4218 }, { "epoch": 0.32, "grad_norm": 1.2454605102539062, "learning_rate": 0.0001532053840883215, "loss": 1.036, "step": 4219 }, { "epoch": 0.32, "grad_norm": 1.160077691078186, "learning_rate": 0.0001531850693114069, "loss": 1.5048, "step": 4220 }, { "epoch": 0.32, "grad_norm": 1.3288952112197876, "learning_rate": 0.0001531647514733856, "loss": 1.4408, "step": 4221 }, { "epoch": 0.32, "grad_norm": 1.6900595426559448, "learning_rate": 0.00015314443057542703, "loss": 1.6855, "step": 4222 }, { "epoch": 0.32, "grad_norm": 1.7702051401138306, "learning_rate": 0.00015312410661870075, "loss": 1.3898, "step": 4223 }, { "epoch": 0.32, "grad_norm": 2.182692766189575, "learning_rate": 0.00015310377960437655, "loss": 1.5276, "step": 4224 }, { "epoch": 0.32, "grad_norm": 1.200721025466919, "learning_rate": 0.0001530834495336243, "loss": 1.527, "step": 4225 }, { "epoch": 0.32, "grad_norm": 1.4783064126968384, "learning_rate": 0.0001530631164076142, "loss": 0.9666, "step": 4226 }, { "epoch": 0.32, "grad_norm": 1.040252685546875, "learning_rate": 0.00015304278022751648, "loss": 1.3766, "step": 4227 }, { "epoch": 0.32, "grad_norm": 2.4085710048675537, "learning_rate": 0.00015302244099450163, "loss": 1.8188, "step": 4228 }, { "epoch": 0.32, "grad_norm": 1.4589476585388184, "learning_rate": 0.00015300209870974026, "loss": 1.2453, "step": 4229 }, { "epoch": 0.32, "grad_norm": 1.1886327266693115, "learning_rate": 0.0001529817533744032, "loss": 1.5938, "step": 4230 }, { "epoch": 0.32, "grad_norm": 2.7345187664031982, "learning_rate": 0.00015296140498966145, "loss": 1.7054, "step": 4231 }, { "epoch": 0.32, 
"grad_norm": 1.1074328422546387, "learning_rate": 0.0001529410535566862, "loss": 1.0883, "step": 4232 }, { "epoch": 0.32, "grad_norm": 0.9749411940574646, "learning_rate": 0.00015292069907664875, "loss": 1.6999, "step": 4233 }, { "epoch": 0.32, "grad_norm": 2.422886610031128, "learning_rate": 0.0001529003415507206, "loss": 2.1126, "step": 4234 }, { "epoch": 0.32, "grad_norm": 1.7271873950958252, "learning_rate": 0.0001528799809800735, "loss": 1.462, "step": 4235 }, { "epoch": 0.32, "grad_norm": 2.1001627445220947, "learning_rate": 0.0001528596173658793, "loss": 1.6256, "step": 4236 }, { "epoch": 0.32, "grad_norm": 1.4662809371948242, "learning_rate": 0.00015283925070931002, "loss": 1.3113, "step": 4237 }, { "epoch": 0.32, "grad_norm": 1.413010597229004, "learning_rate": 0.0001528188810115379, "loss": 1.0579, "step": 4238 }, { "epoch": 0.32, "grad_norm": 2.7408642768859863, "learning_rate": 0.00015279850827373535, "loss": 1.5193, "step": 4239 }, { "epoch": 0.32, "grad_norm": 1.5681027173995972, "learning_rate": 0.00015277813249707487, "loss": 0.865, "step": 4240 }, { "epoch": 0.32, "grad_norm": 1.1399493217468262, "learning_rate": 0.0001527577536827293, "loss": 1.515, "step": 4241 }, { "epoch": 0.32, "grad_norm": 2.0298805236816406, "learning_rate": 0.00015273737183187148, "loss": 1.8718, "step": 4242 }, { "epoch": 0.32, "grad_norm": 0.9037831425666809, "learning_rate": 0.00015271698694567452, "loss": 1.1451, "step": 4243 }, { "epoch": 0.32, "grad_norm": 1.9181755781173706, "learning_rate": 0.0001526965990253117, "loss": 1.6951, "step": 4244 }, { "epoch": 0.32, "grad_norm": 0.9112578630447388, "learning_rate": 0.00015267620807195647, "loss": 1.4813, "step": 4245 }, { "epoch": 0.32, "grad_norm": 1.1957811117172241, "learning_rate": 0.00015265581408678244, "loss": 1.1655, "step": 4246 }, { "epoch": 0.32, "grad_norm": 4.4678955078125, "learning_rate": 0.0001526354170709634, "loss": 1.7669, "step": 4247 }, { "epoch": 0.32, "grad_norm": 1.3533265590667725, 
"learning_rate": 0.0001526150170256733, "loss": 1.4495, "step": 4248 }, { "epoch": 0.32, "grad_norm": 1.2308430671691895, "learning_rate": 0.00015259461395208628, "loss": 1.5373, "step": 4249 }, { "epoch": 0.32, "grad_norm": 3.525113821029663, "learning_rate": 0.00015257420785137667, "loss": 1.6891, "step": 4250 }, { "epoch": 0.32, "grad_norm": 2.324422836303711, "learning_rate": 0.000152553798724719, "loss": 1.3701, "step": 4251 }, { "epoch": 0.32, "grad_norm": 1.6599849462509155, "learning_rate": 0.00015253338657328784, "loss": 0.9469, "step": 4252 }, { "epoch": 0.32, "grad_norm": 3.278966188430786, "learning_rate": 0.00015251297139825808, "loss": 2.1375, "step": 4253 }, { "epoch": 0.32, "grad_norm": 1.4641180038452148, "learning_rate": 0.00015249255320080475, "loss": 1.4188, "step": 4254 }, { "epoch": 0.32, "grad_norm": 1.375013828277588, "learning_rate": 0.000152472131982103, "loss": 0.9004, "step": 4255 }, { "epoch": 0.32, "grad_norm": 1.2792097330093384, "learning_rate": 0.0001524517077433282, "loss": 1.9117, "step": 4256 }, { "epoch": 0.32, "grad_norm": 1.2357218265533447, "learning_rate": 0.00015243128048565588, "loss": 1.3456, "step": 4257 }, { "epoch": 0.32, "grad_norm": 3.1047604084014893, "learning_rate": 0.00015241085021026172, "loss": 1.4586, "step": 4258 }, { "epoch": 0.32, "grad_norm": 1.4472967386245728, "learning_rate": 0.00015239041691832164, "loss": 2.0918, "step": 4259 }, { "epoch": 0.33, "grad_norm": 1.130683422088623, "learning_rate": 0.0001523699806110117, "loss": 1.5731, "step": 4260 }, { "epoch": 0.33, "grad_norm": 1.9415346384048462, "learning_rate": 0.0001523495412895081, "loss": 1.4018, "step": 4261 }, { "epoch": 0.33, "grad_norm": 1.0844814777374268, "learning_rate": 0.00015232909895498726, "loss": 2.0307, "step": 4262 }, { "epoch": 0.33, "grad_norm": 1.0138764381408691, "learning_rate": 0.00015230865360862572, "loss": 1.2281, "step": 4263 }, { "epoch": 0.33, "grad_norm": 1.9924100637435913, "learning_rate": 0.00015228820525160024, 
"loss": 2.011, "step": 4264 }, { "epoch": 0.33, "grad_norm": 1.4282402992248535, "learning_rate": 0.00015226775388508778, "loss": 1.8521, "step": 4265 }, { "epoch": 0.33, "grad_norm": 1.1661068201065063, "learning_rate": 0.0001522472995102654, "loss": 1.1813, "step": 4266 }, { "epoch": 0.33, "grad_norm": 0.9357667565345764, "learning_rate": 0.00015222684212831035, "loss": 1.6822, "step": 4267 }, { "epoch": 0.33, "grad_norm": 1.4490426778793335, "learning_rate": 0.0001522063817404001, "loss": 2.0216, "step": 4268 }, { "epoch": 0.33, "grad_norm": 1.400903582572937, "learning_rate": 0.00015218591834771224, "loss": 1.6817, "step": 4269 }, { "epoch": 0.33, "grad_norm": 3.0427184104919434, "learning_rate": 0.0001521654519514246, "loss": 1.4442, "step": 4270 }, { "epoch": 0.33, "grad_norm": 2.4648215770721436, "learning_rate": 0.00015214498255271513, "loss": 1.424, "step": 4271 }, { "epoch": 0.33, "grad_norm": 3.6934335231781006, "learning_rate": 0.00015212451015276187, "loss": 1.8586, "step": 4272 }, { "epoch": 0.33, "grad_norm": 1.0555566549301147, "learning_rate": 0.00015210403475274324, "loss": 1.3379, "step": 4273 }, { "epoch": 0.33, "grad_norm": 1.9061485528945923, "learning_rate": 0.00015208355635383763, "loss": 1.3849, "step": 4274 }, { "epoch": 0.33, "grad_norm": 1.7946248054504395, "learning_rate": 0.00015206307495722377, "loss": 1.1939, "step": 4275 }, { "epoch": 0.33, "grad_norm": 1.5846130847930908, "learning_rate": 0.00015204259056408046, "loss": 1.1731, "step": 4276 }, { "epoch": 0.33, "grad_norm": 1.1361098289489746, "learning_rate": 0.00015202210317558667, "loss": 1.3536, "step": 4277 }, { "epoch": 0.33, "grad_norm": 1.2815276384353638, "learning_rate": 0.00015200161279292155, "loss": 0.9937, "step": 4278 }, { "epoch": 0.33, "grad_norm": 1.7220075130462646, "learning_rate": 0.0001519811194172645, "loss": 1.731, "step": 4279 }, { "epoch": 0.33, "grad_norm": 1.3887735605239868, "learning_rate": 0.00015196062304979497, "loss": 1.5786, "step": 4280 }, { 
"epoch": 0.33, "grad_norm": 1.1704034805297852, "learning_rate": 0.00015194012369169264, "loss": 2.2332, "step": 4281 }, { "epoch": 0.33, "grad_norm": 1.2150152921676636, "learning_rate": 0.00015191962134413744, "loss": 1.5148, "step": 4282 }, { "epoch": 0.33, "grad_norm": 1.1247868537902832, "learning_rate": 0.00015189911600830934, "loss": 1.8638, "step": 4283 }, { "epoch": 0.33, "grad_norm": 1.1968967914581299, "learning_rate": 0.00015187860768538857, "loss": 1.3841, "step": 4284 }, { "epoch": 0.33, "grad_norm": 3.3803722858428955, "learning_rate": 0.0001518580963765555, "loss": 1.5997, "step": 4285 }, { "epoch": 0.33, "grad_norm": 1.1763240098953247, "learning_rate": 0.0001518375820829906, "loss": 1.6117, "step": 4286 }, { "epoch": 0.33, "grad_norm": 1.0104539394378662, "learning_rate": 0.00015181706480587467, "loss": 1.3068, "step": 4287 }, { "epoch": 0.33, "grad_norm": 1.6844544410705566, "learning_rate": 0.0001517965445463886, "loss": 1.9323, "step": 4288 }, { "epoch": 0.33, "grad_norm": 2.1747593879699707, "learning_rate": 0.0001517760213057134, "loss": 1.6498, "step": 4289 }, { "epoch": 0.33, "grad_norm": 1.085290551185608, "learning_rate": 0.00015175549508503034, "loss": 1.5003, "step": 4290 }, { "epoch": 0.33, "grad_norm": 1.3824232816696167, "learning_rate": 0.0001517349658855208, "loss": 1.4334, "step": 4291 }, { "epoch": 0.33, "grad_norm": 1.479669213294983, "learning_rate": 0.00015171443370836634, "loss": 1.4857, "step": 4292 }, { "epoch": 0.33, "grad_norm": 1.1866856813430786, "learning_rate": 0.00015169389855474873, "loss": 0.4335, "step": 4293 }, { "epoch": 0.33, "grad_norm": 1.841578722000122, "learning_rate": 0.0001516733604258499, "loss": 0.9328, "step": 4294 }, { "epoch": 0.33, "grad_norm": 1.1784818172454834, "learning_rate": 0.00015165281932285192, "loss": 1.2444, "step": 4295 }, { "epoch": 0.33, "grad_norm": 5.478216648101807, "learning_rate": 0.000151632275246937, "loss": 1.4668, "step": 4296 }, { "epoch": 0.33, "grad_norm": 
1.5118882656097412, "learning_rate": 0.00015161172819928764, "loss": 1.379, "step": 4297 }, { "epoch": 0.33, "grad_norm": 0.9348283410072327, "learning_rate": 0.00015159117818108644, "loss": 1.2139, "step": 4298 }, { "epoch": 0.33, "grad_norm": 2.052912473678589, "learning_rate": 0.0001515706251935161, "loss": 0.9619, "step": 4299 }, { "epoch": 0.33, "grad_norm": 0.8475001454353333, "learning_rate": 0.00015155006923775965, "loss": 1.1167, "step": 4300 }, { "epoch": 0.33, "grad_norm": 1.2402381896972656, "learning_rate": 0.00015152951031500016, "loss": 1.4199, "step": 4301 }, { "epoch": 0.33, "grad_norm": 1.0555179119110107, "learning_rate": 0.00015150894842642088, "loss": 1.4252, "step": 4302 }, { "epoch": 0.33, "grad_norm": 1.1135509014129639, "learning_rate": 0.00015148838357320537, "loss": 1.181, "step": 4303 }, { "epoch": 0.33, "grad_norm": 1.3886871337890625, "learning_rate": 0.00015146781575653715, "loss": 1.6658, "step": 4304 }, { "epoch": 0.33, "grad_norm": 3.8787543773651123, "learning_rate": 0.00015144724497760003, "loss": 1.5288, "step": 4305 }, { "epoch": 0.33, "grad_norm": 1.6599724292755127, "learning_rate": 0.00015142667123757802, "loss": 1.2389, "step": 4306 }, { "epoch": 0.33, "grad_norm": 2.90970778465271, "learning_rate": 0.00015140609453765524, "loss": 1.6084, "step": 4307 }, { "epoch": 0.33, "grad_norm": 0.9605998396873474, "learning_rate": 0.00015138551487901602, "loss": 1.4637, "step": 4308 }, { "epoch": 0.33, "grad_norm": 2.1653990745544434, "learning_rate": 0.0001513649322628448, "loss": 1.6092, "step": 4309 }, { "epoch": 0.33, "grad_norm": 2.1640069484710693, "learning_rate": 0.00015134434669032625, "loss": 1.725, "step": 4310 }, { "epoch": 0.33, "grad_norm": 1.0697550773620605, "learning_rate": 0.00015132375816264513, "loss": 1.2588, "step": 4311 }, { "epoch": 0.33, "grad_norm": 1.388767123222351, "learning_rate": 0.00015130316668098654, "loss": 1.688, "step": 4312 }, { "epoch": 0.33, "grad_norm": 1.7944860458374023, "learning_rate": 
0.00015128257224653556, "loss": 1.3931, "step": 4313 }, { "epoch": 0.33, "grad_norm": 2.3962531089782715, "learning_rate": 0.0001512619748604775, "loss": 2.3823, "step": 4314 }, { "epoch": 0.33, "grad_norm": 1.4608933925628662, "learning_rate": 0.00015124137452399795, "loss": 1.382, "step": 4315 }, { "epoch": 0.33, "grad_norm": 1.21999990940094, "learning_rate": 0.0001512207712382825, "loss": 1.9054, "step": 4316 }, { "epoch": 0.33, "grad_norm": 1.568943738937378, "learning_rate": 0.00015120016500451698, "loss": 1.6395, "step": 4317 }, { "epoch": 0.33, "grad_norm": 1.373591661453247, "learning_rate": 0.00015117955582388744, "loss": 1.5078, "step": 4318 }, { "epoch": 0.33, "grad_norm": 1.2718318700790405, "learning_rate": 0.00015115894369758007, "loss": 1.2287, "step": 4319 }, { "epoch": 0.33, "grad_norm": 2.200800895690918, "learning_rate": 0.00015113832862678116, "loss": 0.7783, "step": 4320 }, { "epoch": 0.33, "grad_norm": 0.8674513101577759, "learning_rate": 0.00015111771061267727, "loss": 1.1906, "step": 4321 }, { "epoch": 0.33, "grad_norm": 1.94722580909729, "learning_rate": 0.00015109708965645505, "loss": 1.7104, "step": 4322 }, { "epoch": 0.33, "grad_norm": 1.0087388753890991, "learning_rate": 0.0001510764657593014, "loss": 1.258, "step": 4323 }, { "epoch": 0.33, "grad_norm": 1.3755658864974976, "learning_rate": 0.0001510558389224033, "loss": 1.5494, "step": 4324 }, { "epoch": 0.33, "grad_norm": 1.9741899967193604, "learning_rate": 0.00015103520914694796, "loss": 1.5028, "step": 4325 }, { "epoch": 0.33, "grad_norm": 1.3049513101577759, "learning_rate": 0.00015101457643412273, "loss": 1.5569, "step": 4326 }, { "epoch": 0.33, "grad_norm": 3.3622946739196777, "learning_rate": 0.00015099394078511518, "loss": 1.7982, "step": 4327 }, { "epoch": 0.33, "grad_norm": 1.946073293685913, "learning_rate": 0.000150973302201113, "loss": 1.3758, "step": 4328 }, { "epoch": 0.33, "grad_norm": 1.5854886770248413, "learning_rate": 0.000150952660683304, "loss": 1.7764, "step": 
4329 }, { "epoch": 0.33, "grad_norm": 1.7997125387191772, "learning_rate": 0.00015093201623287631, "loss": 1.5938, "step": 4330 }, { "epoch": 0.33, "grad_norm": 2.0046095848083496, "learning_rate": 0.00015091136885101808, "loss": 1.6339, "step": 4331 }, { "epoch": 0.33, "grad_norm": 1.7910813093185425, "learning_rate": 0.00015089071853891767, "loss": 1.6866, "step": 4332 }, { "epoch": 0.33, "grad_norm": 1.2885074615478516, "learning_rate": 0.00015087006529776367, "loss": 1.5372, "step": 4333 }, { "epoch": 0.33, "grad_norm": 1.376883864402771, "learning_rate": 0.00015084940912874476, "loss": 1.3697, "step": 4334 }, { "epoch": 0.33, "grad_norm": 1.0029922723770142, "learning_rate": 0.00015082875003304987, "loss": 0.9377, "step": 4335 }, { "epoch": 0.33, "grad_norm": 1.4447360038757324, "learning_rate": 0.00015080808801186797, "loss": 1.125, "step": 4336 }, { "epoch": 0.33, "grad_norm": 1.5356351137161255, "learning_rate": 0.00015078742306638834, "loss": 1.8059, "step": 4337 }, { "epoch": 0.33, "grad_norm": 1.2833611965179443, "learning_rate": 0.00015076675519780033, "loss": 1.5621, "step": 4338 }, { "epoch": 0.33, "grad_norm": 1.255322813987732, "learning_rate": 0.00015074608440729352, "loss": 1.1284, "step": 4339 }, { "epoch": 0.33, "grad_norm": 2.0234997272491455, "learning_rate": 0.00015072541069605767, "loss": 0.8008, "step": 4340 }, { "epoch": 0.33, "grad_norm": 1.3292862176895142, "learning_rate": 0.00015070473406528256, "loss": 2.0497, "step": 4341 }, { "epoch": 0.33, "grad_norm": 1.1610805988311768, "learning_rate": 0.00015068405451615835, "loss": 1.5721, "step": 4342 }, { "epoch": 0.33, "grad_norm": 1.3736971616744995, "learning_rate": 0.00015066337204987522, "loss": 2.188, "step": 4343 }, { "epoch": 0.33, "grad_norm": 1.2107014656066895, "learning_rate": 0.0001506426866676236, "loss": 2.0255, "step": 4344 }, { "epoch": 0.33, "grad_norm": 1.2328732013702393, "learning_rate": 0.00015062199837059405, "loss": 1.3404, "step": 4345 }, { "epoch": 0.33, 
"grad_norm": 2.1806559562683105, "learning_rate": 0.0001506013071599772, "loss": 1.6755, "step": 4346 }, { "epoch": 0.33, "grad_norm": 1.5558372735977173, "learning_rate": 0.0001505806130369641, "loss": 1.8812, "step": 4347 }, { "epoch": 0.33, "grad_norm": 1.4385244846343994, "learning_rate": 0.00015055991600274572, "loss": 1.3896, "step": 4348 }, { "epoch": 0.33, "grad_norm": 2.0725080966949463, "learning_rate": 0.00015053921605851333, "loss": 1.6088, "step": 4349 }, { "epoch": 0.33, "grad_norm": 1.185129165649414, "learning_rate": 0.00015051851320545834, "loss": 1.3746, "step": 4350 }, { "epoch": 0.33, "grad_norm": 2.0448575019836426, "learning_rate": 0.00015049780744477226, "loss": 1.9249, "step": 4351 }, { "epoch": 0.33, "grad_norm": 1.2185524702072144, "learning_rate": 0.00015047709877764686, "loss": 1.6003, "step": 4352 }, { "epoch": 0.33, "grad_norm": 2.1535604000091553, "learning_rate": 0.00015045638720527407, "loss": 1.2534, "step": 4353 }, { "epoch": 0.33, "grad_norm": 1.0451767444610596, "learning_rate": 0.00015043567272884594, "loss": 1.714, "step": 4354 }, { "epoch": 0.33, "grad_norm": 3.4166440963745117, "learning_rate": 0.00015041495534955467, "loss": 2.1798, "step": 4355 }, { "epoch": 0.33, "grad_norm": 1.3566093444824219, "learning_rate": 0.00015039423506859275, "loss": 1.5632, "step": 4356 }, { "epoch": 0.33, "grad_norm": 1.3938788175582886, "learning_rate": 0.00015037351188715265, "loss": 1.6167, "step": 4357 }, { "epoch": 0.33, "grad_norm": 1.1200839281082153, "learning_rate": 0.00015035278580642717, "loss": 1.6191, "step": 4358 }, { "epoch": 0.33, "grad_norm": 1.067038893699646, "learning_rate": 0.00015033205682760922, "loss": 1.1183, "step": 4359 }, { "epoch": 0.33, "grad_norm": 0.990788996219635, "learning_rate": 0.00015031132495189187, "loss": 1.7871, "step": 4360 }, { "epoch": 0.33, "grad_norm": 1.3285517692565918, "learning_rate": 0.0001502905901804683, "loss": 1.4391, "step": 4361 }, { "epoch": 0.33, "grad_norm": 1.474055528640747, 
"learning_rate": 0.00015026985251453197, "loss": 1.6741, "step": 4362 }, { "epoch": 0.33, "grad_norm": 1.0540852546691895, "learning_rate": 0.00015024911195527646, "loss": 1.6194, "step": 4363 }, { "epoch": 0.33, "grad_norm": 1.4396010637283325, "learning_rate": 0.00015022836850389547, "loss": 1.3783, "step": 4364 }, { "epoch": 0.33, "grad_norm": 1.0030885934829712, "learning_rate": 0.00015020762216158296, "loss": 1.0489, "step": 4365 }, { "epoch": 0.33, "grad_norm": 1.9737344980239868, "learning_rate": 0.00015018687292953293, "loss": 2.5696, "step": 4366 }, { "epoch": 0.33, "grad_norm": 1.4026063680648804, "learning_rate": 0.00015016612080893965, "loss": 1.6583, "step": 4367 }, { "epoch": 0.33, "grad_norm": 1.2118867635726929, "learning_rate": 0.00015014536580099756, "loss": 1.2081, "step": 4368 }, { "epoch": 0.33, "grad_norm": 1.5213872194290161, "learning_rate": 0.00015012460790690118, "loss": 1.3862, "step": 4369 }, { "epoch": 0.33, "grad_norm": 1.4472754001617432, "learning_rate": 0.00015010384712784523, "loss": 2.0949, "step": 4370 }, { "epoch": 0.33, "grad_norm": 2.0271449089050293, "learning_rate": 0.00015008308346502468, "loss": 1.5489, "step": 4371 }, { "epoch": 0.33, "grad_norm": 2.2271416187286377, "learning_rate": 0.0001500623169196346, "loss": 1.7309, "step": 4372 }, { "epoch": 0.33, "grad_norm": 1.9958633184432983, "learning_rate": 0.00015004154749287014, "loss": 1.6233, "step": 4373 }, { "epoch": 0.33, "grad_norm": 2.54941725730896, "learning_rate": 0.00015002077518592676, "loss": 2.1922, "step": 4374 }, { "epoch": 0.33, "grad_norm": 1.0699145793914795, "learning_rate": 0.00015000000000000001, "loss": 1.33, "step": 4375 }, { "epoch": 0.33, "grad_norm": 1.3506758213043213, "learning_rate": 0.00014997922193628563, "loss": 1.6481, "step": 4376 }, { "epoch": 0.33, "grad_norm": 0.5319469571113586, "learning_rate": 0.0001499584409959795, "loss": 1.1977, "step": 4377 }, { "epoch": 0.33, "grad_norm": 1.5646607875823975, "learning_rate": 
0.00014993765718027775, "loss": 1.352, "step": 4378 }, { "epoch": 0.33, "grad_norm": 2.0844459533691406, "learning_rate": 0.00014991687049037647, "loss": 1.8214, "step": 4379 }, { "epoch": 0.33, "grad_norm": 1.0828248262405396, "learning_rate": 0.0001498960809274722, "loss": 1.3499, "step": 4380 }, { "epoch": 0.33, "grad_norm": 1.0556936264038086, "learning_rate": 0.0001498752884927614, "loss": 1.2602, "step": 4381 }, { "epoch": 0.33, "grad_norm": 0.7053444981575012, "learning_rate": 0.00014985449318744086, "loss": 1.6124, "step": 4382 }, { "epoch": 0.33, "grad_norm": 1.7427799701690674, "learning_rate": 0.00014983369501270742, "loss": 1.6796, "step": 4383 }, { "epoch": 0.33, "grad_norm": 1.5893495082855225, "learning_rate": 0.00014981289396975817, "loss": 1.4665, "step": 4384 }, { "epoch": 0.33, "grad_norm": 1.1464842557907104, "learning_rate": 0.0001497920900597903, "loss": 1.8286, "step": 4385 }, { "epoch": 0.33, "grad_norm": 2.9157421588897705, "learning_rate": 0.0001497712832840012, "loss": 1.7677, "step": 4386 }, { "epoch": 0.33, "grad_norm": 1.372239112854004, "learning_rate": 0.00014975047364358844, "loss": 1.317, "step": 4387 }, { "epoch": 0.33, "grad_norm": 3.1582484245300293, "learning_rate": 0.00014972966113974972, "loss": 1.5821, "step": 4388 }, { "epoch": 0.33, "grad_norm": 2.170802593231201, "learning_rate": 0.00014970884577368294, "loss": 2.3539, "step": 4389 }, { "epoch": 0.33, "grad_norm": 1.024703860282898, "learning_rate": 0.00014968802754658614, "loss": 1.3774, "step": 4390 }, { "epoch": 0.34, "grad_norm": 1.5964839458465576, "learning_rate": 0.00014966720645965745, "loss": 1.5265, "step": 4391 }, { "epoch": 0.34, "grad_norm": 1.1736187934875488, "learning_rate": 0.00014964638251409537, "loss": 1.5339, "step": 4392 }, { "epoch": 0.34, "grad_norm": 1.1451486349105835, "learning_rate": 0.00014962555571109836, "loss": 1.6494, "step": 4393 }, { "epoch": 0.34, "grad_norm": 1.2058738470077515, "learning_rate": 0.0001496047260518651, "loss": 1.6171, 
"step": 4394 }, { "epoch": 0.34, "grad_norm": 1.5563585758209229, "learning_rate": 0.0001495838935375945, "loss": 1.6631, "step": 4395 }, { "epoch": 0.34, "grad_norm": 1.2828339338302612, "learning_rate": 0.00014956305816948566, "loss": 1.7969, "step": 4396 }, { "epoch": 0.34, "grad_norm": 1.0897302627563477, "learning_rate": 0.00014954221994873765, "loss": 1.5133, "step": 4397 }, { "epoch": 0.34, "grad_norm": 1.4699169397354126, "learning_rate": 0.00014952137887654988, "loss": 1.5818, "step": 4398 }, { "epoch": 0.34, "grad_norm": 1.355128288269043, "learning_rate": 0.00014950053495412188, "loss": 1.6356, "step": 4399 }, { "epoch": 0.34, "grad_norm": 1.1801209449768066, "learning_rate": 0.00014947968818265333, "loss": 0.997, "step": 4400 }, { "epoch": 0.34, "grad_norm": 1.0844225883483887, "learning_rate": 0.0001494588385633441, "loss": 1.3912, "step": 4401 }, { "epoch": 0.34, "grad_norm": 1.6663920879364014, "learning_rate": 0.00014943798609739418, "loss": 1.3569, "step": 4402 }, { "epoch": 0.34, "grad_norm": 1.5103435516357422, "learning_rate": 0.00014941713078600374, "loss": 1.7241, "step": 4403 }, { "epoch": 0.34, "grad_norm": 4.823066711425781, "learning_rate": 0.00014939627263037316, "loss": 1.6264, "step": 4404 }, { "epoch": 0.34, "grad_norm": 1.2524231672286987, "learning_rate": 0.0001493754116317029, "loss": 1.7019, "step": 4405 }, { "epoch": 0.34, "grad_norm": 1.6792715787887573, "learning_rate": 0.0001493545477911937, "loss": 1.9382, "step": 4406 }, { "epoch": 0.34, "grad_norm": 1.1258089542388916, "learning_rate": 0.00014933368111004635, "loss": 1.2989, "step": 4407 }, { "epoch": 0.34, "grad_norm": 0.9210385680198669, "learning_rate": 0.00014931281158946184, "loss": 0.804, "step": 4408 }, { "epoch": 0.34, "grad_norm": 1.841789722442627, "learning_rate": 0.00014929193923064133, "loss": 1.7406, "step": 4409 }, { "epoch": 0.34, "grad_norm": 1.035879135131836, "learning_rate": 0.00014927106403478616, "loss": 1.3843, "step": 4410 }, { "epoch": 0.34, 
"grad_norm": 1.4220776557922363, "learning_rate": 0.00014925018600309785, "loss": 2.2616, "step": 4411 }, { "epoch": 0.34, "grad_norm": 1.612937331199646, "learning_rate": 0.00014922930513677798, "loss": 1.4005, "step": 4412 }, { "epoch": 0.34, "grad_norm": 1.6202579736709595, "learning_rate": 0.0001492084214370284, "loss": 2.1691, "step": 4413 }, { "epoch": 0.34, "grad_norm": 1.211552619934082, "learning_rate": 0.00014918753490505106, "loss": 1.3964, "step": 4414 }, { "epoch": 0.34, "grad_norm": 1.8027647733688354, "learning_rate": 0.00014916664554204818, "loss": 1.7241, "step": 4415 }, { "epoch": 0.34, "grad_norm": 1.5891510248184204, "learning_rate": 0.000149145753349222, "loss": 1.3081, "step": 4416 }, { "epoch": 0.34, "grad_norm": 1.216617226600647, "learning_rate": 0.00014912485832777498, "loss": 1.5276, "step": 4417 }, { "epoch": 0.34, "grad_norm": 1.8747683763504028, "learning_rate": 0.00014910396047890974, "loss": 1.3609, "step": 4418 }, { "epoch": 0.34, "grad_norm": 1.7645087242126465, "learning_rate": 0.00014908305980382912, "loss": 1.6869, "step": 4419 }, { "epoch": 0.34, "grad_norm": 1.831982970237732, "learning_rate": 0.00014906215630373606, "loss": 1.3865, "step": 4420 }, { "epoch": 0.34, "grad_norm": 1.8275710344314575, "learning_rate": 0.00014904124997983367, "loss": 2.1081, "step": 4421 }, { "epoch": 0.34, "grad_norm": 1.3221193552017212, "learning_rate": 0.0001490203408333252, "loss": 1.4927, "step": 4422 }, { "epoch": 0.34, "grad_norm": 2.804702043533325, "learning_rate": 0.0001489994288654141, "loss": 1.5323, "step": 4423 }, { "epoch": 0.34, "grad_norm": 1.4198652505874634, "learning_rate": 0.00014897851407730402, "loss": 1.8071, "step": 4424 }, { "epoch": 0.34, "grad_norm": 2.029524326324463, "learning_rate": 0.00014895759647019872, "loss": 1.8348, "step": 4425 }, { "epoch": 0.34, "grad_norm": 1.8373680114746094, "learning_rate": 0.00014893667604530203, "loss": 1.3996, "step": 4426 }, { "epoch": 0.34, "grad_norm": 1.6852352619171143, 
"learning_rate": 0.00014891575280381818, "loss": 2.4439, "step": 4427 }, { "epoch": 0.34, "grad_norm": 3.426935911178589, "learning_rate": 0.00014889482674695134, "loss": 1.9615, "step": 4428 }, { "epoch": 0.34, "grad_norm": 2.0039403438568115, "learning_rate": 0.00014887389787590595, "loss": 2.1274, "step": 4429 }, { "epoch": 0.34, "grad_norm": 1.6000641584396362, "learning_rate": 0.00014885296619188658, "loss": 1.3948, "step": 4430 }, { "epoch": 0.34, "grad_norm": 1.4929956197738647, "learning_rate": 0.00014883203169609795, "loss": 1.1848, "step": 4431 }, { "epoch": 0.34, "grad_norm": 1.2630387544631958, "learning_rate": 0.00014881109438974498, "loss": 1.3599, "step": 4432 }, { "epoch": 0.34, "grad_norm": 1.08262038230896, "learning_rate": 0.00014879015427403277, "loss": 1.4818, "step": 4433 }, { "epoch": 0.34, "grad_norm": 1.995782732963562, "learning_rate": 0.0001487692113501665, "loss": 1.7427, "step": 4434 }, { "epoch": 0.34, "grad_norm": 2.120015859603882, "learning_rate": 0.00014874826561935155, "loss": 2.1558, "step": 4435 }, { "epoch": 0.34, "grad_norm": 0.9541813731193542, "learning_rate": 0.00014872731708279348, "loss": 1.4703, "step": 4436 }, { "epoch": 0.34, "grad_norm": 1.1893153190612793, "learning_rate": 0.00014870636574169802, "loss": 1.2332, "step": 4437 }, { "epoch": 0.34, "grad_norm": 1.4971003532409668, "learning_rate": 0.00014868541159727096, "loss": 1.979, "step": 4438 }, { "epoch": 0.34, "grad_norm": 1.3459936380386353, "learning_rate": 0.00014866445465071845, "loss": 0.9379, "step": 4439 }, { "epoch": 0.34, "grad_norm": 1.379616141319275, "learning_rate": 0.00014864349490324662, "loss": 2.1065, "step": 4440 }, { "epoch": 0.34, "grad_norm": 1.1576521396636963, "learning_rate": 0.0001486225323560618, "loss": 1.3396, "step": 4441 }, { "epoch": 0.34, "grad_norm": 2.3869080543518066, "learning_rate": 0.0001486015670103705, "loss": 2.2121, "step": 4442 }, { "epoch": 0.34, "grad_norm": 1.368435025215149, "learning_rate": 0.0001485805988673795, 
"loss": 0.8632, "step": 4443 }, { "epoch": 0.34, "grad_norm": 1.3003026247024536, "learning_rate": 0.00014855962792829555, "loss": 1.4884, "step": 4444 }, { "epoch": 0.34, "grad_norm": 1.4258438348770142, "learning_rate": 0.00014853865419432564, "loss": 1.6849, "step": 4445 }, { "epoch": 0.34, "grad_norm": 1.1443126201629639, "learning_rate": 0.00014851767766667698, "loss": 1.2354, "step": 4446 }, { "epoch": 0.34, "grad_norm": 2.5185718536376953, "learning_rate": 0.00014849669834655682, "loss": 1.6815, "step": 4447 }, { "epoch": 0.34, "grad_norm": 1.5799823999404907, "learning_rate": 0.00014847571623517272, "loss": 1.4161, "step": 4448 }, { "epoch": 0.34, "grad_norm": 3.2676315307617188, "learning_rate": 0.0001484547313337323, "loss": 2.4371, "step": 4449 }, { "epoch": 0.34, "grad_norm": 1.4433192014694214, "learning_rate": 0.00014843374364344333, "loss": 1.3206, "step": 4450 }, { "epoch": 0.34, "grad_norm": 2.634594678878784, "learning_rate": 0.00014841275316551376, "loss": 1.4783, "step": 4451 }, { "epoch": 0.34, "grad_norm": 1.5595542192459106, "learning_rate": 0.00014839175990115177, "loss": 1.15, "step": 4452 }, { "epoch": 0.34, "grad_norm": 2.066772222518921, "learning_rate": 0.00014837076385156562, "loss": 1.9202, "step": 4453 }, { "epoch": 0.34, "grad_norm": 1.2809466123580933, "learning_rate": 0.00014834976501796373, "loss": 1.5311, "step": 4454 }, { "epoch": 0.34, "grad_norm": 1.725700855255127, "learning_rate": 0.00014832876340155476, "loss": 2.2707, "step": 4455 }, { "epoch": 0.34, "grad_norm": 1.3301730155944824, "learning_rate": 0.00014830775900354735, "loss": 2.009, "step": 4456 }, { "epoch": 0.34, "grad_norm": 1.9697083234786987, "learning_rate": 0.0001482867518251506, "loss": 1.4421, "step": 4457 }, { "epoch": 0.34, "grad_norm": 1.8289637565612793, "learning_rate": 0.0001482657418675735, "loss": 1.5607, "step": 4458 }, { "epoch": 0.34, "grad_norm": 1.2084732055664062, "learning_rate": 0.00014824472913202525, "loss": 1.5064, "step": 4459 }, { 
"epoch": 0.34, "grad_norm": 1.8917791843414307, "learning_rate": 0.00014822371361971533, "loss": 1.3118, "step": 4460 }, { "epoch": 0.34, "grad_norm": 1.8793929815292358, "learning_rate": 0.00014820269533185324, "loss": 1.6066, "step": 4461 }, { "epoch": 0.34, "grad_norm": 0.9990002512931824, "learning_rate": 0.00014818167426964876, "loss": 1.4588, "step": 4462 }, { "epoch": 0.34, "grad_norm": 0.8904057741165161, "learning_rate": 0.0001481606504343118, "loss": 1.0918, "step": 4463 }, { "epoch": 0.34, "grad_norm": 1.3913358449935913, "learning_rate": 0.0001481396238270523, "loss": 1.4538, "step": 4464 }, { "epoch": 0.34, "grad_norm": 0.8626667261123657, "learning_rate": 0.00014811859444908052, "loss": 1.1761, "step": 4465 }, { "epoch": 0.34, "grad_norm": 1.1824320554733276, "learning_rate": 0.0001480975623016068, "loss": 1.2061, "step": 4466 }, { "epoch": 0.34, "grad_norm": 1.1265395879745483, "learning_rate": 0.00014807652738584173, "loss": 1.7372, "step": 4467 }, { "epoch": 0.34, "grad_norm": 1.636709213256836, "learning_rate": 0.00014805548970299592, "loss": 1.724, "step": 4468 }, { "epoch": 0.34, "grad_norm": 2.0654757022857666, "learning_rate": 0.00014803444925428022, "loss": 1.6349, "step": 4469 }, { "epoch": 0.34, "grad_norm": 2.00728702545166, "learning_rate": 0.00014801340604090566, "loss": 1.3177, "step": 4470 }, { "epoch": 0.34, "grad_norm": 1.2274006605148315, "learning_rate": 0.00014799236006408331, "loss": 1.7797, "step": 4471 }, { "epoch": 0.34, "grad_norm": 1.5076647996902466, "learning_rate": 0.00014797131132502465, "loss": 1.4012, "step": 4472 }, { "epoch": 0.34, "grad_norm": 1.3862544298171997, "learning_rate": 0.000147950259824941, "loss": 1.3521, "step": 4473 }, { "epoch": 0.34, "grad_norm": 1.3200876712799072, "learning_rate": 0.00014792920556504406, "loss": 1.6334, "step": 4474 }, { "epoch": 0.34, "grad_norm": 1.3083959817886353, "learning_rate": 0.00014790814854654562, "loss": 1.9762, "step": 4475 }, { "epoch": 0.34, "grad_norm": 
1.3617932796478271, "learning_rate": 0.00014788708877065766, "loss": 1.4016, "step": 4476 }, { "epoch": 0.34, "grad_norm": 1.0623948574066162, "learning_rate": 0.00014786602623859221, "loss": 1.3223, "step": 4477 }, { "epoch": 0.34, "grad_norm": 2.293806791305542, "learning_rate": 0.00014784496095156163, "loss": 1.8386, "step": 4478 }, { "epoch": 0.34, "grad_norm": 2.1183040142059326, "learning_rate": 0.0001478238929107783, "loss": 1.3186, "step": 4479 }, { "epoch": 0.34, "grad_norm": 3.322185516357422, "learning_rate": 0.0001478028221174548, "loss": 2.1632, "step": 4480 }, { "epoch": 0.34, "grad_norm": 2.2975387573242188, "learning_rate": 0.0001477817485728039, "loss": 1.4635, "step": 4481 }, { "epoch": 0.34, "grad_norm": 1.867294430732727, "learning_rate": 0.0001477606722780385, "loss": 0.931, "step": 4482 }, { "epoch": 0.34, "grad_norm": 2.3850772380828857, "learning_rate": 0.0001477395932343717, "loss": 1.8346, "step": 4483 }, { "epoch": 0.34, "grad_norm": 1.2471989393234253, "learning_rate": 0.00014771851144301662, "loss": 1.6078, "step": 4484 }, { "epoch": 0.34, "grad_norm": 1.3477357625961304, "learning_rate": 0.0001476974269051867, "loss": 1.5094, "step": 4485 }, { "epoch": 0.34, "grad_norm": 2.3435885906219482, "learning_rate": 0.00014767633962209552, "loss": 1.4741, "step": 4486 }, { "epoch": 0.34, "grad_norm": 1.1450308561325073, "learning_rate": 0.00014765524959495672, "loss": 0.8851, "step": 4487 }, { "epoch": 0.34, "grad_norm": 1.313694715499878, "learning_rate": 0.00014763415682498413, "loss": 1.2712, "step": 4488 }, { "epoch": 0.34, "grad_norm": 0.8284798264503479, "learning_rate": 0.00014761306131339178, "loss": 1.5746, "step": 4489 }, { "epoch": 0.34, "grad_norm": 2.832689046859741, "learning_rate": 0.00014759196306139387, "loss": 2.124, "step": 4490 }, { "epoch": 0.34, "grad_norm": 1.8967775106430054, "learning_rate": 0.00014757086207020472, "loss": 1.6428, "step": 4491 }, { "epoch": 0.34, "grad_norm": 1.0957869291305542, "learning_rate": 
0.00014754975834103877, "loss": 0.7114, "step": 4492 }, { "epoch": 0.34, "grad_norm": 2.735701322555542, "learning_rate": 0.0001475286518751107, "loss": 1.7965, "step": 4493 }, { "epoch": 0.34, "grad_norm": 1.6216776371002197, "learning_rate": 0.00014750754267363529, "loss": 1.4266, "step": 4494 }, { "epoch": 0.34, "grad_norm": 1.4829694032669067, "learning_rate": 0.00014748643073782752, "loss": 1.3802, "step": 4495 }, { "epoch": 0.34, "grad_norm": 1.156836986541748, "learning_rate": 0.0001474653160689025, "loss": 1.165, "step": 4496 }, { "epoch": 0.34, "grad_norm": 1.4225432872772217, "learning_rate": 0.00014744419866807542, "loss": 1.8791, "step": 4497 }, { "epoch": 0.34, "grad_norm": 1.2989468574523926, "learning_rate": 0.00014742307853656184, "loss": 1.4766, "step": 4498 }, { "epoch": 0.34, "grad_norm": 0.8087917566299438, "learning_rate": 0.00014740195567557724, "loss": 1.3556, "step": 4499 }, { "epoch": 0.34, "grad_norm": 1.2460134029388428, "learning_rate": 0.00014738083008633744, "loss": 1.7837, "step": 4500 }, { "epoch": 0.34, "grad_norm": 1.6977500915527344, "learning_rate": 0.00014735970177005828, "loss": 1.6867, "step": 4501 }, { "epoch": 0.34, "grad_norm": 1.7882604598999023, "learning_rate": 0.00014733857072795584, "loss": 1.6034, "step": 4502 }, { "epoch": 0.34, "grad_norm": 1.111688256263733, "learning_rate": 0.00014731743696124632, "loss": 1.5576, "step": 4503 }, { "epoch": 0.34, "grad_norm": 4.255923748016357, "learning_rate": 0.0001472963004711461, "loss": 1.504, "step": 4504 }, { "epoch": 0.34, "grad_norm": 1.2216087579727173, "learning_rate": 0.00014727516125887175, "loss": 2.0207, "step": 4505 }, { "epoch": 0.34, "grad_norm": 2.005633592605591, "learning_rate": 0.00014725401932563984, "loss": 1.0205, "step": 4506 }, { "epoch": 0.34, "grad_norm": 1.8313499689102173, "learning_rate": 0.00014723287467266732, "loss": 1.4565, "step": 4507 }, { "epoch": 0.34, "grad_norm": 1.6107401847839355, "learning_rate": 0.00014721172730117118, "loss": 1.2684, 
"step": 4508 }, { "epoch": 0.34, "grad_norm": 1.2769877910614014, "learning_rate": 0.00014719057721236848, "loss": 1.1712, "step": 4509 }, { "epoch": 0.34, "grad_norm": 1.01560640335083, "learning_rate": 0.00014716942440747664, "loss": 1.2377, "step": 4510 }, { "epoch": 0.34, "grad_norm": 1.5930826663970947, "learning_rate": 0.00014714826888771306, "loss": 1.8071, "step": 4511 }, { "epoch": 0.34, "grad_norm": 1.6713573932647705, "learning_rate": 0.0001471271106542953, "loss": 1.2611, "step": 4512 }, { "epoch": 0.34, "grad_norm": 1.63997220993042, "learning_rate": 0.0001471059497084413, "loss": 1.8519, "step": 4513 }, { "epoch": 0.34, "grad_norm": 2.1606199741363525, "learning_rate": 0.0001470847860513689, "loss": 1.2963, "step": 4514 }, { "epoch": 0.34, "grad_norm": 1.9768801927566528, "learning_rate": 0.00014706361968429618, "loss": 1.9936, "step": 4515 }, { "epoch": 0.34, "grad_norm": 1.4102002382278442, "learning_rate": 0.0001470424506084414, "loss": 1.6682, "step": 4516 }, { "epoch": 0.34, "grad_norm": 1.200756549835205, "learning_rate": 0.00014702127882502302, "loss": 1.9528, "step": 4517 }, { "epoch": 0.34, "grad_norm": 1.1156235933303833, "learning_rate": 0.00014700010433525945, "loss": 1.9242, "step": 4518 }, { "epoch": 0.34, "grad_norm": 1.3234959840774536, "learning_rate": 0.00014697892714036958, "loss": 1.0316, "step": 4519 }, { "epoch": 0.34, "grad_norm": 1.1982320547103882, "learning_rate": 0.0001469577472415722, "loss": 1.7244, "step": 4520 }, { "epoch": 0.34, "grad_norm": 1.064739465713501, "learning_rate": 0.00014693656464008628, "loss": 1.5588, "step": 4521 }, { "epoch": 0.35, "grad_norm": 1.9144338369369507, "learning_rate": 0.00014691537933713105, "loss": 1.5449, "step": 4522 }, { "epoch": 0.35, "grad_norm": 2.4071195125579834, "learning_rate": 0.0001468941913339259, "loss": 1.6508, "step": 4523 }, { "epoch": 0.35, "grad_norm": 1.4035762548446655, "learning_rate": 0.00014687300063169026, "loss": 1.5157, "step": 4524 }, { "epoch": 0.35, 
"grad_norm": 2.0020573139190674, "learning_rate": 0.00014685180723164376, "loss": 1.7787, "step": 4525 }, { "epoch": 0.35, "grad_norm": 1.653214931488037, "learning_rate": 0.00014683061113500624, "loss": 1.8886, "step": 4526 }, { "epoch": 0.35, "grad_norm": 1.5928109884262085, "learning_rate": 0.00014680941234299763, "loss": 1.949, "step": 4527 }, { "epoch": 0.35, "grad_norm": 2.9655845165252686, "learning_rate": 0.0001467882108568381, "loss": 1.8058, "step": 4528 }, { "epoch": 0.35, "grad_norm": 1.3145861625671387, "learning_rate": 0.00014676700667774789, "loss": 1.9575, "step": 4529 }, { "epoch": 0.35, "grad_norm": 1.2610679864883423, "learning_rate": 0.00014674579980694736, "loss": 2.3421, "step": 4530 }, { "epoch": 0.35, "grad_norm": 1.1667557954788208, "learning_rate": 0.00014672459024565713, "loss": 1.8539, "step": 4531 }, { "epoch": 0.35, "grad_norm": 1.2099521160125732, "learning_rate": 0.00014670337799509799, "loss": 1.3387, "step": 4532 }, { "epoch": 0.35, "grad_norm": 2.1679062843322754, "learning_rate": 0.00014668216305649076, "loss": 1.5915, "step": 4533 }, { "epoch": 0.35, "grad_norm": 1.2049400806427002, "learning_rate": 0.00014666094543105648, "loss": 1.6363, "step": 4534 }, { "epoch": 0.35, "grad_norm": 2.3672597408294678, "learning_rate": 0.0001466397251200164, "loss": 1.6105, "step": 4535 }, { "epoch": 0.35, "grad_norm": 1.3928625583648682, "learning_rate": 0.0001466185021245918, "loss": 1.8123, "step": 4536 }, { "epoch": 0.35, "grad_norm": 1.1155762672424316, "learning_rate": 0.00014659727644600424, "loss": 1.7639, "step": 4537 }, { "epoch": 0.35, "grad_norm": 2.026477336883545, "learning_rate": 0.00014657604808547537, "loss": 1.6102, "step": 4538 }, { "epoch": 0.35, "grad_norm": 1.588047742843628, "learning_rate": 0.00014655481704422695, "loss": 0.7844, "step": 4539 }, { "epoch": 0.35, "grad_norm": 1.5526833534240723, "learning_rate": 0.000146533583323481, "loss": 1.3853, "step": 4540 }, { "epoch": 0.35, "grad_norm": 1.5901175737380981, 
"learning_rate": 0.00014651234692445968, "loss": 1.4762, "step": 4541 }, { "epoch": 0.35, "grad_norm": 4.224655628204346, "learning_rate": 0.0001464911078483852, "loss": 2.7552, "step": 4542 }, { "epoch": 0.35, "grad_norm": 1.4428174495697021, "learning_rate": 0.00014646986609648003, "loss": 1.3757, "step": 4543 }, { "epoch": 0.35, "grad_norm": 1.522678017616272, "learning_rate": 0.00014644862166996673, "loss": 1.2456, "step": 4544 }, { "epoch": 0.35, "grad_norm": 1.7182953357696533, "learning_rate": 0.00014642737457006802, "loss": 1.3224, "step": 4545 }, { "epoch": 0.35, "grad_norm": 2.5626697540283203, "learning_rate": 0.00014640612479800686, "loss": 2.2184, "step": 4546 }, { "epoch": 0.35, "grad_norm": 1.4341918230056763, "learning_rate": 0.00014638487235500624, "loss": 1.6647, "step": 4547 }, { "epoch": 0.35, "grad_norm": 1.476347804069519, "learning_rate": 0.0001463636172422894, "loss": 1.1469, "step": 4548 }, { "epoch": 0.35, "grad_norm": 1.2342956066131592, "learning_rate": 0.00014634235946107962, "loss": 1.4888, "step": 4549 }, { "epoch": 0.35, "grad_norm": 1.4509872198104858, "learning_rate": 0.00014632109901260048, "loss": 1.3765, "step": 4550 }, { "epoch": 0.35, "grad_norm": 3.784266710281372, "learning_rate": 0.00014629983589807565, "loss": 1.8277, "step": 4551 }, { "epoch": 0.35, "grad_norm": 1.1589548587799072, "learning_rate": 0.00014627857011872893, "loss": 1.4095, "step": 4552 }, { "epoch": 0.35, "grad_norm": 1.418220043182373, "learning_rate": 0.00014625730167578424, "loss": 1.7468, "step": 4553 }, { "epoch": 0.35, "grad_norm": 1.368406891822815, "learning_rate": 0.00014623603057046576, "loss": 1.5508, "step": 4554 }, { "epoch": 0.35, "grad_norm": 3.317148447036743, "learning_rate": 0.0001462147568039977, "loss": 2.2214, "step": 4555 }, { "epoch": 0.35, "grad_norm": 1.5641534328460693, "learning_rate": 0.00014619348037760456, "loss": 2.1323, "step": 4556 }, { "epoch": 0.35, "grad_norm": 1.630319595336914, "learning_rate": 0.00014617220129251087, 
"loss": 1.6802, "step": 4557 }, { "epoch": 0.35, "grad_norm": 1.6945000886917114, "learning_rate": 0.0001461509195499414, "loss": 1.1081, "step": 4558 }, { "epoch": 0.35, "grad_norm": 1.4898240566253662, "learning_rate": 0.00014612963515112096, "loss": 1.3088, "step": 4559 }, { "epoch": 0.35, "grad_norm": 1.2538115978240967, "learning_rate": 0.0001461083480972747, "loss": 1.6857, "step": 4560 }, { "epoch": 0.35, "grad_norm": 2.2133078575134277, "learning_rate": 0.00014608705838962778, "loss": 1.4831, "step": 4561 }, { "epoch": 0.35, "grad_norm": 1.1491906642913818, "learning_rate": 0.00014606576602940548, "loss": 1.2109, "step": 4562 }, { "epoch": 0.35, "grad_norm": 1.1952307224273682, "learning_rate": 0.00014604447101783337, "loss": 1.1887, "step": 4563 }, { "epoch": 0.35, "grad_norm": 1.398737907409668, "learning_rate": 0.00014602317335613707, "loss": 1.1679, "step": 4564 }, { "epoch": 0.35, "grad_norm": 1.7144157886505127, "learning_rate": 0.00014600187304554237, "loss": 1.4006, "step": 4565 }, { "epoch": 0.35, "grad_norm": 1.160975694656372, "learning_rate": 0.00014598057008727525, "loss": 1.3608, "step": 4566 }, { "epoch": 0.35, "grad_norm": 1.4044872522354126, "learning_rate": 0.00014595926448256183, "loss": 1.5746, "step": 4567 }, { "epoch": 0.35, "grad_norm": 1.9752459526062012, "learning_rate": 0.00014593795623262833, "loss": 1.4757, "step": 4568 }, { "epoch": 0.35, "grad_norm": 1.6624596118927002, "learning_rate": 0.00014591664533870118, "loss": 1.9, "step": 4569 }, { "epoch": 0.35, "grad_norm": 1.0109347105026245, "learning_rate": 0.00014589533180200693, "loss": 1.2021, "step": 4570 }, { "epoch": 0.35, "grad_norm": 1.4237961769104004, "learning_rate": 0.00014587401562377234, "loss": 0.8086, "step": 4571 }, { "epoch": 0.35, "grad_norm": 1.4020570516586304, "learning_rate": 0.00014585269680522428, "loss": 1.8256, "step": 4572 }, { "epoch": 0.35, "grad_norm": 2.068350076675415, "learning_rate": 0.00014583137534758967, "loss": 1.4325, "step": 4573 }, { 
"epoch": 0.35, "grad_norm": 1.4634747505187988, "learning_rate": 0.0001458100512520958, "loss": 0.574, "step": 4574 }, { "epoch": 0.35, "grad_norm": 1.1410452127456665, "learning_rate": 0.00014578872451996997, "loss": 1.2612, "step": 4575 }, { "epoch": 0.35, "grad_norm": 1.4160981178283691, "learning_rate": 0.0001457673951524396, "loss": 2.0789, "step": 4576 }, { "epoch": 0.35, "grad_norm": 1.4459658861160278, "learning_rate": 0.00014574606315073235, "loss": 1.7384, "step": 4577 }, { "epoch": 0.35, "grad_norm": 1.0492205619812012, "learning_rate": 0.000145724728516076, "loss": 1.5821, "step": 4578 }, { "epoch": 0.35, "grad_norm": 1.8465696573257446, "learning_rate": 0.00014570339124969853, "loss": 1.2002, "step": 4579 }, { "epoch": 0.35, "grad_norm": 2.2973432540893555, "learning_rate": 0.00014568205135282795, "loss": 1.7306, "step": 4580 }, { "epoch": 0.35, "grad_norm": 1.6165293455123901, "learning_rate": 0.0001456607088266925, "loss": 1.5672, "step": 4581 }, { "epoch": 0.35, "grad_norm": 1.3728132247924805, "learning_rate": 0.00014563936367252063, "loss": 1.4881, "step": 4582 }, { "epoch": 0.35, "grad_norm": 2.2214107513427734, "learning_rate": 0.00014561801589154077, "loss": 2.2814, "step": 4583 }, { "epoch": 0.35, "grad_norm": 1.8623450994491577, "learning_rate": 0.0001455966654849817, "loss": 1.8758, "step": 4584 }, { "epoch": 0.35, "grad_norm": 1.3330786228179932, "learning_rate": 0.00014557531245407225, "loss": 1.9047, "step": 4585 }, { "epoch": 0.35, "grad_norm": 1.4407893419265747, "learning_rate": 0.00014555395680004136, "loss": 0.9563, "step": 4586 }, { "epoch": 0.35, "grad_norm": 1.5979174375534058, "learning_rate": 0.0001455325985241182, "loss": 1.8662, "step": 4587 }, { "epoch": 0.35, "grad_norm": 1.2212975025177002, "learning_rate": 0.00014551123762753207, "loss": 1.2939, "step": 4588 }, { "epoch": 0.35, "grad_norm": 1.3959681987762451, "learning_rate": 0.0001454898741115124, "loss": 0.9452, "step": 4589 }, { "epoch": 0.35, "grad_norm": 
0.9636567234992981, "learning_rate": 0.00014546850797728878, "loss": 1.5267, "step": 4590 }, { "epoch": 0.35, "grad_norm": 2.065824031829834, "learning_rate": 0.000145447139226091, "loss": 2.0562, "step": 4591 }, { "epoch": 0.35, "grad_norm": 1.1245476007461548, "learning_rate": 0.00014542576785914885, "loss": 2.1146, "step": 4592 }, { "epoch": 0.35, "grad_norm": 1.8585587739944458, "learning_rate": 0.00014540439387769247, "loss": 1.2609, "step": 4593 }, { "epoch": 0.35, "grad_norm": 1.656599998474121, "learning_rate": 0.00014538301728295202, "loss": 1.0858, "step": 4594 }, { "epoch": 0.35, "grad_norm": 1.0551822185516357, "learning_rate": 0.00014536163807615785, "loss": 1.4933, "step": 4595 }, { "epoch": 0.35, "grad_norm": 1.6789510250091553, "learning_rate": 0.0001453402562585405, "loss": 1.4717, "step": 4596 }, { "epoch": 0.35, "grad_norm": 1.6692001819610596, "learning_rate": 0.00014531887183133054, "loss": 0.9387, "step": 4597 }, { "epoch": 0.35, "grad_norm": 1.4939743280410767, "learning_rate": 0.00014529748479575876, "loss": 1.929, "step": 4598 }, { "epoch": 0.35, "grad_norm": 1.1104589700698853, "learning_rate": 0.0001452760951530562, "loss": 1.2399, "step": 4599 }, { "epoch": 0.35, "grad_norm": 2.28596830368042, "learning_rate": 0.00014525470290445392, "loss": 1.4713, "step": 4600 }, { "epoch": 0.35, "grad_norm": 1.5100581645965576, "learning_rate": 0.0001452333080511831, "loss": 1.9478, "step": 4601 }, { "epoch": 0.35, "grad_norm": 2.7246224880218506, "learning_rate": 0.00014521191059447522, "loss": 0.8908, "step": 4602 }, { "epoch": 0.35, "grad_norm": 1.914602518081665, "learning_rate": 0.00014519051053556175, "loss": 1.1619, "step": 4603 }, { "epoch": 0.35, "grad_norm": 2.9784300327301025, "learning_rate": 0.0001451691078756745, "loss": 2.7606, "step": 4604 }, { "epoch": 0.35, "grad_norm": 1.403975009918213, "learning_rate": 0.00014514770261604522, "loss": 1.5936, "step": 4605 }, { "epoch": 0.35, "grad_norm": 1.6026129722595215, "learning_rate": 
0.00014512629475790594, "loss": 1.3141, "step": 4606 }, { "epoch": 0.35, "grad_norm": 2.183790683746338, "learning_rate": 0.00014510488430248875, "loss": 1.7751, "step": 4607 }, { "epoch": 0.35, "grad_norm": 1.3625357151031494, "learning_rate": 0.00014508347125102606, "loss": 1.5817, "step": 4608 }, { "epoch": 0.35, "grad_norm": 1.2234821319580078, "learning_rate": 0.00014506205560475024, "loss": 1.4718, "step": 4609 }, { "epoch": 0.35, "grad_norm": 2.4337098598480225, "learning_rate": 0.00014504063736489383, "loss": 1.667, "step": 4610 }, { "epoch": 0.35, "grad_norm": 3.0184414386749268, "learning_rate": 0.0001450192165326897, "loss": 1.8341, "step": 4611 }, { "epoch": 0.35, "grad_norm": 1.6246976852416992, "learning_rate": 0.00014499779310937064, "loss": 1.8829, "step": 4612 }, { "epoch": 0.35, "grad_norm": 1.127962589263916, "learning_rate": 0.00014497636709616976, "loss": 1.2499, "step": 4613 }, { "epoch": 0.35, "grad_norm": 1.4528034925460815, "learning_rate": 0.00014495493849432022, "loss": 1.2396, "step": 4614 }, { "epoch": 0.35, "grad_norm": 1.0919824838638306, "learning_rate": 0.00014493350730505533, "loss": 1.2976, "step": 4615 }, { "epoch": 0.35, "grad_norm": 2.834120988845825, "learning_rate": 0.00014491207352960861, "loss": 1.763, "step": 4616 }, { "epoch": 0.35, "grad_norm": 1.759223461151123, "learning_rate": 0.0001448906371692137, "loss": 1.2897, "step": 4617 }, { "epoch": 0.35, "grad_norm": 1.5351746082305908, "learning_rate": 0.00014486919822510438, "loss": 1.8207, "step": 4618 }, { "epoch": 0.35, "grad_norm": 7.6699957847595215, "learning_rate": 0.0001448477566985146, "loss": 2.8507, "step": 4619 }, { "epoch": 0.35, "grad_norm": 0.9551420211791992, "learning_rate": 0.0001448263125906784, "loss": 1.1881, "step": 4620 }, { "epoch": 0.35, "grad_norm": 2.0152502059936523, "learning_rate": 0.00014480486590283005, "loss": 1.5091, "step": 4621 }, { "epoch": 0.35, "grad_norm": 1.3888064622879028, "learning_rate": 0.00014478341663620392, "loss": 1.478, 
"step": 4622 }, { "epoch": 0.35, "grad_norm": 2.5256450176239014, "learning_rate": 0.00014476196479203454, "loss": 2.186, "step": 4623 }, { "epoch": 0.35, "grad_norm": 1.3961105346679688, "learning_rate": 0.00014474051037155657, "loss": 1.3907, "step": 4624 }, { "epoch": 0.35, "grad_norm": 1.9188404083251953, "learning_rate": 0.00014471905337600486, "loss": 1.759, "step": 4625 }, { "epoch": 0.35, "grad_norm": 1.4244757890701294, "learning_rate": 0.0001446975938066144, "loss": 1.9264, "step": 4626 }, { "epoch": 0.35, "grad_norm": 1.5880340337753296, "learning_rate": 0.00014467613166462023, "loss": 0.9312, "step": 4627 }, { "epoch": 0.35, "grad_norm": 1.7502360343933105, "learning_rate": 0.0001446546669512577, "loss": 1.6217, "step": 4628 }, { "epoch": 0.35, "grad_norm": 1.1444557905197144, "learning_rate": 0.00014463319966776222, "loss": 1.4309, "step": 4629 }, { "epoch": 0.35, "grad_norm": 1.49064302444458, "learning_rate": 0.0001446117298153693, "loss": 1.4806, "step": 4630 }, { "epoch": 0.35, "grad_norm": 1.7031499147415161, "learning_rate": 0.00014459025739531472, "loss": 1.7474, "step": 4631 }, { "epoch": 0.35, "grad_norm": 2.2927334308624268, "learning_rate": 0.00014456878240883434, "loss": 1.2516, "step": 4632 }, { "epoch": 0.35, "grad_norm": 2.695161819458008, "learning_rate": 0.00014454730485716411, "loss": 1.3892, "step": 4633 }, { "epoch": 0.35, "grad_norm": 1.4603791236877441, "learning_rate": 0.00014452582474154023, "loss": 1.6359, "step": 4634 }, { "epoch": 0.35, "grad_norm": 1.351331353187561, "learning_rate": 0.00014450434206319897, "loss": 1.6245, "step": 4635 }, { "epoch": 0.35, "grad_norm": 1.2240735292434692, "learning_rate": 0.00014448285682337682, "loss": 0.9022, "step": 4636 }, { "epoch": 0.35, "grad_norm": 1.3646703958511353, "learning_rate": 0.0001444613690233104, "loss": 1.4056, "step": 4637 }, { "epoch": 0.35, "grad_norm": 1.0182299613952637, "learning_rate": 0.00014443987866423637, "loss": 1.4401, "step": 4638 }, { "epoch": 0.35, 
"grad_norm": 0.9992247223854065, "learning_rate": 0.00014441838574739166, "loss": 1.0664, "step": 4639 }, { "epoch": 0.35, "grad_norm": 1.5748580694198608, "learning_rate": 0.00014439689027401334, "loss": 1.727, "step": 4640 }, { "epoch": 0.35, "grad_norm": 1.1880230903625488, "learning_rate": 0.0001443753922453386, "loss": 1.7455, "step": 4641 }, { "epoch": 0.35, "grad_norm": 1.4877437353134155, "learning_rate": 0.0001443538916626048, "loss": 1.3415, "step": 4642 }, { "epoch": 0.35, "grad_norm": 1.3379395008087158, "learning_rate": 0.00014433238852704932, "loss": 1.2834, "step": 4643 }, { "epoch": 0.35, "grad_norm": 1.226199746131897, "learning_rate": 0.00014431088283990985, "loss": 1.1221, "step": 4644 }, { "epoch": 0.35, "grad_norm": 2.0361111164093018, "learning_rate": 0.00014428937460242417, "loss": 1.3094, "step": 4645 }, { "epoch": 0.35, "grad_norm": 1.177597165107727, "learning_rate": 0.00014426786381583023, "loss": 0.9895, "step": 4646 }, { "epoch": 0.35, "grad_norm": 1.8854343891143799, "learning_rate": 0.00014424635048136604, "loss": 2.2638, "step": 4647 }, { "epoch": 0.35, "grad_norm": 1.2337586879730225, "learning_rate": 0.00014422483460026984, "loss": 1.4315, "step": 4648 }, { "epoch": 0.35, "grad_norm": 1.4173954725265503, "learning_rate": 0.00014420331617378003, "loss": 1.5704, "step": 4649 }, { "epoch": 0.35, "grad_norm": 1.5021097660064697, "learning_rate": 0.00014418179520313506, "loss": 1.8551, "step": 4650 }, { "epoch": 0.35, "grad_norm": 1.2485747337341309, "learning_rate": 0.00014416027168957362, "loss": 1.9337, "step": 4651 }, { "epoch": 0.35, "grad_norm": 1.5528265237808228, "learning_rate": 0.0001441387456343345, "loss": 1.4569, "step": 4652 }, { "epoch": 0.36, "grad_norm": 2.1904053688049316, "learning_rate": 0.00014411721703865666, "loss": 2.5694, "step": 4653 }, { "epoch": 0.36, "grad_norm": 1.2333979606628418, "learning_rate": 0.00014409568590377918, "loss": 1.483, "step": 4654 }, { "epoch": 0.36, "grad_norm": 2.7978873252868652, 
"learning_rate": 0.00014407415223094132, "loss": 1.9575, "step": 4655 }, { "epoch": 0.36, "grad_norm": 1.3909506797790527, "learning_rate": 0.00014405261602138246, "loss": 1.8258, "step": 4656 }, { "epoch": 0.36, "grad_norm": 1.9555137157440186, "learning_rate": 0.0001440310772763421, "loss": 1.5669, "step": 4657 }, { "epoch": 0.36, "grad_norm": 2.8696558475494385, "learning_rate": 0.00014400953599705995, "loss": 2.1497, "step": 4658 }, { "epoch": 0.36, "grad_norm": 1.1127580404281616, "learning_rate": 0.00014398799218477584, "loss": 0.9708, "step": 4659 }, { "epoch": 0.36, "grad_norm": 6.675897598266602, "learning_rate": 0.00014396644584072972, "loss": 2.4211, "step": 4660 }, { "epoch": 0.36, "grad_norm": 1.575772762298584, "learning_rate": 0.00014394489696616173, "loss": 1.0978, "step": 4661 }, { "epoch": 0.36, "grad_norm": 1.8538860082626343, "learning_rate": 0.00014392334556231212, "loss": 1.8891, "step": 4662 }, { "epoch": 0.36, "grad_norm": 1.6666158437728882, "learning_rate": 0.00014390179163042128, "loss": 2.2045, "step": 4663 }, { "epoch": 0.36, "grad_norm": 1.0475229024887085, "learning_rate": 0.00014388023517172978, "loss": 1.1655, "step": 4664 }, { "epoch": 0.36, "grad_norm": 1.644235372543335, "learning_rate": 0.00014385867618747834, "loss": 1.7118, "step": 4665 }, { "epoch": 0.36, "grad_norm": 1.2281994819641113, "learning_rate": 0.00014383711467890774, "loss": 1.5846, "step": 4666 }, { "epoch": 0.36, "grad_norm": 1.0227603912353516, "learning_rate": 0.00014381555064725904, "loss": 1.6387, "step": 4667 }, { "epoch": 0.36, "grad_norm": 1.4632048606872559, "learning_rate": 0.00014379398409377335, "loss": 1.4093, "step": 4668 }, { "epoch": 0.36, "grad_norm": 1.4386008977890015, "learning_rate": 0.00014377241501969192, "loss": 1.4203, "step": 4669 }, { "epoch": 0.36, "grad_norm": 1.86616849899292, "learning_rate": 0.00014375084342625622, "loss": 1.4481, "step": 4670 }, { "epoch": 0.36, "grad_norm": 1.33364999294281, "learning_rate": 0.0001437292693147078, 
"loss": 1.2913, "step": 4671 }, { "epoch": 0.36, "grad_norm": 1.3198100328445435, "learning_rate": 0.00014370769268628832, "loss": 1.484, "step": 4672 }, { "epoch": 0.36, "grad_norm": 1.173474907875061, "learning_rate": 0.0001436861135422397, "loss": 1.5384, "step": 4673 }, { "epoch": 0.36, "grad_norm": 1.326912760734558, "learning_rate": 0.00014366453188380398, "loss": 1.3691, "step": 4674 }, { "epoch": 0.36, "grad_norm": 1.542722225189209, "learning_rate": 0.00014364294771222324, "loss": 1.9914, "step": 4675 }, { "epoch": 0.36, "grad_norm": 1.533758521080017, "learning_rate": 0.00014362136102873978, "loss": 1.5296, "step": 4676 }, { "epoch": 0.36, "grad_norm": 1.0439972877502441, "learning_rate": 0.00014359977183459608, "loss": 1.62, "step": 4677 }, { "epoch": 0.36, "grad_norm": 1.457637071609497, "learning_rate": 0.00014357818013103465, "loss": 1.127, "step": 4678 }, { "epoch": 0.36, "grad_norm": 2.734755277633667, "learning_rate": 0.0001435565859192983, "loss": 1.4842, "step": 4679 }, { "epoch": 0.36, "grad_norm": 1.124298095703125, "learning_rate": 0.00014353498920062987, "loss": 1.1465, "step": 4680 }, { "epoch": 0.36, "grad_norm": 1.0016939640045166, "learning_rate": 0.00014351338997627234, "loss": 1.754, "step": 4681 }, { "epoch": 0.36, "grad_norm": 1.0822315216064453, "learning_rate": 0.00014349178824746888, "loss": 1.5006, "step": 4682 }, { "epoch": 0.36, "grad_norm": 2.275702476501465, "learning_rate": 0.00014347018401546287, "loss": 2.1056, "step": 4683 }, { "epoch": 0.36, "grad_norm": 1.5405220985412598, "learning_rate": 0.00014344857728149768, "loss": 1.9127, "step": 4684 }, { "epoch": 0.36, "grad_norm": 2.411712169647217, "learning_rate": 0.0001434269680468169, "loss": 1.462, "step": 4685 }, { "epoch": 0.36, "grad_norm": 1.585253357887268, "learning_rate": 0.00014340535631266428, "loss": 1.692, "step": 4686 }, { "epoch": 0.36, "grad_norm": 2.384765148162842, "learning_rate": 0.00014338374208028374, "loss": 0.906, "step": 4687 }, { "epoch": 0.36, 
"grad_norm": 1.9263368844985962, "learning_rate": 0.00014336212535091927, "loss": 1.3572, "step": 4688 }, { "epoch": 0.36, "grad_norm": 1.334609031677246, "learning_rate": 0.00014334050612581503, "loss": 1.308, "step": 4689 }, { "epoch": 0.36, "grad_norm": 1.9284206628799438, "learning_rate": 0.00014331888440621533, "loss": 1.5646, "step": 4690 }, { "epoch": 0.36, "grad_norm": 1.3333666324615479, "learning_rate": 0.00014329726019336462, "loss": 1.9496, "step": 4691 }, { "epoch": 0.36, "grad_norm": 1.274942398071289, "learning_rate": 0.00014327563348850753, "loss": 1.5204, "step": 4692 }, { "epoch": 0.36, "grad_norm": 2.325111150741577, "learning_rate": 0.00014325400429288877, "loss": 1.3976, "step": 4693 }, { "epoch": 0.36, "grad_norm": 1.3382940292358398, "learning_rate": 0.00014323237260775327, "loss": 1.5192, "step": 4694 }, { "epoch": 0.36, "grad_norm": 1.2660554647445679, "learning_rate": 0.00014321073843434596, "loss": 1.2259, "step": 4695 }, { "epoch": 0.36, "grad_norm": 1.4040240049362183, "learning_rate": 0.0001431891017739121, "loss": 1.7007, "step": 4696 }, { "epoch": 0.36, "grad_norm": 4.0013556480407715, "learning_rate": 0.00014316746262769704, "loss": 3.0042, "step": 4697 }, { "epoch": 0.36, "grad_norm": 1.632957100868225, "learning_rate": 0.0001431458209969461, "loss": 2.3559, "step": 4698 }, { "epoch": 0.36, "grad_norm": 3.061020851135254, "learning_rate": 0.000143124176882905, "loss": 2.3992, "step": 4699 }, { "epoch": 0.36, "grad_norm": 2.156458616256714, "learning_rate": 0.0001431025302868194, "loss": 1.1273, "step": 4700 }, { "epoch": 0.36, "grad_norm": 1.3944278955459595, "learning_rate": 0.00014308088120993525, "loss": 1.0843, "step": 4701 }, { "epoch": 0.36, "grad_norm": 1.428362488746643, "learning_rate": 0.00014305922965349857, "loss": 1.1643, "step": 4702 }, { "epoch": 0.36, "grad_norm": 1.425614595413208, "learning_rate": 0.00014303757561875553, "loss": 1.386, "step": 4703 }, { "epoch": 0.36, "grad_norm": 1.840698480606079, 
"learning_rate": 0.0001430159191069524, "loss": 1.0922, "step": 4704 }, { "epoch": 0.36, "grad_norm": 2.0822882652282715, "learning_rate": 0.00014299426011933568, "loss": 1.7329, "step": 4705 }, { "epoch": 0.36, "grad_norm": 1.0367766618728638, "learning_rate": 0.00014297259865715195, "loss": 1.475, "step": 4706 }, { "epoch": 0.36, "grad_norm": 3.1524009704589844, "learning_rate": 0.00014295093472164796, "loss": 1.8288, "step": 4707 }, { "epoch": 0.36, "grad_norm": 1.8234890699386597, "learning_rate": 0.00014292926831407061, "loss": 2.2353, "step": 4708 }, { "epoch": 0.36, "grad_norm": 1.0797525644302368, "learning_rate": 0.0001429075994356669, "loss": 0.9417, "step": 4709 }, { "epoch": 0.36, "grad_norm": 1.870431661605835, "learning_rate": 0.00014288592808768397, "loss": 1.798, "step": 4710 }, { "epoch": 0.36, "grad_norm": 1.104449987411499, "learning_rate": 0.0001428642542713692, "loss": 1.8497, "step": 4711 }, { "epoch": 0.36, "grad_norm": 1.1683495044708252, "learning_rate": 0.00014284257798797005, "loss": 1.3607, "step": 4712 }, { "epoch": 0.36, "grad_norm": 1.4269853830337524, "learning_rate": 0.00014282089923873403, "loss": 1.6779, "step": 4713 }, { "epoch": 0.36, "grad_norm": 1.2375226020812988, "learning_rate": 0.00014279921802490893, "loss": 1.3354, "step": 4714 }, { "epoch": 0.36, "grad_norm": 1.4545918703079224, "learning_rate": 0.00014277753434774264, "loss": 1.3388, "step": 4715 }, { "epoch": 0.36, "grad_norm": 4.5647478103637695, "learning_rate": 0.00014275584820848316, "loss": 1.4671, "step": 4716 }, { "epoch": 0.36, "grad_norm": 1.3110169172286987, "learning_rate": 0.00014273415960837864, "loss": 1.2949, "step": 4717 }, { "epoch": 0.36, "grad_norm": 2.338243007659912, "learning_rate": 0.00014271246854867742, "loss": 1.9706, "step": 4718 }, { "epoch": 0.36, "grad_norm": 1.1488240957260132, "learning_rate": 0.0001426907750306279, "loss": 1.4633, "step": 4719 }, { "epoch": 0.36, "grad_norm": 1.723637580871582, "learning_rate": 0.0001426690790554787, 
"loss": 1.9472, "step": 4720 }, { "epoch": 0.36, "grad_norm": 1.0483969449996948, "learning_rate": 0.0001426473806244785, "loss": 1.4332, "step": 4721 }, { "epoch": 0.36, "grad_norm": 1.4383939504623413, "learning_rate": 0.00014262567973887625, "loss": 0.8732, "step": 4722 }, { "epoch": 0.36, "grad_norm": 1.7785323858261108, "learning_rate": 0.00014260397639992095, "loss": 1.7373, "step": 4723 }, { "epoch": 0.36, "grad_norm": 1.2464513778686523, "learning_rate": 0.00014258227060886166, "loss": 1.7187, "step": 4724 }, { "epoch": 0.36, "grad_norm": 1.7607121467590332, "learning_rate": 0.00014256056236694776, "loss": 1.0708, "step": 4725 }, { "epoch": 0.36, "grad_norm": 1.735565423965454, "learning_rate": 0.00014253885167542866, "loss": 1.5439, "step": 4726 }, { "epoch": 0.36, "grad_norm": 2.2484235763549805, "learning_rate": 0.00014251713853555392, "loss": 2.2517, "step": 4727 }, { "epoch": 0.36, "grad_norm": 1.8290636539459229, "learning_rate": 0.00014249542294857327, "loss": 1.5034, "step": 4728 }, { "epoch": 0.36, "grad_norm": 1.3739898204803467, "learning_rate": 0.0001424737049157366, "loss": 1.6547, "step": 4729 }, { "epoch": 0.36, "grad_norm": 1.0948336124420166, "learning_rate": 0.00014245198443829383, "loss": 1.393, "step": 4730 }, { "epoch": 0.36, "grad_norm": 1.264770269393921, "learning_rate": 0.0001424302615174952, "loss": 2.0088, "step": 4731 }, { "epoch": 0.36, "grad_norm": 1.537085771560669, "learning_rate": 0.0001424085361545909, "loss": 1.2026, "step": 4732 }, { "epoch": 0.36, "grad_norm": 2.1371333599090576, "learning_rate": 0.00014238680835083136, "loss": 1.398, "step": 4733 }, { "epoch": 0.36, "grad_norm": 1.2274277210235596, "learning_rate": 0.00014236507810746715, "loss": 1.3787, "step": 4734 }, { "epoch": 0.36, "grad_norm": 1.6668750047683716, "learning_rate": 0.00014234334542574906, "loss": 1.5855, "step": 4735 }, { "epoch": 0.36, "grad_norm": 1.7640788555145264, "learning_rate": 0.0001423216103069278, "loss": 1.4688, "step": 4736 }, { 
"epoch": 0.36, "grad_norm": 1.8054031133651733, "learning_rate": 0.0001422998727522544, "loss": 1.4696, "step": 4737 }, { "epoch": 0.36, "grad_norm": 1.5345458984375, "learning_rate": 0.00014227813276297997, "loss": 1.5596, "step": 4738 }, { "epoch": 0.36, "grad_norm": 1.3288494348526, "learning_rate": 0.0001422563903403558, "loss": 1.5862, "step": 4739 }, { "epoch": 0.36, "grad_norm": 3.821587324142456, "learning_rate": 0.0001422346454856333, "loss": 2.0428, "step": 4740 }, { "epoch": 0.36, "grad_norm": 1.583672046661377, "learning_rate": 0.00014221289820006398, "loss": 2.0907, "step": 4741 }, { "epoch": 0.36, "grad_norm": 1.6707183122634888, "learning_rate": 0.00014219114848489954, "loss": 1.4044, "step": 4742 }, { "epoch": 0.36, "grad_norm": 1.6613625288009644, "learning_rate": 0.00014216939634139174, "loss": 1.7301, "step": 4743 }, { "epoch": 0.36, "grad_norm": 2.796006441116333, "learning_rate": 0.00014214764177079264, "loss": 1.5694, "step": 4744 }, { "epoch": 0.36, "grad_norm": 1.3879358768463135, "learning_rate": 0.0001421258847743543, "loss": 1.5426, "step": 4745 }, { "epoch": 0.36, "grad_norm": 1.4293782711029053, "learning_rate": 0.00014210412535332892, "loss": 1.9639, "step": 4746 }, { "epoch": 0.36, "grad_norm": 1.5206223726272583, "learning_rate": 0.00014208236350896896, "loss": 1.5647, "step": 4747 }, { "epoch": 0.36, "grad_norm": 1.263917326927185, "learning_rate": 0.00014206059924252686, "loss": 1.6569, "step": 4748 }, { "epoch": 0.36, "grad_norm": 1.624234914779663, "learning_rate": 0.00014203883255525533, "loss": 1.675, "step": 4749 }, { "epoch": 0.36, "grad_norm": 2.273993968963623, "learning_rate": 0.00014201706344840712, "loss": 1.9217, "step": 4750 }, { "epoch": 0.36, "grad_norm": 2.1148083209991455, "learning_rate": 0.00014199529192323522, "loss": 1.2994, "step": 4751 }, { "epoch": 0.36, "grad_norm": 1.4323738813400269, "learning_rate": 0.00014197351798099267, "loss": 1.3291, "step": 4752 }, { "epoch": 0.36, "grad_norm": 1.307230830192566, 
"learning_rate": 0.0001419517416229327, "loss": 1.0609, "step": 4753 }, { "epoch": 0.36, "grad_norm": 1.0596060752868652, "learning_rate": 0.0001419299628503087, "loss": 1.815, "step": 4754 }, { "epoch": 0.36, "grad_norm": 1.4198983907699585, "learning_rate": 0.0001419081816643741, "loss": 1.7677, "step": 4755 }, { "epoch": 0.36, "grad_norm": 1.0975817441940308, "learning_rate": 0.00014188639806638257, "loss": 1.3439, "step": 4756 }, { "epoch": 0.36, "grad_norm": 1.3274370431900024, "learning_rate": 0.00014186461205758786, "loss": 1.578, "step": 4757 }, { "epoch": 0.36, "grad_norm": 2.05366587638855, "learning_rate": 0.0001418428236392439, "loss": 2.2643, "step": 4758 }, { "epoch": 0.36, "grad_norm": 1.0976150035858154, "learning_rate": 0.00014182103281260475, "loss": 1.1561, "step": 4759 }, { "epoch": 0.36, "grad_norm": 1.3328657150268555, "learning_rate": 0.00014179923957892453, "loss": 1.2379, "step": 4760 }, { "epoch": 0.36, "grad_norm": 1.8304604291915894, "learning_rate": 0.00014177744393945767, "loss": 1.4067, "step": 4761 }, { "epoch": 0.36, "grad_norm": 1.8370641469955444, "learning_rate": 0.00014175564589545854, "loss": 1.3159, "step": 4762 }, { "epoch": 0.36, "grad_norm": 1.3244484663009644, "learning_rate": 0.0001417338454481818, "loss": 1.294, "step": 4763 }, { "epoch": 0.36, "grad_norm": 2.1882503032684326, "learning_rate": 0.0001417120425988822, "loss": 1.7684, "step": 4764 }, { "epoch": 0.36, "grad_norm": 1.2283087968826294, "learning_rate": 0.00014169023734881452, "loss": 1.7931, "step": 4765 }, { "epoch": 0.36, "grad_norm": 1.334620714187622, "learning_rate": 0.0001416684296992339, "loss": 2.0267, "step": 4766 }, { "epoch": 0.36, "grad_norm": 1.5381288528442383, "learning_rate": 0.00014164661965139543, "loss": 1.1635, "step": 4767 }, { "epoch": 0.36, "grad_norm": 1.7301822900772095, "learning_rate": 0.00014162480720655442, "loss": 1.5716, "step": 4768 }, { "epoch": 0.36, "grad_norm": 1.136345624923706, "learning_rate": 0.00014160299236596631, 
"loss": 1.1878, "step": 4769 }, { "epoch": 0.36, "grad_norm": 1.903820514678955, "learning_rate": 0.00014158117513088665, "loss": 1.5577, "step": 4770 }, { "epoch": 0.36, "grad_norm": 1.459264874458313, "learning_rate": 0.00014155935550257115, "loss": 2.1481, "step": 4771 }, { "epoch": 0.36, "grad_norm": 1.1136224269866943, "learning_rate": 0.00014153753348227567, "loss": 1.4778, "step": 4772 }, { "epoch": 0.36, "grad_norm": 2.4423179626464844, "learning_rate": 0.0001415157090712562, "loss": 1.5215, "step": 4773 }, { "epoch": 0.36, "grad_norm": 2.3218517303466797, "learning_rate": 0.00014149388227076886, "loss": 1.9081, "step": 4774 }, { "epoch": 0.36, "grad_norm": 1.0025198459625244, "learning_rate": 0.00014147205308206984, "loss": 1.8283, "step": 4775 }, { "epoch": 0.36, "grad_norm": 2.000257968902588, "learning_rate": 0.00014145022150641564, "loss": 1.1742, "step": 4776 }, { "epoch": 0.36, "grad_norm": 1.6600810289382935, "learning_rate": 0.0001414283875450627, "loss": 1.2583, "step": 4777 }, { "epoch": 0.36, "grad_norm": 1.4627596139907837, "learning_rate": 0.00014140655119926777, "loss": 1.2308, "step": 4778 }, { "epoch": 0.36, "grad_norm": 1.342621088027954, "learning_rate": 0.0001413847124702876, "loss": 1.3423, "step": 4779 }, { "epoch": 0.36, "grad_norm": 1.78360116481781, "learning_rate": 0.00014136287135937915, "loss": 1.7416, "step": 4780 }, { "epoch": 0.36, "grad_norm": 1.711036205291748, "learning_rate": 0.0001413410278677995, "loss": 1.6134, "step": 4781 }, { "epoch": 0.36, "grad_norm": 1.4495316743850708, "learning_rate": 0.0001413191819968059, "loss": 1.649, "step": 4782 }, { "epoch": 0.36, "grad_norm": 2.1687138080596924, "learning_rate": 0.00014129733374765568, "loss": 1.0618, "step": 4783 }, { "epoch": 0.37, "grad_norm": 1.2366399765014648, "learning_rate": 0.0001412754831216063, "loss": 1.6382, "step": 4784 }, { "epoch": 0.37, "grad_norm": 1.440494418144226, "learning_rate": 0.00014125363011991543, "loss": 1.5145, "step": 4785 }, { "epoch": 
0.37, "grad_norm": 2.0429611206054688, "learning_rate": 0.00014123177474384085, "loss": 1.2832, "step": 4786 }, { "epoch": 0.37, "grad_norm": 1.3338514566421509, "learning_rate": 0.00014120991699464043, "loss": 1.5862, "step": 4787 }, { "epoch": 0.37, "grad_norm": 1.3494302034378052, "learning_rate": 0.00014118805687357222, "loss": 1.5173, "step": 4788 }, { "epoch": 0.37, "grad_norm": 1.5411194562911987, "learning_rate": 0.0001411661943818944, "loss": 1.3416, "step": 4789 }, { "epoch": 0.37, "grad_norm": 1.8765228986740112, "learning_rate": 0.00014114432952086524, "loss": 1.7623, "step": 4790 }, { "epoch": 0.37, "grad_norm": 1.3756392002105713, "learning_rate": 0.00014112246229174325, "loss": 1.3166, "step": 4791 }, { "epoch": 0.37, "grad_norm": 1.5926713943481445, "learning_rate": 0.00014110059269578705, "loss": 1.3099, "step": 4792 }, { "epoch": 0.37, "grad_norm": 1.4817962646484375, "learning_rate": 0.0001410787207342552, "loss": 1.5913, "step": 4793 }, { "epoch": 0.37, "grad_norm": 1.4458866119384766, "learning_rate": 0.00014105684640840674, "loss": 1.0548, "step": 4794 }, { "epoch": 0.37, "grad_norm": 1.5101104974746704, "learning_rate": 0.00014103496971950053, "loss": 1.6165, "step": 4795 }, { "epoch": 0.37, "grad_norm": 1.7706270217895508, "learning_rate": 0.00014101309066879577, "loss": 1.5085, "step": 4796 }, { "epoch": 0.37, "grad_norm": 1.2672208547592163, "learning_rate": 0.00014099120925755172, "loss": 1.3665, "step": 4797 }, { "epoch": 0.37, "grad_norm": 1.0065053701400757, "learning_rate": 0.00014096932548702778, "loss": 0.6008, "step": 4798 }, { "epoch": 0.37, "grad_norm": 1.32711660861969, "learning_rate": 0.0001409474393584834, "loss": 1.6682, "step": 4799 }, { "epoch": 0.37, "grad_norm": 1.2372921705245972, "learning_rate": 0.00014092555087317837, "loss": 1.4154, "step": 4800 }, { "epoch": 0.37, "grad_norm": 1.9237911701202393, "learning_rate": 0.00014090366003237248, "loss": 1.2798, "step": 4801 }, { "epoch": 0.37, "grad_norm": 
1.3170667886734009, "learning_rate": 0.00014088176683732564, "loss": 1.1659, "step": 4802 }, { "epoch": 0.37, "grad_norm": 1.2914016246795654, "learning_rate": 0.0001408598712892979, "loss": 1.2158, "step": 4803 }, { "epoch": 0.37, "grad_norm": 1.8116153478622437, "learning_rate": 0.00014083797338954957, "loss": 1.6891, "step": 4804 }, { "epoch": 0.37, "grad_norm": 1.663894772529602, "learning_rate": 0.0001408160731393409, "loss": 2.1081, "step": 4805 }, { "epoch": 0.37, "grad_norm": 1.1187617778778076, "learning_rate": 0.00014079417053993246, "loss": 1.6433, "step": 4806 }, { "epoch": 0.37, "grad_norm": 1.6556470394134521, "learning_rate": 0.00014077226559258482, "loss": 1.7542, "step": 4807 }, { "epoch": 0.37, "grad_norm": 1.451167106628418, "learning_rate": 0.0001407503582985587, "loss": 1.6034, "step": 4808 }, { "epoch": 0.37, "grad_norm": 1.0825560092926025, "learning_rate": 0.00014072844865911505, "loss": 1.0891, "step": 4809 }, { "epoch": 0.37, "grad_norm": 1.7625327110290527, "learning_rate": 0.0001407065366755149, "loss": 1.0461, "step": 4810 }, { "epoch": 0.37, "grad_norm": 1.0099765062332153, "learning_rate": 0.0001406846223490194, "loss": 1.4722, "step": 4811 }, { "epoch": 0.37, "grad_norm": 1.3274434804916382, "learning_rate": 0.00014066270568088984, "loss": 1.8989, "step": 4812 }, { "epoch": 0.37, "grad_norm": 1.4072903394699097, "learning_rate": 0.0001406407866723876, "loss": 1.4691, "step": 4813 }, { "epoch": 0.37, "grad_norm": 1.2447935342788696, "learning_rate": 0.00014061886532477433, "loss": 1.2739, "step": 4814 }, { "epoch": 0.37, "grad_norm": 2.1571733951568604, "learning_rate": 0.0001405969416393117, "loss": 2.1253, "step": 4815 }, { "epoch": 0.37, "grad_norm": 2.235490560531616, "learning_rate": 0.00014057501561726157, "loss": 1.1161, "step": 4816 }, { "epoch": 0.37, "grad_norm": 1.803687334060669, "learning_rate": 0.00014055308725988578, "loss": 2.5451, "step": 4817 }, { "epoch": 0.37, "grad_norm": 1.5544935464859009, "learning_rate": 
0.0001405311565684466, "loss": 1.2704, "step": 4818 }, { "epoch": 0.37, "grad_norm": 1.1622295379638672, "learning_rate": 0.00014050922354420618, "loss": 1.608, "step": 4819 }, { "epoch": 0.37, "grad_norm": 1.0256221294403076, "learning_rate": 0.0001404872881884269, "loss": 1.4197, "step": 4820 }, { "epoch": 0.37, "grad_norm": 2.1881015300750732, "learning_rate": 0.0001404653505023713, "loss": 1.73, "step": 4821 }, { "epoch": 0.37, "grad_norm": 1.2402089834213257, "learning_rate": 0.00014044341048730197, "loss": 1.5547, "step": 4822 }, { "epoch": 0.37, "grad_norm": 2.773850917816162, "learning_rate": 0.0001404214681444817, "loss": 1.9416, "step": 4823 }, { "epoch": 0.37, "grad_norm": 1.804592490196228, "learning_rate": 0.00014039952347517343, "loss": 1.0978, "step": 4824 }, { "epoch": 0.37, "grad_norm": 0.9465498328208923, "learning_rate": 0.00014037757648064018, "loss": 0.9202, "step": 4825 }, { "epoch": 0.37, "grad_norm": 1.374707579612732, "learning_rate": 0.0001403556271621451, "loss": 1.4422, "step": 4826 }, { "epoch": 0.37, "grad_norm": 1.6321706771850586, "learning_rate": 0.00014033367552095154, "loss": 1.8245, "step": 4827 }, { "epoch": 0.37, "grad_norm": 1.4156073331832886, "learning_rate": 0.00014031172155832292, "loss": 0.9621, "step": 4828 }, { "epoch": 0.37, "grad_norm": 1.5854122638702393, "learning_rate": 0.00014028976527552284, "loss": 0.9926, "step": 4829 }, { "epoch": 0.37, "grad_norm": 1.9168097972869873, "learning_rate": 0.00014026780667381498, "loss": 1.8803, "step": 4830 }, { "epoch": 0.37, "grad_norm": 1.5811275243759155, "learning_rate": 0.00014024584575446318, "loss": 1.5923, "step": 4831 }, { "epoch": 0.37, "grad_norm": 1.896295189857483, "learning_rate": 0.00014022388251873146, "loss": 1.4275, "step": 4832 }, { "epoch": 0.37, "grad_norm": 1.5140430927276611, "learning_rate": 0.0001402019169678839, "loss": 1.8255, "step": 4833 }, { "epoch": 0.37, "grad_norm": 1.1113272905349731, "learning_rate": 0.00014017994910318476, "loss": 1.4596, 
"step": 4834 }, { "epoch": 0.37, "grad_norm": 1.279624581336975, "learning_rate": 0.0001401579789258984, "loss": 1.2895, "step": 4835 }, { "epoch": 0.37, "grad_norm": 1.68662428855896, "learning_rate": 0.00014013600643728933, "loss": 1.8884, "step": 4836 }, { "epoch": 0.37, "grad_norm": 6.4478349685668945, "learning_rate": 0.0001401140316386222, "loss": 2.0596, "step": 4837 }, { "epoch": 0.37, "grad_norm": 1.2435882091522217, "learning_rate": 0.00014009205453116176, "loss": 1.4666, "step": 4838 }, { "epoch": 0.37, "grad_norm": 1.1573587656021118, "learning_rate": 0.000140070075116173, "loss": 1.3192, "step": 4839 }, { "epoch": 0.37, "grad_norm": 1.346070647239685, "learning_rate": 0.00014004809339492088, "loss": 1.3071, "step": 4840 }, { "epoch": 0.37, "grad_norm": 1.8665891885757446, "learning_rate": 0.0001400261093686706, "loss": 1.2182, "step": 4841 }, { "epoch": 0.37, "grad_norm": 1.5855655670166016, "learning_rate": 0.00014000412303868747, "loss": 1.3076, "step": 4842 }, { "epoch": 0.37, "grad_norm": 1.2994427680969238, "learning_rate": 0.0001399821344062369, "loss": 1.711, "step": 4843 }, { "epoch": 0.37, "grad_norm": 1.2885098457336426, "learning_rate": 0.00013996014347258452, "loss": 1.6782, "step": 4844 }, { "epoch": 0.37, "grad_norm": 1.6255030632019043, "learning_rate": 0.000139938150238996, "loss": 1.9037, "step": 4845 }, { "epoch": 0.37, "grad_norm": 1.3154581785202026, "learning_rate": 0.0001399161547067372, "loss": 1.7308, "step": 4846 }, { "epoch": 0.37, "grad_norm": 1.4580237865447998, "learning_rate": 0.00013989415687707405, "loss": 1.0086, "step": 4847 }, { "epoch": 0.37, "grad_norm": 5.828329086303711, "learning_rate": 0.00013987215675127267, "loss": 1.4939, "step": 4848 }, { "epoch": 0.37, "grad_norm": 1.0017248392105103, "learning_rate": 0.0001398501543305993, "loss": 0.7963, "step": 4849 }, { "epoch": 0.37, "grad_norm": 1.4993019104003906, "learning_rate": 0.00013982814961632033, "loss": 2.1034, "step": 4850 }, { "epoch": 0.37, "grad_norm": 
1.7795298099517822, "learning_rate": 0.0001398061426097022, "loss": 1.5573, "step": 4851 }, { "epoch": 0.37, "grad_norm": 2.052475929260254, "learning_rate": 0.00013978413331201158, "loss": 2.1451, "step": 4852 }, { "epoch": 0.37, "grad_norm": 1.0319907665252686, "learning_rate": 0.00013976212172451521, "loss": 1.6836, "step": 4853 }, { "epoch": 0.37, "grad_norm": 2.579232692718506, "learning_rate": 0.00013974010784848005, "loss": 1.1936, "step": 4854 }, { "epoch": 0.37, "grad_norm": 2.4866912364959717, "learning_rate": 0.00013971809168517298, "loss": 1.5227, "step": 4855 }, { "epoch": 0.37, "grad_norm": 1.5563329458236694, "learning_rate": 0.0001396960732358613, "loss": 1.9776, "step": 4856 }, { "epoch": 0.37, "grad_norm": 1.1118462085723877, "learning_rate": 0.00013967405250181226, "loss": 1.7267, "step": 4857 }, { "epoch": 0.37, "grad_norm": 1.5238248109817505, "learning_rate": 0.00013965202948429325, "loss": 2.0184, "step": 4858 }, { "epoch": 0.37, "grad_norm": 1.3084253072738647, "learning_rate": 0.00013963000418457181, "loss": 1.1968, "step": 4859 }, { "epoch": 0.37, "grad_norm": 1.760988712310791, "learning_rate": 0.0001396079766039157, "loss": 2.0176, "step": 4860 }, { "epoch": 0.37, "grad_norm": 6.972135066986084, "learning_rate": 0.00013958594674359263, "loss": 1.965, "step": 4861 }, { "epoch": 0.37, "grad_norm": 1.3751068115234375, "learning_rate": 0.00013956391460487067, "loss": 1.4113, "step": 4862 }, { "epoch": 0.37, "grad_norm": 1.4860543012619019, "learning_rate": 0.00013954188018901778, "loss": 1.7049, "step": 4863 }, { "epoch": 0.37, "grad_norm": 1.8364241123199463, "learning_rate": 0.0001395198434973022, "loss": 1.5543, "step": 4864 }, { "epoch": 0.37, "grad_norm": 1.5125114917755127, "learning_rate": 0.0001394978045309923, "loss": 1.5831, "step": 4865 }, { "epoch": 0.37, "grad_norm": 1.9891771078109741, "learning_rate": 0.00013947576329135654, "loss": 1.1901, "step": 4866 }, { "epoch": 0.37, "grad_norm": 1.7324035167694092, "learning_rate": 
0.0001394537197796635, "loss": 2.0344, "step": 4867 }, { "epoch": 0.37, "grad_norm": 1.6932780742645264, "learning_rate": 0.00013943167399718193, "loss": 1.8387, "step": 4868 }, { "epoch": 0.37, "grad_norm": 1.0491358041763306, "learning_rate": 0.00013940962594518068, "loss": 1.6443, "step": 4869 }, { "epoch": 0.37, "grad_norm": 1.5569112300872803, "learning_rate": 0.00013938757562492873, "loss": 1.2572, "step": 4870 }, { "epoch": 0.37, "grad_norm": 1.5512970685958862, "learning_rate": 0.0001393655230376952, "loss": 1.5771, "step": 4871 }, { "epoch": 0.37, "grad_norm": 1.3218185901641846, "learning_rate": 0.00013934346818474943, "loss": 1.122, "step": 4872 }, { "epoch": 0.37, "grad_norm": 2.292922258377075, "learning_rate": 0.0001393214110673607, "loss": 2.5073, "step": 4873 }, { "epoch": 0.37, "grad_norm": 2.4326939582824707, "learning_rate": 0.00013929935168679856, "loss": 1.6734, "step": 4874 }, { "epoch": 0.37, "grad_norm": 1.1594586372375488, "learning_rate": 0.00013927729004433264, "loss": 1.1006, "step": 4875 }, { "epoch": 0.37, "grad_norm": 1.596077799797058, "learning_rate": 0.00013925522614123274, "loss": 1.9057, "step": 4876 }, { "epoch": 0.37, "grad_norm": 1.8652129173278809, "learning_rate": 0.00013923315997876876, "loss": 1.0947, "step": 4877 }, { "epoch": 0.37, "grad_norm": 1.6748578548431396, "learning_rate": 0.00013921109155821073, "loss": 1.4671, "step": 4878 }, { "epoch": 0.37, "grad_norm": 1.2196334600448608, "learning_rate": 0.00013918902088082876, "loss": 0.9573, "step": 4879 }, { "epoch": 0.37, "grad_norm": 1.4954994916915894, "learning_rate": 0.00013916694794789325, "loss": 1.0334, "step": 4880 }, { "epoch": 0.37, "grad_norm": 2.6296417713165283, "learning_rate": 0.00013914487276067456, "loss": 1.541, "step": 4881 }, { "epoch": 0.37, "grad_norm": 1.3755481243133545, "learning_rate": 0.00013912279532044325, "loss": 1.4455, "step": 4882 }, { "epoch": 0.37, "grad_norm": 2.3541762828826904, "learning_rate": 0.00013910071562847, "loss": 0.7947, 
"step": 4883 }, { "epoch": 0.37, "grad_norm": 1.3109917640686035, "learning_rate": 0.00013907863368602566, "loss": 1.1201, "step": 4884 }, { "epoch": 0.37, "grad_norm": 2.882735013961792, "learning_rate": 0.0001390565494943811, "loss": 1.6291, "step": 4885 }, { "epoch": 0.37, "grad_norm": 1.4292287826538086, "learning_rate": 0.0001390344630548075, "loss": 1.8835, "step": 4886 }, { "epoch": 0.37, "grad_norm": 1.0554888248443604, "learning_rate": 0.00013901237436857592, "loss": 0.9606, "step": 4887 }, { "epoch": 0.37, "grad_norm": 1.4993194341659546, "learning_rate": 0.00013899028343695782, "loss": 1.8415, "step": 4888 }, { "epoch": 0.37, "grad_norm": 1.2754411697387695, "learning_rate": 0.0001389681902612246, "loss": 1.673, "step": 4889 }, { "epoch": 0.37, "grad_norm": 1.671828269958496, "learning_rate": 0.00013894609484264788, "loss": 1.5441, "step": 4890 }, { "epoch": 0.37, "grad_norm": 1.226874589920044, "learning_rate": 0.00013892399718249933, "loss": 1.4343, "step": 4891 }, { "epoch": 0.37, "grad_norm": 1.4983937740325928, "learning_rate": 0.00013890189728205083, "loss": 1.6977, "step": 4892 }, { "epoch": 0.37, "grad_norm": 1.5145894289016724, "learning_rate": 0.00013887979514257432, "loss": 0.8275, "step": 4893 }, { "epoch": 0.37, "grad_norm": 0.8811713457107544, "learning_rate": 0.00013885769076534197, "loss": 1.3947, "step": 4894 }, { "epoch": 0.37, "grad_norm": 1.192142128944397, "learning_rate": 0.00013883558415162602, "loss": 1.1551, "step": 4895 }, { "epoch": 0.37, "grad_norm": 1.5085290670394897, "learning_rate": 0.0001388134753026987, "loss": 1.365, "step": 4896 }, { "epoch": 0.37, "grad_norm": 1.4556795358657837, "learning_rate": 0.00013879136421983266, "loss": 1.3345, "step": 4897 }, { "epoch": 0.37, "grad_norm": 1.101272702217102, "learning_rate": 0.00013876925090430044, "loss": 1.7705, "step": 4898 }, { "epoch": 0.37, "grad_norm": 1.3834998607635498, "learning_rate": 0.00013874713535737477, "loss": 2.1447, "step": 4899 }, { "epoch": 0.37, 
"grad_norm": 3.09559965133667, "learning_rate": 0.00013872501758032863, "loss": 1.4466, "step": 4900 }, { "epoch": 0.37, "grad_norm": 1.5609545707702637, "learning_rate": 0.00013870289757443493, "loss": 1.8144, "step": 4901 }, { "epoch": 0.37, "grad_norm": 1.1588319540023804, "learning_rate": 0.0001386807753409668, "loss": 1.4669, "step": 4902 }, { "epoch": 0.37, "grad_norm": 1.3487929105758667, "learning_rate": 0.00013865865088119754, "loss": 1.4105, "step": 4903 }, { "epoch": 0.37, "grad_norm": 1.1400139331817627, "learning_rate": 0.00013863652419640054, "loss": 1.6674, "step": 4904 }, { "epoch": 0.37, "grad_norm": 10.953752517700195, "learning_rate": 0.0001386143952878493, "loss": 2.4057, "step": 4905 }, { "epoch": 0.37, "grad_norm": 1.55707585811615, "learning_rate": 0.0001385922641568175, "loss": 2.1402, "step": 4906 }, { "epoch": 0.37, "grad_norm": 1.2163203954696655, "learning_rate": 0.00013857013080457886, "loss": 0.976, "step": 4907 }, { "epoch": 0.37, "grad_norm": 1.4430294036865234, "learning_rate": 0.00013854799523240732, "loss": 1.4057, "step": 4908 }, { "epoch": 0.37, "grad_norm": 1.3514596223831177, "learning_rate": 0.0001385258574415769, "loss": 2.1924, "step": 4909 }, { "epoch": 0.37, "grad_norm": 3.1168718338012695, "learning_rate": 0.00013850371743336177, "loss": 1.7191, "step": 4910 }, { "epoch": 0.37, "grad_norm": 1.340019702911377, "learning_rate": 0.00013848157520903617, "loss": 1.9229, "step": 4911 }, { "epoch": 0.37, "grad_norm": 1.3229097127914429, "learning_rate": 0.00013845943076987456, "loss": 1.0919, "step": 4912 }, { "epoch": 0.37, "grad_norm": 1.4270182847976685, "learning_rate": 0.00013843728411715148, "loss": 1.2782, "step": 4913 }, { "epoch": 0.37, "grad_norm": 2.9064888954162598, "learning_rate": 0.0001384151352521416, "loss": 0.9889, "step": 4914 }, { "epoch": 0.38, "grad_norm": 1.9804637432098389, "learning_rate": 0.00013839298417611963, "loss": 1.7853, "step": 4915 }, { "epoch": 0.38, "grad_norm": 1.3768287897109985, 
"learning_rate": 0.0001383708308903606, "loss": 1.578, "step": 4916 }, { "epoch": 0.38, "grad_norm": 1.8382983207702637, "learning_rate": 0.00013834867539613949, "loss": 1.8776, "step": 4917 }, { "epoch": 0.38, "grad_norm": 0.9466862678527832, "learning_rate": 0.00013832651769473151, "loss": 1.0517, "step": 4918 }, { "epoch": 0.38, "grad_norm": 1.683895468711853, "learning_rate": 0.00013830435778741198, "loss": 1.7737, "step": 4919 }, { "epoch": 0.38, "grad_norm": 1.9140626192092896, "learning_rate": 0.0001382821956754563, "loss": 2.2927, "step": 4920 }, { "epoch": 0.38, "grad_norm": 1.9212226867675781, "learning_rate": 0.00013826003136014006, "loss": 1.4739, "step": 4921 }, { "epoch": 0.38, "grad_norm": 2.1433651447296143, "learning_rate": 0.00013823786484273886, "loss": 1.2554, "step": 4922 }, { "epoch": 0.38, "grad_norm": 1.0728081464767456, "learning_rate": 0.00013821569612452862, "loss": 1.1283, "step": 4923 }, { "epoch": 0.38, "grad_norm": 1.623018503189087, "learning_rate": 0.0001381935252067852, "loss": 1.7554, "step": 4924 }, { "epoch": 0.38, "grad_norm": 1.1466881036758423, "learning_rate": 0.00013817135209078467, "loss": 1.6175, "step": 4925 }, { "epoch": 0.38, "grad_norm": 1.7438157796859741, "learning_rate": 0.00013814917677780323, "loss": 1.3772, "step": 4926 }, { "epoch": 0.38, "grad_norm": 1.2690154314041138, "learning_rate": 0.00013812699926911724, "loss": 1.747, "step": 4927 }, { "epoch": 0.38, "grad_norm": 3.6129190921783447, "learning_rate": 0.00013810481956600312, "loss": 1.6214, "step": 4928 }, { "epoch": 0.38, "grad_norm": 1.12873113155365, "learning_rate": 0.00013808263766973742, "loss": 1.8519, "step": 4929 }, { "epoch": 0.38, "grad_norm": 1.1090675592422485, "learning_rate": 0.00013806045358159683, "loss": 1.4203, "step": 4930 }, { "epoch": 0.38, "grad_norm": 1.4978630542755127, "learning_rate": 0.0001380382673028582, "loss": 0.9634, "step": 4931 }, { "epoch": 0.38, "grad_norm": 1.2732754945755005, "learning_rate": 0.00013801607883479845, 
"loss": 1.4596, "step": 4932 }, { "epoch": 0.38, "grad_norm": 1.3211495876312256, "learning_rate": 0.00013799388817869467, "loss": 1.6747, "step": 4933 }, { "epoch": 0.38, "grad_norm": 1.3224916458129883, "learning_rate": 0.00013797169533582407, "loss": 1.5655, "step": 4934 }, { "epoch": 0.38, "grad_norm": 1.3859456777572632, "learning_rate": 0.00013794950030746397, "loss": 1.4838, "step": 4935 }, { "epoch": 0.38, "grad_norm": 1.4997590780258179, "learning_rate": 0.00013792730309489178, "loss": 1.1786, "step": 4936 }, { "epoch": 0.38, "grad_norm": 2.7405507564544678, "learning_rate": 0.00013790510369938517, "loss": 1.497, "step": 4937 }, { "epoch": 0.38, "grad_norm": 4.805537700653076, "learning_rate": 0.00013788290212222174, "loss": 1.7925, "step": 4938 }, { "epoch": 0.38, "grad_norm": 1.8929044008255005, "learning_rate": 0.00013786069836467938, "loss": 1.5767, "step": 4939 }, { "epoch": 0.38, "grad_norm": 1.3790549039840698, "learning_rate": 0.00013783849242803603, "loss": 1.08, "step": 4940 }, { "epoch": 0.38, "grad_norm": 1.2121869325637817, "learning_rate": 0.00013781628431356975, "loss": 1.5961, "step": 4941 }, { "epoch": 0.38, "grad_norm": 1.2090153694152832, "learning_rate": 0.00013779407402255878, "loss": 1.908, "step": 4942 }, { "epoch": 0.38, "grad_norm": 2.0981218814849854, "learning_rate": 0.00013777186155628144, "loss": 1.8819, "step": 4943 }, { "epoch": 0.38, "grad_norm": 0.947140634059906, "learning_rate": 0.00013774964691601614, "loss": 1.5008, "step": 4944 }, { "epoch": 0.38, "grad_norm": 1.1592917442321777, "learning_rate": 0.00013772743010304154, "loss": 1.4555, "step": 4945 }, { "epoch": 0.38, "grad_norm": 1.2573368549346924, "learning_rate": 0.00013770521111863628, "loss": 1.3249, "step": 4946 }, { "epoch": 0.38, "grad_norm": 1.405853509902954, "learning_rate": 0.00013768298996407925, "loss": 1.2299, "step": 4947 }, { "epoch": 0.38, "grad_norm": 2.2191989421844482, "learning_rate": 0.00013766076664064933, "loss": 2.1756, "step": 4948 }, { 
"epoch": 0.38, "grad_norm": 1.6117336750030518, "learning_rate": 0.00013763854114962567, "loss": 1.2683, "step": 4949 }, { "epoch": 0.38, "grad_norm": 0.9991106390953064, "learning_rate": 0.00013761631349228738, "loss": 1.6574, "step": 4950 }, { "epoch": 0.38, "grad_norm": 1.2920905351638794, "learning_rate": 0.0001375940836699139, "loss": 1.5309, "step": 4951 }, { "epoch": 0.38, "grad_norm": 1.0001037120819092, "learning_rate": 0.00013757185168378468, "loss": 1.5854, "step": 4952 }, { "epoch": 0.38, "grad_norm": 1.051777720451355, "learning_rate": 0.00013754961753517918, "loss": 1.4845, "step": 4953 }, { "epoch": 0.38, "grad_norm": 1.806489109992981, "learning_rate": 0.00013752738122537722, "loss": 1.9649, "step": 4954 }, { "epoch": 0.38, "grad_norm": 1.4473856687545776, "learning_rate": 0.0001375051427556586, "loss": 1.5019, "step": 4955 }, { "epoch": 0.38, "grad_norm": 0.9970136284828186, "learning_rate": 0.00013748290212730325, "loss": 1.2973, "step": 4956 }, { "epoch": 0.38, "grad_norm": 2.624610424041748, "learning_rate": 0.00013746065934159123, "loss": 1.9879, "step": 4957 }, { "epoch": 0.38, "grad_norm": 3.80873441696167, "learning_rate": 0.00013743841439980276, "loss": 1.6948, "step": 4958 }, { "epoch": 0.38, "grad_norm": 0.9346280097961426, "learning_rate": 0.00013741616730321817, "loss": 0.8934, "step": 4959 }, { "epoch": 0.38, "grad_norm": 1.0653152465820312, "learning_rate": 0.00013739391805311793, "loss": 1.555, "step": 4960 }, { "epoch": 0.38, "grad_norm": 1.1442492008209229, "learning_rate": 0.0001373716666507826, "loss": 1.6663, "step": 4961 }, { "epoch": 0.38, "grad_norm": 1.3931516408920288, "learning_rate": 0.00013734941309749286, "loss": 1.0916, "step": 4962 }, { "epoch": 0.38, "grad_norm": 1.1596425771713257, "learning_rate": 0.00013732715739452956, "loss": 1.7718, "step": 4963 }, { "epoch": 0.38, "grad_norm": 1.3006867170333862, "learning_rate": 0.00013730489954317355, "loss": 1.0067, "step": 4964 }, { "epoch": 0.38, "grad_norm": 
2.712308883666992, "learning_rate": 0.000137282639544706, "loss": 1.3273, "step": 4965 }, { "epoch": 0.38, "grad_norm": 1.3314162492752075, "learning_rate": 0.00013726037740040812, "loss": 1.5737, "step": 4966 }, { "epoch": 0.38, "grad_norm": 1.299976110458374, "learning_rate": 0.0001372381131115611, "loss": 1.1048, "step": 4967 }, { "epoch": 0.38, "grad_norm": 1.3511189222335815, "learning_rate": 0.00013721584667944652, "loss": 1.9843, "step": 4968 }, { "epoch": 0.38, "grad_norm": 1.033622145652771, "learning_rate": 0.00013719357810534582, "loss": 1.0816, "step": 4969 }, { "epoch": 0.38, "grad_norm": 1.2352983951568604, "learning_rate": 0.00013717130739054076, "loss": 1.295, "step": 4970 }, { "epoch": 0.38, "grad_norm": 2.372182607650757, "learning_rate": 0.0001371490345363131, "loss": 1.7988, "step": 4971 }, { "epoch": 0.38, "grad_norm": 1.378261685371399, "learning_rate": 0.00013712675954394483, "loss": 1.7818, "step": 4972 }, { "epoch": 0.38, "grad_norm": 2.3100059032440186, "learning_rate": 0.00013710448241471793, "loss": 1.4447, "step": 4973 }, { "epoch": 0.38, "grad_norm": 1.4062902927398682, "learning_rate": 0.00013708220314991466, "loss": 1.536, "step": 4974 }, { "epoch": 0.38, "grad_norm": 1.340796709060669, "learning_rate": 0.00013705992175081728, "loss": 1.5448, "step": 4975 }, { "epoch": 0.38, "grad_norm": 1.6017473936080933, "learning_rate": 0.00013703763821870817, "loss": 1.6421, "step": 4976 }, { "epoch": 0.38, "grad_norm": 0.8455267548561096, "learning_rate": 0.00013701535255486992, "loss": 0.9207, "step": 4977 }, { "epoch": 0.38, "grad_norm": 1.5812890529632568, "learning_rate": 0.0001369930647605852, "loss": 2.2958, "step": 4978 }, { "epoch": 0.38, "grad_norm": 1.7669167518615723, "learning_rate": 0.0001369707748371368, "loss": 1.8939, "step": 4979 }, { "epoch": 0.38, "grad_norm": 1.1029216051101685, "learning_rate": 0.00013694848278580763, "loss": 1.5934, "step": 4980 }, { "epoch": 0.38, "grad_norm": 1.6011275053024292, "learning_rate": 
0.00013692618860788072, "loss": 1.3085, "step": 4981 }, { "epoch": 0.38, "grad_norm": 1.3050174713134766, "learning_rate": 0.0001369038923046392, "loss": 1.4395, "step": 4982 }, { "epoch": 0.38, "grad_norm": 1.1501567363739014, "learning_rate": 0.00013688159387736644, "loss": 1.4701, "step": 4983 }, { "epoch": 0.38, "grad_norm": 1.8662433624267578, "learning_rate": 0.00013685929332734573, "loss": 1.9798, "step": 4984 }, { "epoch": 0.38, "grad_norm": 1.850564956665039, "learning_rate": 0.0001368369906558607, "loss": 1.1322, "step": 4985 }, { "epoch": 0.38, "grad_norm": 1.9269436597824097, "learning_rate": 0.00013681468586419491, "loss": 1.828, "step": 4986 }, { "epoch": 0.38, "grad_norm": 1.4587019681930542, "learning_rate": 0.00013679237895363217, "loss": 1.4485, "step": 4987 }, { "epoch": 0.38, "grad_norm": 1.7064177989959717, "learning_rate": 0.00013677006992545636, "loss": 1.3664, "step": 4988 }, { "epoch": 0.38, "grad_norm": 3.5739293098449707, "learning_rate": 0.0001367477587809515, "loss": 2.4013, "step": 4989 }, { "epoch": 0.38, "grad_norm": 1.531435251235962, "learning_rate": 0.00013672544552140176, "loss": 1.6739, "step": 4990 }, { "epoch": 0.38, "grad_norm": 1.8171948194503784, "learning_rate": 0.00013670313014809131, "loss": 1.3733, "step": 4991 }, { "epoch": 0.38, "grad_norm": 1.8165870904922485, "learning_rate": 0.0001366808126623046, "loss": 1.1866, "step": 4992 }, { "epoch": 0.38, "grad_norm": 3.3137123584747314, "learning_rate": 0.00013665849306532612, "loss": 1.7958, "step": 4993 }, { "epoch": 0.38, "grad_norm": 1.3939965963363647, "learning_rate": 0.00013663617135844047, "loss": 1.3734, "step": 4994 }, { "epoch": 0.38, "grad_norm": 1.3473610877990723, "learning_rate": 0.0001366138475429324, "loss": 2.0667, "step": 4995 }, { "epoch": 0.38, "grad_norm": 1.3542035818099976, "learning_rate": 0.00013659152162008676, "loss": 1.1326, "step": 4996 }, { "epoch": 0.38, "grad_norm": 2.0441012382507324, "learning_rate": 0.00013656919359118856, "loss": 1.5766, 
"step": 4997 }, { "epoch": 0.38, "grad_norm": 1.2050797939300537, "learning_rate": 0.00013654686345752293, "loss": 1.5756, "step": 4998 }, { "epoch": 0.38, "grad_norm": 3.743741512298584, "learning_rate": 0.00013652453122037504, "loss": 1.6284, "step": 4999 }, { "epoch": 0.38, "grad_norm": 1.7366589307785034, "learning_rate": 0.00013650219688103027, "loss": 1.2518, "step": 5000 }, { "epoch": 0.38, "grad_norm": 2.0570976734161377, "learning_rate": 0.00013647986044077408, "loss": 1.9629, "step": 5001 }, { "epoch": 0.38, "grad_norm": 1.2815136909484863, "learning_rate": 0.00013645752190089206, "loss": 1.8593, "step": 5002 }, { "epoch": 0.38, "grad_norm": 1.3024239540100098, "learning_rate": 0.00013643518126266993, "loss": 1.4081, "step": 5003 }, { "epoch": 0.38, "grad_norm": 0.8269206285476685, "learning_rate": 0.0001364128385273935, "loss": 0.834, "step": 5004 }, { "epoch": 0.38, "grad_norm": 1.0887295007705688, "learning_rate": 0.00013639049369634876, "loss": 1.0451, "step": 5005 }, { "epoch": 0.38, "grad_norm": 1.6336250305175781, "learning_rate": 0.00013636814677082175, "loss": 0.7717, "step": 5006 }, { "epoch": 0.38, "grad_norm": 1.179847002029419, "learning_rate": 0.0001363457977520987, "loss": 1.052, "step": 5007 }, { "epoch": 0.38, "grad_norm": 2.7965946197509766, "learning_rate": 0.0001363234466414659, "loss": 1.8113, "step": 5008 }, { "epoch": 0.38, "grad_norm": 0.8880289793014526, "learning_rate": 0.00013630109344020977, "loss": 1.3729, "step": 5009 }, { "epoch": 0.38, "grad_norm": 1.2450649738311768, "learning_rate": 0.0001362787381496169, "loss": 1.5181, "step": 5010 }, { "epoch": 0.38, "grad_norm": 1.5824049711227417, "learning_rate": 0.00013625638077097396, "loss": 1.4046, "step": 5011 }, { "epoch": 0.38, "grad_norm": 0.9222576022148132, "learning_rate": 0.0001362340213055677, "loss": 1.2428, "step": 5012 }, { "epoch": 0.38, "grad_norm": 0.9958406090736389, "learning_rate": 0.00013621165975468512, "loss": 1.3544, "step": 5013 }, { "epoch": 0.38, 
"grad_norm": 1.2625010013580322, "learning_rate": 0.00013618929611961317, "loss": 1.1025, "step": 5014 }, { "epoch": 0.38, "grad_norm": 1.8420453071594238, "learning_rate": 0.00013616693040163904, "loss": 1.3375, "step": 5015 }, { "epoch": 0.38, "grad_norm": 1.1289467811584473, "learning_rate": 0.00013614456260205, "loss": 1.3863, "step": 5016 }, { "epoch": 0.38, "grad_norm": 1.632033109664917, "learning_rate": 0.0001361221927221335, "loss": 1.4985, "step": 5017 }, { "epoch": 0.38, "grad_norm": 1.4162304401397705, "learning_rate": 0.000136099820763177, "loss": 1.4357, "step": 5018 }, { "epoch": 0.38, "grad_norm": 1.5987801551818848, "learning_rate": 0.00013607744672646812, "loss": 1.1161, "step": 5019 }, { "epoch": 0.38, "grad_norm": 1.1807022094726562, "learning_rate": 0.00013605507061329464, "loss": 2.0077, "step": 5020 }, { "epoch": 0.38, "grad_norm": 1.0527971982955933, "learning_rate": 0.00013603269242494444, "loss": 1.199, "step": 5021 }, { "epoch": 0.38, "grad_norm": 1.3770370483398438, "learning_rate": 0.0001360103121627055, "loss": 0.9059, "step": 5022 }, { "epoch": 0.38, "grad_norm": 2.721771240234375, "learning_rate": 0.00013598792982786595, "loss": 1.8387, "step": 5023 }, { "epoch": 0.38, "grad_norm": 1.5213967561721802, "learning_rate": 0.000135965545421714, "loss": 1.5254, "step": 5024 }, { "epoch": 0.38, "grad_norm": 1.6170166730880737, "learning_rate": 0.00013594315894553802, "loss": 2.2395, "step": 5025 }, { "epoch": 0.38, "grad_norm": 1.6275492906570435, "learning_rate": 0.00013592077040062647, "loss": 1.4373, "step": 5026 }, { "epoch": 0.38, "grad_norm": 1.4718719720840454, "learning_rate": 0.00013589837978826794, "loss": 1.2557, "step": 5027 }, { "epoch": 0.38, "grad_norm": 1.2134023904800415, "learning_rate": 0.00013587598710975115, "loss": 1.0289, "step": 5028 }, { "epoch": 0.38, "grad_norm": 0.8884190917015076, "learning_rate": 0.0001358535923663649, "loss": 0.9254, "step": 5029 }, { "epoch": 0.38, "grad_norm": 1.4427021741867065, 
"learning_rate": 0.00013583119555939815, "loss": 0.8823, "step": 5030 }, { "epoch": 0.38, "grad_norm": 1.4971596002578735, "learning_rate": 0.00013580879669014, "loss": 1.8392, "step": 5031 }, { "epoch": 0.38, "grad_norm": 1.5342342853546143, "learning_rate": 0.00013578639575987958, "loss": 1.4608, "step": 5032 }, { "epoch": 0.38, "grad_norm": 1.1858937740325928, "learning_rate": 0.00013576399276990625, "loss": 0.8922, "step": 5033 }, { "epoch": 0.38, "grad_norm": 3.8295083045959473, "learning_rate": 0.00013574158772150939, "loss": 1.9947, "step": 5034 }, { "epoch": 0.38, "grad_norm": 1.5677895545959473, "learning_rate": 0.0001357191806159785, "loss": 0.9799, "step": 5035 }, { "epoch": 0.38, "grad_norm": 1.2712950706481934, "learning_rate": 0.00013569677145460333, "loss": 1.2915, "step": 5036 }, { "epoch": 0.38, "grad_norm": 4.221943378448486, "learning_rate": 0.00013567436023867362, "loss": 1.2346, "step": 5037 }, { "epoch": 0.38, "grad_norm": 3.8884012699127197, "learning_rate": 0.0001356519469694792, "loss": 2.7323, "step": 5038 }, { "epoch": 0.38, "grad_norm": 1.2414971590042114, "learning_rate": 0.00013562953164831019, "loss": 1.4305, "step": 5039 }, { "epoch": 0.38, "grad_norm": 1.502983808517456, "learning_rate": 0.00013560711427645662, "loss": 1.3602, "step": 5040 }, { "epoch": 0.38, "grad_norm": 1.091454029083252, "learning_rate": 0.0001355846948552088, "loss": 1.3728, "step": 5041 }, { "epoch": 0.38, "grad_norm": 1.114667296409607, "learning_rate": 0.00013556227338585713, "loss": 1.4451, "step": 5042 }, { "epoch": 0.38, "grad_norm": 1.1424773931503296, "learning_rate": 0.00013553984986969201, "loss": 1.5791, "step": 5043 }, { "epoch": 0.38, "grad_norm": 2.262434482574463, "learning_rate": 0.00013551742430800408, "loss": 1.7499, "step": 5044 }, { "epoch": 0.38, "grad_norm": 1.1442986726760864, "learning_rate": 0.00013549499670208407, "loss": 1.4096, "step": 5045 }, { "epoch": 0.39, "grad_norm": 1.2211248874664307, "learning_rate": 0.00013547256705322282, 
"loss": 1.6092, "step": 5046 }, { "epoch": 0.39, "grad_norm": 1.60997474193573, "learning_rate": 0.00013545013536271123, "loss": 1.8501, "step": 5047 }, { "epoch": 0.39, "grad_norm": 2.8930718898773193, "learning_rate": 0.00013542770163184044, "loss": 1.8781, "step": 5048 }, { "epoch": 0.39, "grad_norm": 2.063201427459717, "learning_rate": 0.00013540526586190162, "loss": 1.6255, "step": 5049 }, { "epoch": 0.39, "grad_norm": 1.9396337270736694, "learning_rate": 0.0001353828280541861, "loss": 2.3187, "step": 5050 }, { "epoch": 0.39, "grad_norm": 1.2316563129425049, "learning_rate": 0.00013536038820998524, "loss": 1.921, "step": 5051 }, { "epoch": 0.39, "grad_norm": 2.0715713500976562, "learning_rate": 0.00013533794633059065, "loss": 1.6044, "step": 5052 }, { "epoch": 0.39, "grad_norm": 1.3378578424453735, "learning_rate": 0.0001353155024172939, "loss": 2.008, "step": 5053 }, { "epoch": 0.39, "grad_norm": 1.185689091682434, "learning_rate": 0.00013529305647138687, "loss": 1.7586, "step": 5054 }, { "epoch": 0.39, "grad_norm": 1.2579371929168701, "learning_rate": 0.00013527060849416144, "loss": 1.2557, "step": 5055 }, { "epoch": 0.39, "grad_norm": 1.247096300125122, "learning_rate": 0.00013524815848690958, "loss": 1.0073, "step": 5056 }, { "epoch": 0.39, "grad_norm": 1.0311052799224854, "learning_rate": 0.0001352257064509234, "loss": 1.6245, "step": 5057 }, { "epoch": 0.39, "grad_norm": 1.3296802043914795, "learning_rate": 0.00013520325238749518, "loss": 1.6059, "step": 5058 }, { "epoch": 0.39, "grad_norm": 7.225274085998535, "learning_rate": 0.00013518079629791724, "loss": 2.4082, "step": 5059 }, { "epoch": 0.39, "grad_norm": 1.6171907186508179, "learning_rate": 0.00013515833818348216, "loss": 2.1861, "step": 5060 }, { "epoch": 0.39, "grad_norm": 1.5108816623687744, "learning_rate": 0.00013513587804548242, "loss": 1.3819, "step": 5061 }, { "epoch": 0.39, "grad_norm": 2.2641584873199463, "learning_rate": 0.00013511341588521076, "loss": 1.7689, "step": 5062 }, { "epoch": 
0.39, "grad_norm": 1.2454909086227417, "learning_rate": 0.00013509095170396003, "loss": 1.3808, "step": 5063 }, { "epoch": 0.39, "grad_norm": 0.8655984997749329, "learning_rate": 0.0001350684855030232, "loss": 0.9681, "step": 5064 }, { "epoch": 0.39, "grad_norm": 1.575998067855835, "learning_rate": 0.00013504601728369327, "loss": 1.1829, "step": 5065 }, { "epoch": 0.39, "grad_norm": 1.0513197183609009, "learning_rate": 0.0001350235470472634, "loss": 1.2214, "step": 5066 }, { "epoch": 0.39, "grad_norm": 1.1441677808761597, "learning_rate": 0.00013500107479502696, "loss": 0.8243, "step": 5067 }, { "epoch": 0.39, "grad_norm": 1.402011752128601, "learning_rate": 0.0001349786005282773, "loss": 1.2355, "step": 5068 }, { "epoch": 0.39, "grad_norm": 1.2530802488327026, "learning_rate": 0.00013495612424830797, "loss": 0.9796, "step": 5069 }, { "epoch": 0.39, "grad_norm": 2.690915107727051, "learning_rate": 0.0001349336459564126, "loss": 1.5815, "step": 5070 }, { "epoch": 0.39, "grad_norm": 1.4055347442626953, "learning_rate": 0.0001349111656538849, "loss": 1.4896, "step": 5071 }, { "epoch": 0.39, "grad_norm": 1.319747805595398, "learning_rate": 0.00013488868334201882, "loss": 1.2619, "step": 5072 }, { "epoch": 0.39, "grad_norm": 1.173698902130127, "learning_rate": 0.00013486619902210832, "loss": 1.1352, "step": 5073 }, { "epoch": 0.39, "grad_norm": 1.1797417402267456, "learning_rate": 0.00013484371269544747, "loss": 1.3246, "step": 5074 }, { "epoch": 0.39, "grad_norm": 1.252306580543518, "learning_rate": 0.00013482122436333052, "loss": 1.2632, "step": 5075 }, { "epoch": 0.39, "grad_norm": 1.6850838661193848, "learning_rate": 0.00013479873402705178, "loss": 2.0121, "step": 5076 }, { "epoch": 0.39, "grad_norm": 2.1224522590637207, "learning_rate": 0.0001347762416879057, "loss": 1.4661, "step": 5077 }, { "epoch": 0.39, "grad_norm": 1.571304440498352, "learning_rate": 0.0001347537473471869, "loss": 0.9486, "step": 5078 }, { "epoch": 0.39, "grad_norm": 1.0130935907363892, 
"learning_rate": 0.00013473125100619, "loss": 1.3918, "step": 5079 }, { "epoch": 0.39, "grad_norm": 1.2779375314712524, "learning_rate": 0.00013470875266620978, "loss": 1.4529, "step": 5080 }, { "epoch": 0.39, "grad_norm": 1.6643198728561401, "learning_rate": 0.00013468625232854116, "loss": 1.2826, "step": 5081 }, { "epoch": 0.39, "grad_norm": 1.022444486618042, "learning_rate": 0.0001346637499944792, "loss": 1.6354, "step": 5082 }, { "epoch": 0.39, "grad_norm": 2.330570697784424, "learning_rate": 0.00013464124566531905, "loss": 1.4107, "step": 5083 }, { "epoch": 0.39, "grad_norm": 1.431730031967163, "learning_rate": 0.00013461873934235592, "loss": 1.8325, "step": 5084 }, { "epoch": 0.39, "grad_norm": 1.4355841875076294, "learning_rate": 0.00013459623102688516, "loss": 1.0622, "step": 5085 }, { "epoch": 0.39, "grad_norm": 21.298389434814453, "learning_rate": 0.0001345737207202023, "loss": 1.6857, "step": 5086 }, { "epoch": 0.39, "grad_norm": 3.1770384311676025, "learning_rate": 0.00013455120842360293, "loss": 2.3851, "step": 5087 }, { "epoch": 0.39, "grad_norm": 1.218316912651062, "learning_rate": 0.00013452869413838273, "loss": 1.5386, "step": 5088 }, { "epoch": 0.39, "grad_norm": 2.303115129470825, "learning_rate": 0.00013450617786583754, "loss": 0.957, "step": 5089 }, { "epoch": 0.39, "grad_norm": 1.7413170337677002, "learning_rate": 0.00013448365960726333, "loss": 1.416, "step": 5090 }, { "epoch": 0.39, "grad_norm": 1.8734557628631592, "learning_rate": 0.00013446113936395617, "loss": 1.7175, "step": 5091 }, { "epoch": 0.39, "grad_norm": 1.6951725482940674, "learning_rate": 0.00013443861713721214, "loss": 2.0251, "step": 5092 }, { "epoch": 0.39, "grad_norm": 1.4710822105407715, "learning_rate": 0.00013441609292832763, "loss": 1.5814, "step": 5093 }, { "epoch": 0.39, "grad_norm": 1.2078644037246704, "learning_rate": 0.00013439356673859896, "loss": 1.0496, "step": 5094 }, { "epoch": 0.39, "grad_norm": 1.4772757291793823, "learning_rate": 0.00013437103856932264, 
"loss": 1.5446, "step": 5095 }, { "epoch": 0.39, "grad_norm": 1.5939632654190063, "learning_rate": 0.0001343485084217954, "loss": 1.5098, "step": 5096 }, { "epoch": 0.39, "grad_norm": 2.1704583168029785, "learning_rate": 0.00013432597629731382, "loss": 0.7184, "step": 5097 }, { "epoch": 0.39, "grad_norm": 1.3698923587799072, "learning_rate": 0.0001343034421971749, "loss": 1.2997, "step": 5098 }, { "epoch": 0.39, "grad_norm": 2.1069610118865967, "learning_rate": 0.00013428090612267553, "loss": 1.5606, "step": 5099 }, { "epoch": 0.39, "grad_norm": 1.0113589763641357, "learning_rate": 0.00013425836807511276, "loss": 1.1651, "step": 5100 }, { "epoch": 0.39, "grad_norm": 1.0445308685302734, "learning_rate": 0.00013423582805578387, "loss": 1.4181, "step": 5101 }, { "epoch": 0.39, "grad_norm": 2.9004268646240234, "learning_rate": 0.00013421328606598616, "loss": 1.3702, "step": 5102 }, { "epoch": 0.39, "grad_norm": 1.9343358278274536, "learning_rate": 0.00013419074210701694, "loss": 1.9139, "step": 5103 }, { "epoch": 0.39, "grad_norm": 1.7280064821243286, "learning_rate": 0.00013416819618017388, "loss": 1.3236, "step": 5104 }, { "epoch": 0.39, "grad_norm": 1.180706262588501, "learning_rate": 0.00013414564828675456, "loss": 1.6408, "step": 5105 }, { "epoch": 0.39, "grad_norm": 1.172105312347412, "learning_rate": 0.00013412309842805674, "loss": 2.0593, "step": 5106 }, { "epoch": 0.39, "grad_norm": 1.9265056848526, "learning_rate": 0.00013410054660537832, "loss": 1.1579, "step": 5107 }, { "epoch": 0.39, "grad_norm": 2.7188854217529297, "learning_rate": 0.00013407799282001728, "loss": 1.3741, "step": 5108 }, { "epoch": 0.39, "grad_norm": 1.2195043563842773, "learning_rate": 0.00013405543707327168, "loss": 1.2741, "step": 5109 }, { "epoch": 0.39, "grad_norm": 1.084354043006897, "learning_rate": 0.00013403287936643977, "loss": 1.4415, "step": 5110 }, { "epoch": 0.39, "grad_norm": 1.4579274654388428, "learning_rate": 0.0001340103197008199, "loss": 1.2499, "step": 5111 }, { 
"epoch": 0.39, "grad_norm": 2.719822645187378, "learning_rate": 0.00013398775807771046, "loss": 1.393, "step": 5112 }, { "epoch": 0.39, "grad_norm": 3.5479137897491455, "learning_rate": 0.00013396519449841005, "loss": 1.933, "step": 5113 }, { "epoch": 0.39, "grad_norm": 1.6364425420761108, "learning_rate": 0.0001339426289642173, "loss": 2.0577, "step": 5114 }, { "epoch": 0.39, "grad_norm": 1.1706057786941528, "learning_rate": 0.00013392006147643095, "loss": 1.2772, "step": 5115 }, { "epoch": 0.39, "grad_norm": 1.1360245943069458, "learning_rate": 0.00013389749203635, "loss": 1.6075, "step": 5116 }, { "epoch": 0.39, "grad_norm": 1.664654016494751, "learning_rate": 0.00013387492064527331, "loss": 1.3668, "step": 5117 }, { "epoch": 0.39, "grad_norm": 2.5754597187042236, "learning_rate": 0.00013385234730450008, "loss": 1.4529, "step": 5118 }, { "epoch": 0.39, "grad_norm": 0.903942883014679, "learning_rate": 0.00013382977201532956, "loss": 1.5721, "step": 5119 }, { "epoch": 0.39, "grad_norm": 2.0242197513580322, "learning_rate": 0.00013380719477906102, "loss": 1.2523, "step": 5120 }, { "epoch": 0.39, "grad_norm": 2.2281737327575684, "learning_rate": 0.00013378461559699394, "loss": 1.2449, "step": 5121 }, { "epoch": 0.39, "grad_norm": 1.9526666402816772, "learning_rate": 0.00013376203447042787, "loss": 1.3608, "step": 5122 }, { "epoch": 0.39, "grad_norm": 1.352303147315979, "learning_rate": 0.00013373945140066252, "loss": 1.796, "step": 5123 }, { "epoch": 0.39, "grad_norm": 1.829776644706726, "learning_rate": 0.00013371686638899763, "loss": 2.198, "step": 5124 }, { "epoch": 0.39, "grad_norm": 1.6327837705612183, "learning_rate": 0.00013369427943673312, "loss": 1.6597, "step": 5125 }, { "epoch": 0.39, "grad_norm": 1.239202857017517, "learning_rate": 0.000133671690545169, "loss": 1.2105, "step": 5126 }, { "epoch": 0.39, "grad_norm": 1.316741943359375, "learning_rate": 0.0001336490997156054, "loss": 0.876, "step": 5127 }, { "epoch": 0.39, "grad_norm": 1.4565496444702148, 
"learning_rate": 0.0001336265069493425, "loss": 1.6846, "step": 5128 }, { "epoch": 0.39, "grad_norm": 1.8165477514266968, "learning_rate": 0.00013360391224768073, "loss": 1.414, "step": 5129 }, { "epoch": 0.39, "grad_norm": 1.6420000791549683, "learning_rate": 0.00013358131561192046, "loss": 1.4962, "step": 5130 }, { "epoch": 0.39, "grad_norm": 2.109049081802368, "learning_rate": 0.0001335587170433623, "loss": 1.0983, "step": 5131 }, { "epoch": 0.39, "grad_norm": 2.6194992065429688, "learning_rate": 0.00013353611654330695, "loss": 1.2905, "step": 5132 }, { "epoch": 0.39, "grad_norm": 2.111204147338867, "learning_rate": 0.00013351351411305511, "loss": 1.5676, "step": 5133 }, { "epoch": 0.39, "grad_norm": 1.4361121654510498, "learning_rate": 0.0001334909097539078, "loss": 0.9696, "step": 5134 }, { "epoch": 0.39, "grad_norm": 2.800842523574829, "learning_rate": 0.00013346830346716595, "loss": 1.2382, "step": 5135 }, { "epoch": 0.39, "grad_norm": 1.0324335098266602, "learning_rate": 0.0001334456952541307, "loss": 1.0209, "step": 5136 }, { "epoch": 0.39, "grad_norm": 1.023496150970459, "learning_rate": 0.00013342308511610332, "loss": 1.7619, "step": 5137 }, { "epoch": 0.39, "grad_norm": 1.4697890281677246, "learning_rate": 0.00013340047305438507, "loss": 1.0262, "step": 5138 }, { "epoch": 0.39, "grad_norm": 1.7654985189437866, "learning_rate": 0.00013337785907027743, "loss": 1.4647, "step": 5139 }, { "epoch": 0.39, "grad_norm": 0.7530330419540405, "learning_rate": 0.00013335524316508208, "loss": 0.9247, "step": 5140 }, { "epoch": 0.39, "grad_norm": 1.7471709251403809, "learning_rate": 0.00013333262534010056, "loss": 0.9253, "step": 5141 }, { "epoch": 0.39, "grad_norm": 4.087563514709473, "learning_rate": 0.00013331000559663466, "loss": 2.6683, "step": 5142 }, { "epoch": 0.39, "grad_norm": 1.28925359249115, "learning_rate": 0.00013328738393598636, "loss": 1.9406, "step": 5143 }, { "epoch": 0.39, "grad_norm": 3.0046379566192627, "learning_rate": 0.0001332647603594576, 
"loss": 1.7582, "step": 5144 }, { "epoch": 0.39, "grad_norm": 4.129087448120117, "learning_rate": 0.00013324213486835053, "loss": 2.1564, "step": 5145 }, { "epoch": 0.39, "grad_norm": 2.227010488510132, "learning_rate": 0.00013321950746396737, "loss": 1.4202, "step": 5146 }, { "epoch": 0.39, "grad_norm": 1.069840431213379, "learning_rate": 0.00013319687814761046, "loss": 1.5095, "step": 5147 }, { "epoch": 0.39, "grad_norm": 1.7252455949783325, "learning_rate": 0.00013317424692058222, "loss": 1.8077, "step": 5148 }, { "epoch": 0.39, "grad_norm": 1.4256103038787842, "learning_rate": 0.00013315161378418526, "loss": 1.5626, "step": 5149 }, { "epoch": 0.39, "grad_norm": 1.0778777599334717, "learning_rate": 0.0001331289787397222, "loss": 1.1836, "step": 5150 }, { "epoch": 0.39, "grad_norm": 0.9800827503204346, "learning_rate": 0.0001331063417884958, "loss": 1.2446, "step": 5151 }, { "epoch": 0.39, "grad_norm": 1.7296406030654907, "learning_rate": 0.00013308370293180902, "loss": 1.2735, "step": 5152 }, { "epoch": 0.39, "grad_norm": 4.154973030090332, "learning_rate": 0.00013306106217096482, "loss": 0.9254, "step": 5153 }, { "epoch": 0.39, "grad_norm": 1.552049994468689, "learning_rate": 0.0001330384195072663, "loss": 1.3567, "step": 5154 }, { "epoch": 0.39, "grad_norm": 1.861120581626892, "learning_rate": 0.00013301577494201664, "loss": 1.4193, "step": 5155 }, { "epoch": 0.39, "grad_norm": 1.2025423049926758, "learning_rate": 0.00013299312847651925, "loss": 1.4, "step": 5156 }, { "epoch": 0.39, "grad_norm": 2.1526267528533936, "learning_rate": 0.00013297048011207748, "loss": 0.9325, "step": 5157 }, { "epoch": 0.39, "grad_norm": 4.096175193786621, "learning_rate": 0.00013294782984999491, "loss": 1.9465, "step": 5158 }, { "epoch": 0.39, "grad_norm": 2.0966105461120605, "learning_rate": 0.00013292517769157523, "loss": 1.7169, "step": 5159 }, { "epoch": 0.39, "grad_norm": 1.1978939771652222, "learning_rate": 0.00013290252363812216, "loss": 1.3679, "step": 5160 }, { "epoch": 
0.39, "grad_norm": 1.5440833568572998, "learning_rate": 0.00013287986769093955, "loss": 1.4419, "step": 5161 }, { "epoch": 0.39, "grad_norm": 1.411752462387085, "learning_rate": 0.00013285720985133139, "loss": 1.7748, "step": 5162 }, { "epoch": 0.39, "grad_norm": 1.2498130798339844, "learning_rate": 0.00013283455012060185, "loss": 1.423, "step": 5163 }, { "epoch": 0.39, "grad_norm": 2.2254583835601807, "learning_rate": 0.000132811888500055, "loss": 1.1597, "step": 5164 }, { "epoch": 0.39, "grad_norm": 1.132885456085205, "learning_rate": 0.00013278922499099525, "loss": 1.1952, "step": 5165 }, { "epoch": 0.39, "grad_norm": 1.764840841293335, "learning_rate": 0.00013276655959472696, "loss": 1.4804, "step": 5166 }, { "epoch": 0.39, "grad_norm": 1.0282626152038574, "learning_rate": 0.00013274389231255466, "loss": 1.2948, "step": 5167 }, { "epoch": 0.39, "grad_norm": 1.2448854446411133, "learning_rate": 0.00013272122314578303, "loss": 2.1788, "step": 5168 }, { "epoch": 0.39, "grad_norm": 1.9733754396438599, "learning_rate": 0.00013269855209571674, "loss": 1.3489, "step": 5169 }, { "epoch": 0.39, "grad_norm": 2.4421958923339844, "learning_rate": 0.0001326758791636607, "loss": 1.4946, "step": 5170 }, { "epoch": 0.39, "grad_norm": 3.1683523654937744, "learning_rate": 0.0001326532043509198, "loss": 1.6338, "step": 5171 }, { "epoch": 0.39, "grad_norm": 1.9239976406097412, "learning_rate": 0.00013263052765879918, "loss": 1.3061, "step": 5172 }, { "epoch": 0.39, "grad_norm": 1.325434684753418, "learning_rate": 0.00013260784908860398, "loss": 1.3301, "step": 5173 }, { "epoch": 0.39, "grad_norm": 1.4467945098876953, "learning_rate": 0.00013258516864163945, "loss": 1.518, "step": 5174 }, { "epoch": 0.39, "grad_norm": 2.875260829925537, "learning_rate": 0.00013256248631921103, "loss": 1.2542, "step": 5175 }, { "epoch": 0.39, "grad_norm": 1.5836710929870605, "learning_rate": 0.0001325398021226242, "loss": 1.4573, "step": 5176 }, { "epoch": 0.4, "grad_norm": 1.0227885246276855, 
"learning_rate": 0.00013251711605318458, "loss": 1.1566, "step": 5177 }, { "epoch": 0.4, "grad_norm": 2.836510419845581, "learning_rate": 0.00013249442811219786, "loss": 1.6592, "step": 5178 }, { "epoch": 0.4, "grad_norm": 0.9913767576217651, "learning_rate": 0.0001324717383009699, "loss": 1.0298, "step": 5179 }, { "epoch": 0.4, "grad_norm": 2.6167409420013428, "learning_rate": 0.00013244904662080653, "loss": 1.745, "step": 5180 }, { "epoch": 0.4, "grad_norm": 1.951125979423523, "learning_rate": 0.0001324263530730139, "loss": 1.7343, "step": 5181 }, { "epoch": 0.4, "grad_norm": 1.8829407691955566, "learning_rate": 0.00013240365765889814, "loss": 1.3724, "step": 5182 }, { "epoch": 0.4, "grad_norm": 1.397661805152893, "learning_rate": 0.0001323809603797654, "loss": 1.771, "step": 5183 }, { "epoch": 0.4, "grad_norm": 1.3813400268554688, "learning_rate": 0.00013235826123692219, "loss": 1.6159, "step": 5184 }, { "epoch": 0.4, "grad_norm": 2.719364881515503, "learning_rate": 0.00013233556023167485, "loss": 1.9035, "step": 5185 }, { "epoch": 0.4, "grad_norm": 1.3115921020507812, "learning_rate": 0.00013231285736533, "loss": 1.2007, "step": 5186 }, { "epoch": 0.4, "grad_norm": 1.7723356485366821, "learning_rate": 0.00013229015263919438, "loss": 1.8487, "step": 5187 }, { "epoch": 0.4, "grad_norm": 1.7632511854171753, "learning_rate": 0.0001322674460545747, "loss": 1.9204, "step": 5188 }, { "epoch": 0.4, "grad_norm": 2.197021484375, "learning_rate": 0.00013224473761277783, "loss": 1.2666, "step": 5189 }, { "epoch": 0.4, "grad_norm": 2.658339500427246, "learning_rate": 0.00013222202731511085, "loss": 1.486, "step": 5190 }, { "epoch": 0.4, "grad_norm": 2.4163472652435303, "learning_rate": 0.00013219931516288082, "loss": 1.8797, "step": 5191 }, { "epoch": 0.4, "grad_norm": 1.1779645681381226, "learning_rate": 0.00013217660115739498, "loss": 1.0828, "step": 5192 }, { "epoch": 0.4, "grad_norm": 1.2559260129928589, "learning_rate": 0.00013215388529996065, "loss": 1.422, "step": 
5193 }, { "epoch": 0.4, "grad_norm": 1.2253143787384033, "learning_rate": 0.00013213116759188523, "loss": 1.5446, "step": 5194 }, { "epoch": 0.4, "grad_norm": 1.4540636539459229, "learning_rate": 0.00013210844803447628, "loss": 0.7948, "step": 5195 }, { "epoch": 0.4, "grad_norm": 1.2922353744506836, "learning_rate": 0.00013208572662904146, "loss": 1.5846, "step": 5196 }, { "epoch": 0.4, "grad_norm": 1.9481630325317383, "learning_rate": 0.00013206300337688848, "loss": 1.6172, "step": 5197 }, { "epoch": 0.4, "grad_norm": 1.9858641624450684, "learning_rate": 0.0001320402782793252, "loss": 1.5471, "step": 5198 }, { "epoch": 0.4, "grad_norm": 1.1523441076278687, "learning_rate": 0.0001320175513376596, "loss": 1.1174, "step": 5199 }, { "epoch": 0.4, "grad_norm": 1.1361550092697144, "learning_rate": 0.0001319948225531997, "loss": 1.177, "step": 5200 }, { "epoch": 0.4, "grad_norm": 3.0803840160369873, "learning_rate": 0.00013197209192725374, "loss": 1.3521, "step": 5201 }, { "epoch": 0.4, "grad_norm": 1.06626296043396, "learning_rate": 0.00013194935946112994, "loss": 1.6537, "step": 5202 }, { "epoch": 0.4, "grad_norm": 2.7690539360046387, "learning_rate": 0.00013192662515613675, "loss": 1.7279, "step": 5203 }, { "epoch": 0.4, "grad_norm": 1.807737112045288, "learning_rate": 0.00013190388901358256, "loss": 1.0963, "step": 5204 }, { "epoch": 0.4, "grad_norm": 1.5363529920578003, "learning_rate": 0.00013188115103477606, "loss": 2.0884, "step": 5205 }, { "epoch": 0.4, "grad_norm": 1.3721131086349487, "learning_rate": 0.00013185841122102592, "loss": 1.2327, "step": 5206 }, { "epoch": 0.4, "grad_norm": 1.7192425727844238, "learning_rate": 0.0001318356695736409, "loss": 2.5641, "step": 5207 }, { "epoch": 0.4, "grad_norm": 2.8151936531066895, "learning_rate": 0.00013181292609393, "loss": 2.6212, "step": 5208 }, { "epoch": 0.4, "grad_norm": 1.7316173315048218, "learning_rate": 0.0001317901807832022, "loss": 1.5536, "step": 5209 }, { "epoch": 0.4, "grad_norm": 1.2407137155532837, 
"learning_rate": 0.0001317674336427666, "loss": 1.2894, "step": 5210 }, { "epoch": 0.4, "grad_norm": 2.054889678955078, "learning_rate": 0.00013174468467393245, "loss": 1.3302, "step": 5211 }, { "epoch": 0.4, "grad_norm": 1.0044960975646973, "learning_rate": 0.00013172193387800906, "loss": 0.8784, "step": 5212 }, { "epoch": 0.4, "grad_norm": 1.3593440055847168, "learning_rate": 0.0001316991812563059, "loss": 1.5738, "step": 5213 }, { "epoch": 0.4, "grad_norm": 1.1555447578430176, "learning_rate": 0.00013167642681013252, "loss": 2.1634, "step": 5214 }, { "epoch": 0.4, "grad_norm": 0.989281177520752, "learning_rate": 0.0001316536705407985, "loss": 1.1612, "step": 5215 }, { "epoch": 0.4, "grad_norm": 2.7236971855163574, "learning_rate": 0.0001316309124496137, "loss": 1.8995, "step": 5216 }, { "epoch": 0.4, "grad_norm": 1.6572946310043335, "learning_rate": 0.0001316081525378879, "loss": 1.3726, "step": 5217 }, { "epoch": 0.4, "grad_norm": 1.971318006515503, "learning_rate": 0.00013158539080693112, "loss": 0.8779, "step": 5218 }, { "epoch": 0.4, "grad_norm": 1.3386834859848022, "learning_rate": 0.00013156262725805339, "loss": 1.8084, "step": 5219 }, { "epoch": 0.4, "grad_norm": 1.3230730295181274, "learning_rate": 0.00013153986189256488, "loss": 1.2454, "step": 5220 }, { "epoch": 0.4, "grad_norm": 1.3056331872940063, "learning_rate": 0.00013151709471177588, "loss": 1.1258, "step": 5221 }, { "epoch": 0.4, "grad_norm": 1.1997023820877075, "learning_rate": 0.00013149432571699676, "loss": 1.3572, "step": 5222 }, { "epoch": 0.4, "grad_norm": 1.561919927597046, "learning_rate": 0.00013147155490953803, "loss": 2.1602, "step": 5223 }, { "epoch": 0.4, "grad_norm": 1.8494073152542114, "learning_rate": 0.00013144878229071026, "loss": 1.5827, "step": 5224 }, { "epoch": 0.4, "grad_norm": 1.068150281906128, "learning_rate": 0.00013142600786182417, "loss": 0.9813, "step": 5225 }, { "epoch": 0.4, "grad_norm": 2.3558778762817383, "learning_rate": 0.00013140323162419055, "loss": 2.1385, 
"step": 5226 }, { "epoch": 0.4, "grad_norm": 1.0107100009918213, "learning_rate": 0.0001313804535791203, "loss": 1.7336, "step": 5227 }, { "epoch": 0.4, "grad_norm": 2.090794324874878, "learning_rate": 0.00013135767372792436, "loss": 1.4137, "step": 5228 }, { "epoch": 0.4, "grad_norm": 1.972580909729004, "learning_rate": 0.000131334892071914, "loss": 2.0159, "step": 5229 }, { "epoch": 0.4, "grad_norm": 1.0379843711853027, "learning_rate": 0.00013131210861240026, "loss": 1.6924, "step": 5230 }, { "epoch": 0.4, "grad_norm": 1.7165216207504272, "learning_rate": 0.00013128932335069456, "loss": 1.9718, "step": 5231 }, { "epoch": 0.4, "grad_norm": 1.286703109741211, "learning_rate": 0.00013126653628810835, "loss": 1.2684, "step": 5232 }, { "epoch": 0.4, "grad_norm": 2.5533831119537354, "learning_rate": 0.00013124374742595307, "loss": 2.169, "step": 5233 }, { "epoch": 0.4, "grad_norm": 2.0422818660736084, "learning_rate": 0.0001312209567655404, "loss": 1.8339, "step": 5234 }, { "epoch": 0.4, "grad_norm": 1.561264157295227, "learning_rate": 0.00013119816430818206, "loss": 1.2823, "step": 5235 }, { "epoch": 0.4, "grad_norm": 1.5609281063079834, "learning_rate": 0.00013117537005518986, "loss": 1.2213, "step": 5236 }, { "epoch": 0.4, "grad_norm": 2.6896703243255615, "learning_rate": 0.00013115257400787584, "loss": 2.0401, "step": 5237 }, { "epoch": 0.4, "grad_norm": 2.0493879318237305, "learning_rate": 0.00013112977616755193, "loss": 0.9123, "step": 5238 }, { "epoch": 0.4, "grad_norm": 1.175803303718567, "learning_rate": 0.00013110697653553034, "loss": 1.7744, "step": 5239 }, { "epoch": 0.4, "grad_norm": 1.4233437776565552, "learning_rate": 0.0001310841751131233, "loss": 1.1839, "step": 5240 }, { "epoch": 0.4, "grad_norm": 1.0685757398605347, "learning_rate": 0.00013106137190164316, "loss": 1.6609, "step": 5241 }, { "epoch": 0.4, "grad_norm": 1.675172209739685, "learning_rate": 0.00013103856690240234, "loss": 1.2517, "step": 5242 }, { "epoch": 0.4, "grad_norm": 
2.475125551223755, "learning_rate": 0.00013101576011671353, "loss": 1.5124, "step": 5243 }, { "epoch": 0.4, "grad_norm": 1.2070869207382202, "learning_rate": 0.00013099295154588925, "loss": 1.6701, "step": 5244 }, { "epoch": 0.4, "grad_norm": 1.074873685836792, "learning_rate": 0.0001309701411912423, "loss": 1.4826, "step": 5245 }, { "epoch": 0.4, "grad_norm": 4.517148494720459, "learning_rate": 0.00013094732905408557, "loss": 1.9154, "step": 5246 }, { "epoch": 0.4, "grad_norm": 1.3312022686004639, "learning_rate": 0.00013092451513573204, "loss": 2.1014, "step": 5247 }, { "epoch": 0.4, "grad_norm": 2.671384811401367, "learning_rate": 0.00013090169943749476, "loss": 1.5304, "step": 5248 }, { "epoch": 0.4, "grad_norm": 1.3567157983779907, "learning_rate": 0.0001308788819606869, "loss": 1.1133, "step": 5249 }, { "epoch": 0.4, "grad_norm": 1.4117761850357056, "learning_rate": 0.00013085606270662176, "loss": 1.5336, "step": 5250 }, { "epoch": 0.4, "grad_norm": 2.157148838043213, "learning_rate": 0.00013083324167661268, "loss": 1.2178, "step": 5251 }, { "epoch": 0.4, "grad_norm": 1.088265061378479, "learning_rate": 0.0001308104188719732, "loss": 1.1689, "step": 5252 }, { "epoch": 0.4, "grad_norm": 2.2162537574768066, "learning_rate": 0.00013078759429401688, "loss": 1.4769, "step": 5253 }, { "epoch": 0.4, "grad_norm": 1.9398385286331177, "learning_rate": 0.00013076476794405734, "loss": 2.0778, "step": 5254 }, { "epoch": 0.4, "grad_norm": 1.2408592700958252, "learning_rate": 0.00013074193982340847, "loss": 1.9945, "step": 5255 }, { "epoch": 0.4, "grad_norm": 1.6155904531478882, "learning_rate": 0.0001307191099333841, "loss": 1.6927, "step": 5256 }, { "epoch": 0.4, "grad_norm": 1.5633901357650757, "learning_rate": 0.00013069627827529825, "loss": 1.6572, "step": 5257 }, { "epoch": 0.4, "grad_norm": 1.023585557937622, "learning_rate": 0.00013067344485046496, "loss": 1.4879, "step": 5258 }, { "epoch": 0.4, "grad_norm": 1.201101303100586, "learning_rate": 
0.00013065060966019852, "loss": 0.5823, "step": 5259 }, { "epoch": 0.4, "grad_norm": 1.1585065126419067, "learning_rate": 0.00013062777270581312, "loss": 1.1187, "step": 5260 }, { "epoch": 0.4, "grad_norm": 1.5961720943450928, "learning_rate": 0.00013060493398862324, "loss": 1.5101, "step": 5261 }, { "epoch": 0.4, "grad_norm": 1.1628494262695312, "learning_rate": 0.00013058209350994338, "loss": 1.2329, "step": 5262 }, { "epoch": 0.4, "grad_norm": 2.3222506046295166, "learning_rate": 0.00013055925127108804, "loss": 1.6507, "step": 5263 }, { "epoch": 0.4, "grad_norm": 2.9951424598693848, "learning_rate": 0.000130536407273372, "loss": 1.6838, "step": 5264 }, { "epoch": 0.4, "grad_norm": 3.386685848236084, "learning_rate": 0.0001305135615181101, "loss": 2.0953, "step": 5265 }, { "epoch": 0.4, "grad_norm": 2.0530221462249756, "learning_rate": 0.00013049071400661716, "loss": 1.1649, "step": 5266 }, { "epoch": 0.4, "grad_norm": 1.1102590560913086, "learning_rate": 0.00013046786474020826, "loss": 1.6869, "step": 5267 }, { "epoch": 0.4, "grad_norm": 1.4921951293945312, "learning_rate": 0.00013044501372019845, "loss": 1.5672, "step": 5268 }, { "epoch": 0.4, "grad_norm": 1.2854357957839966, "learning_rate": 0.00013042216094790297, "loss": 1.3777, "step": 5269 }, { "epoch": 0.4, "grad_norm": 1.1048429012298584, "learning_rate": 0.00013039930642463708, "loss": 1.7537, "step": 5270 }, { "epoch": 0.4, "grad_norm": 1.8801612854003906, "learning_rate": 0.00013037645015171628, "loss": 1.7892, "step": 5271 }, { "epoch": 0.4, "grad_norm": 1.8648544549942017, "learning_rate": 0.000130353592130456, "loss": 1.6473, "step": 5272 }, { "epoch": 0.4, "grad_norm": 1.489955186843872, "learning_rate": 0.0001303307323621719, "loss": 1.6399, "step": 5273 }, { "epoch": 0.4, "grad_norm": 1.2334140539169312, "learning_rate": 0.00013030787084817967, "loss": 1.657, "step": 5274 }, { "epoch": 0.4, "grad_norm": 3.520573377609253, "learning_rate": 0.00013028500758979506, "loss": 1.2875, "step": 5275 }, { 
"epoch": 0.4, "grad_norm": 2.2472383975982666, "learning_rate": 0.00013026214258833415, "loss": 0.6948, "step": 5276 }, { "epoch": 0.4, "grad_norm": 1.7669013738632202, "learning_rate": 0.00013023927584511274, "loss": 2.2548, "step": 5277 }, { "epoch": 0.4, "grad_norm": 1.0786372423171997, "learning_rate": 0.0001302164073614471, "loss": 1.5543, "step": 5278 }, { "epoch": 0.4, "grad_norm": 1.0622873306274414, "learning_rate": 0.0001301935371386534, "loss": 1.0438, "step": 5279 }, { "epoch": 0.4, "grad_norm": 3.5142250061035156, "learning_rate": 0.00013017066517804793, "loss": 1.6989, "step": 5280 }, { "epoch": 0.4, "grad_norm": 1.6897987127304077, "learning_rate": 0.0001301477914809471, "loss": 1.1218, "step": 5281 }, { "epoch": 0.4, "grad_norm": 1.4629595279693604, "learning_rate": 0.00013012491604866747, "loss": 1.4683, "step": 5282 }, { "epoch": 0.4, "grad_norm": 1.1983076333999634, "learning_rate": 0.00013010203888252559, "loss": 1.8588, "step": 5283 }, { "epoch": 0.4, "grad_norm": 1.124474287033081, "learning_rate": 0.0001300791599838382, "loss": 0.9833, "step": 5284 }, { "epoch": 0.4, "grad_norm": 2.1422648429870605, "learning_rate": 0.00013005627935392216, "loss": 1.0629, "step": 5285 }, { "epoch": 0.4, "grad_norm": 1.8230249881744385, "learning_rate": 0.00013003339699409433, "loss": 1.4944, "step": 5286 }, { "epoch": 0.4, "grad_norm": 1.8675639629364014, "learning_rate": 0.0001300105129056717, "loss": 1.4715, "step": 5287 }, { "epoch": 0.4, "grad_norm": 1.3213882446289062, "learning_rate": 0.00012998762708997142, "loss": 1.4576, "step": 5288 }, { "epoch": 0.4, "grad_norm": 2.0138936042785645, "learning_rate": 0.0001299647395483107, "loss": 1.8536, "step": 5289 }, { "epoch": 0.4, "grad_norm": 1.680280089378357, "learning_rate": 0.00012994185028200684, "loss": 1.4557, "step": 5290 }, { "epoch": 0.4, "grad_norm": 1.3379896879196167, "learning_rate": 0.00012991895929237725, "loss": 1.3058, "step": 5291 }, { "epoch": 0.4, "grad_norm": 1.8277374505996704, 
"learning_rate": 0.00012989606658073946, "loss": 2.1742, "step": 5292 }, { "epoch": 0.4, "grad_norm": 1.3955652713775635, "learning_rate": 0.000129873172148411, "loss": 1.0356, "step": 5293 }, { "epoch": 0.4, "grad_norm": 4.527332782745361, "learning_rate": 0.00012985027599670966, "loss": 1.9935, "step": 5294 }, { "epoch": 0.4, "grad_norm": 4.109129428863525, "learning_rate": 0.00012982737812695325, "loss": 1.8945, "step": 5295 }, { "epoch": 0.4, "grad_norm": 0.9149486422538757, "learning_rate": 0.0001298044785404596, "loss": 1.4068, "step": 5296 }, { "epoch": 0.4, "grad_norm": 1.3629672527313232, "learning_rate": 0.0001297815772385468, "loss": 1.3313, "step": 5297 }, { "epoch": 0.4, "grad_norm": 1.0855755805969238, "learning_rate": 0.00012975867422253284, "loss": 0.9063, "step": 5298 }, { "epoch": 0.4, "grad_norm": 1.3959373235702515, "learning_rate": 0.00012973576949373603, "loss": 1.4293, "step": 5299 }, { "epoch": 0.4, "grad_norm": 1.2747491598129272, "learning_rate": 0.00012971286305347464, "loss": 1.1587, "step": 5300 }, { "epoch": 0.4, "grad_norm": 1.803856372833252, "learning_rate": 0.000129689954903067, "loss": 1.1159, "step": 5301 }, { "epoch": 0.4, "grad_norm": 1.2300469875335693, "learning_rate": 0.00012966704504383168, "loss": 1.1117, "step": 5302 }, { "epoch": 0.4, "grad_norm": 1.700789213180542, "learning_rate": 0.00012964413347708726, "loss": 1.5133, "step": 5303 }, { "epoch": 0.4, "grad_norm": 1.0660151243209839, "learning_rate": 0.00012962122020415245, "loss": 1.7979, "step": 5304 }, { "epoch": 0.4, "grad_norm": 1.497164011001587, "learning_rate": 0.00012959830522634596, "loss": 1.4689, "step": 5305 }, { "epoch": 0.4, "grad_norm": 1.1507099866867065, "learning_rate": 0.00012957538854498679, "loss": 1.4363, "step": 5306 }, { "epoch": 0.4, "grad_norm": 1.1578599214553833, "learning_rate": 0.00012955247016139382, "loss": 1.2855, "step": 5307 }, { "epoch": 0.41, "grad_norm": 1.3477368354797363, "learning_rate": 0.00012952955007688622, "loss": 2.1184, 
"step": 5308 }, { "epoch": 0.41, "grad_norm": 1.1266227960586548, "learning_rate": 0.00012950662829278316, "loss": 1.0792, "step": 5309 }, { "epoch": 0.41, "grad_norm": 1.0241332054138184, "learning_rate": 0.00012948370481040384, "loss": 1.7011, "step": 5310 }, { "epoch": 0.41, "grad_norm": 1.0755845308303833, "learning_rate": 0.00012946077963106774, "loss": 1.169, "step": 5311 }, { "epoch": 0.41, "grad_norm": 6.224391460418701, "learning_rate": 0.00012943785275609427, "loss": 2.1911, "step": 5312 }, { "epoch": 0.41, "grad_norm": 2.0724616050720215, "learning_rate": 0.00012941492418680305, "loss": 2.1375, "step": 5313 }, { "epoch": 0.41, "grad_norm": 0.9457921981811523, "learning_rate": 0.0001293919939245137, "loss": 1.46, "step": 5314 }, { "epoch": 0.41, "grad_norm": 2.132634401321411, "learning_rate": 0.00012936906197054605, "loss": 0.9904, "step": 5315 }, { "epoch": 0.41, "grad_norm": 1.3885996341705322, "learning_rate": 0.0001293461283262199, "loss": 1.2558, "step": 5316 }, { "epoch": 0.41, "grad_norm": 1.5851454734802246, "learning_rate": 0.00012932319299285527, "loss": 1.6343, "step": 5317 }, { "epoch": 0.41, "grad_norm": 2.408820152282715, "learning_rate": 0.0001293002559717722, "loss": 1.6083, "step": 5318 }, { "epoch": 0.41, "grad_norm": 1.123673677444458, "learning_rate": 0.0001292773172642908, "loss": 1.2056, "step": 5319 }, { "epoch": 0.41, "grad_norm": 1.0053205490112305, "learning_rate": 0.00012925437687173142, "loss": 1.3151, "step": 5320 }, { "epoch": 0.41, "grad_norm": 1.6756852865219116, "learning_rate": 0.00012923143479541435, "loss": 1.1197, "step": 5321 }, { "epoch": 0.41, "grad_norm": 1.1556671857833862, "learning_rate": 0.00012920849103666002, "loss": 1.4182, "step": 5322 }, { "epoch": 0.41, "grad_norm": 1.2192147970199585, "learning_rate": 0.00012918554559678902, "loss": 1.7061, "step": 5323 }, { "epoch": 0.41, "grad_norm": 0.9970151782035828, "learning_rate": 0.000129162598477122, "loss": 1.313, "step": 5324 }, { "epoch": 0.41, "grad_norm": 
1.1888351440429688, "learning_rate": 0.00012913964967897963, "loss": 1.4792, "step": 5325 }, { "epoch": 0.41, "grad_norm": 4.011811256408691, "learning_rate": 0.0001291166992036828, "loss": 1.4133, "step": 5326 }, { "epoch": 0.41, "grad_norm": 1.6188089847564697, "learning_rate": 0.00012909374705255246, "loss": 1.4363, "step": 5327 }, { "epoch": 0.41, "grad_norm": 1.2575191259384155, "learning_rate": 0.0001290707932269096, "loss": 1.3146, "step": 5328 }, { "epoch": 0.41, "grad_norm": 2.101588726043701, "learning_rate": 0.00012904783772807533, "loss": 1.7312, "step": 5329 }, { "epoch": 0.41, "grad_norm": 1.7163931131362915, "learning_rate": 0.00012902488055737093, "loss": 1.5645, "step": 5330 }, { "epoch": 0.41, "grad_norm": 1.0477842092514038, "learning_rate": 0.00012900192171611764, "loss": 1.6705, "step": 5331 }, { "epoch": 0.41, "grad_norm": 1.2822139263153076, "learning_rate": 0.00012897896120563695, "loss": 1.8514, "step": 5332 }, { "epoch": 0.41, "grad_norm": 1.2350558042526245, "learning_rate": 0.00012895599902725034, "loss": 1.7642, "step": 5333 }, { "epoch": 0.41, "grad_norm": 1.9152172803878784, "learning_rate": 0.0001289330351822794, "loss": 1.8987, "step": 5334 }, { "epoch": 0.41, "grad_norm": 1.3130017518997192, "learning_rate": 0.00012891006967204584, "loss": 1.6617, "step": 5335 }, { "epoch": 0.41, "grad_norm": 1.7215782403945923, "learning_rate": 0.00012888710249787147, "loss": 1.4497, "step": 5336 }, { "epoch": 0.41, "grad_norm": 1.4591156244277954, "learning_rate": 0.00012886413366107815, "loss": 1.1995, "step": 5337 }, { "epoch": 0.41, "grad_norm": 1.477113127708435, "learning_rate": 0.0001288411631629879, "loss": 1.0343, "step": 5338 }, { "epoch": 0.41, "grad_norm": 1.7407159805297852, "learning_rate": 0.00012881819100492281, "loss": 2.2852, "step": 5339 }, { "epoch": 0.41, "grad_norm": 1.039083480834961, "learning_rate": 0.00012879521718820502, "loss": 1.0392, "step": 5340 }, { "epoch": 0.41, "grad_norm": 1.5621956586837769, "learning_rate": 
0.00012877224171415686, "loss": 1.8526, "step": 5341 }, { "epoch": 0.41, "grad_norm": 1.337674617767334, "learning_rate": 0.0001287492645841007, "loss": 1.7657, "step": 5342 }, { "epoch": 0.41, "grad_norm": 1.4312127828598022, "learning_rate": 0.0001287262857993589, "loss": 1.3218, "step": 5343 }, { "epoch": 0.41, "grad_norm": 2.6659271717071533, "learning_rate": 0.00012870330536125414, "loss": 1.1552, "step": 5344 }, { "epoch": 0.41, "grad_norm": 1.0135281085968018, "learning_rate": 0.00012868032327110904, "loss": 1.5445, "step": 5345 }, { "epoch": 0.41, "grad_norm": 2.142531394958496, "learning_rate": 0.00012865733953024632, "loss": 1.497, "step": 5346 }, { "epoch": 0.41, "grad_norm": 1.9701104164123535, "learning_rate": 0.0001286343541399889, "loss": 1.7084, "step": 5347 }, { "epoch": 0.41, "grad_norm": 1.4322677850723267, "learning_rate": 0.00012861136710165964, "loss": 1.5033, "step": 5348 }, { "epoch": 0.41, "grad_norm": 1.0577454566955566, "learning_rate": 0.0001285883784165816, "loss": 1.6778, "step": 5349 }, { "epoch": 0.41, "grad_norm": 1.2629817724227905, "learning_rate": 0.00012856538808607795, "loss": 1.5674, "step": 5350 }, { "epoch": 0.41, "grad_norm": 1.977980375289917, "learning_rate": 0.00012854239611147187, "loss": 2.1512, "step": 5351 }, { "epoch": 0.41, "grad_norm": 2.149141788482666, "learning_rate": 0.0001285194024940867, "loss": 1.4245, "step": 5352 }, { "epoch": 0.41, "grad_norm": 1.1420445442199707, "learning_rate": 0.00012849640723524588, "loss": 1.5769, "step": 5353 }, { "epoch": 0.41, "grad_norm": 1.4542560577392578, "learning_rate": 0.00012847341033627287, "loss": 1.771, "step": 5354 }, { "epoch": 0.41, "grad_norm": 1.3739187717437744, "learning_rate": 0.00012845041179849128, "loss": 1.1148, "step": 5355 }, { "epoch": 0.41, "grad_norm": 1.301350712776184, "learning_rate": 0.00012842741162322487, "loss": 1.5492, "step": 5356 }, { "epoch": 0.41, "grad_norm": 1.1936722993850708, "learning_rate": 0.00012840440981179736, "loss": 1.3914, 
"step": 5357 }, { "epoch": 0.41, "grad_norm": 1.8142507076263428, "learning_rate": 0.00012838140636553264, "loss": 1.6168, "step": 5358 }, { "epoch": 0.41, "grad_norm": 0.9943119287490845, "learning_rate": 0.00012835840128575475, "loss": 1.2682, "step": 5359 }, { "epoch": 0.41, "grad_norm": 1.8692117929458618, "learning_rate": 0.00012833539457378774, "loss": 1.5726, "step": 5360 }, { "epoch": 0.41, "grad_norm": 1.7889772653579712, "learning_rate": 0.00012831238623095575, "loss": 2.0031, "step": 5361 }, { "epoch": 0.41, "grad_norm": 1.210856318473816, "learning_rate": 0.00012828937625858304, "loss": 1.8327, "step": 5362 }, { "epoch": 0.41, "grad_norm": 1.384764313697815, "learning_rate": 0.000128266364657994, "loss": 1.8705, "step": 5363 }, { "epoch": 0.41, "grad_norm": 1.5293095111846924, "learning_rate": 0.00012824335143051305, "loss": 1.3842, "step": 5364 }, { "epoch": 0.41, "grad_norm": 1.1756452322006226, "learning_rate": 0.00012822033657746478, "loss": 1.4762, "step": 5365 }, { "epoch": 0.41, "grad_norm": 0.9795228838920593, "learning_rate": 0.00012819732010017378, "loss": 1.1335, "step": 5366 }, { "epoch": 0.41, "grad_norm": 1.3318880796432495, "learning_rate": 0.0001281743019999648, "loss": 1.2017, "step": 5367 }, { "epoch": 0.41, "grad_norm": 1.2556360960006714, "learning_rate": 0.0001281512822781627, "loss": 1.5915, "step": 5368 }, { "epoch": 0.41, "grad_norm": 2.084336996078491, "learning_rate": 0.0001281282609360923, "loss": 1.2837, "step": 5369 }, { "epoch": 0.41, "grad_norm": 1.6901181936264038, "learning_rate": 0.0001281052379750787, "loss": 1.1899, "step": 5370 }, { "epoch": 0.41, "grad_norm": 2.23252534866333, "learning_rate": 0.00012808221339644698, "loss": 1.3421, "step": 5371 }, { "epoch": 0.41, "grad_norm": 1.4833463430404663, "learning_rate": 0.0001280591872015223, "loss": 1.3551, "step": 5372 }, { "epoch": 0.41, "grad_norm": 2.562469005584717, "learning_rate": 0.00012803615939163, "loss": 1.7617, "step": 5373 }, { "epoch": 0.41, "grad_norm": 
1.708601951599121, "learning_rate": 0.00012801312996809546, "loss": 0.9854, "step": 5374 }, { "epoch": 0.41, "grad_norm": 1.8643224239349365, "learning_rate": 0.00012799009893224412, "loss": 1.4175, "step": 5375 }, { "epoch": 0.41, "grad_norm": 1.495706558227539, "learning_rate": 0.00012796706628540156, "loss": 1.541, "step": 5376 }, { "epoch": 0.41, "grad_norm": 4.761238098144531, "learning_rate": 0.00012794403202889346, "loss": 2.08, "step": 5377 }, { "epoch": 0.41, "grad_norm": 0.888049840927124, "learning_rate": 0.00012792099616404555, "loss": 1.592, "step": 5378 }, { "epoch": 0.41, "grad_norm": 3.0152978897094727, "learning_rate": 0.00012789795869218372, "loss": 1.966, "step": 5379 }, { "epoch": 0.41, "grad_norm": 1.3646608591079712, "learning_rate": 0.0001278749196146339, "loss": 1.2097, "step": 5380 }, { "epoch": 0.41, "grad_norm": 1.9023343324661255, "learning_rate": 0.00012785187893272205, "loss": 1.6458, "step": 5381 }, { "epoch": 0.41, "grad_norm": 1.0310512781143188, "learning_rate": 0.00012782883664777433, "loss": 0.9726, "step": 5382 }, { "epoch": 0.41, "grad_norm": 1.2617502212524414, "learning_rate": 0.00012780579276111702, "loss": 1.5196, "step": 5383 }, { "epoch": 0.41, "grad_norm": 1.4621046781539917, "learning_rate": 0.00012778274727407636, "loss": 1.727, "step": 5384 }, { "epoch": 0.41, "grad_norm": 1.5311013460159302, "learning_rate": 0.00012775970018797877, "loss": 1.0343, "step": 5385 }, { "epoch": 0.41, "grad_norm": 1.1699650287628174, "learning_rate": 0.00012773665150415074, "loss": 1.1166, "step": 5386 }, { "epoch": 0.41, "grad_norm": 1.1758863925933838, "learning_rate": 0.00012771360122391885, "loss": 2.2357, "step": 5387 }, { "epoch": 0.41, "grad_norm": 2.52034068107605, "learning_rate": 0.00012769054934860978, "loss": 2.1996, "step": 5388 }, { "epoch": 0.41, "grad_norm": 1.9580276012420654, "learning_rate": 0.00012766749587955035, "loss": 1.646, "step": 5389 }, { "epoch": 0.41, "grad_norm": 2.8527870178222656, "learning_rate": 
0.00012764444081806727, "loss": 0.8466, "step": 5390 }, { "epoch": 0.41, "grad_norm": 1.095885992050171, "learning_rate": 0.00012762138416548766, "loss": 1.8404, "step": 5391 }, { "epoch": 0.41, "grad_norm": 1.0357657670974731, "learning_rate": 0.0001275983259231385, "loss": 1.4642, "step": 5392 }, { "epoch": 0.41, "grad_norm": 1.9377613067626953, "learning_rate": 0.00012757526609234688, "loss": 1.4228, "step": 5393 }, { "epoch": 0.41, "grad_norm": 1.7213950157165527, "learning_rate": 0.0001275522046744401, "loss": 1.0741, "step": 5394 }, { "epoch": 0.41, "grad_norm": 1.241621971130371, "learning_rate": 0.0001275291416707454, "loss": 1.5097, "step": 5395 }, { "epoch": 0.41, "grad_norm": 2.319115400314331, "learning_rate": 0.00012750607708259025, "loss": 1.3552, "step": 5396 }, { "epoch": 0.41, "grad_norm": 2.093533515930176, "learning_rate": 0.00012748301091130215, "loss": 1.3691, "step": 5397 }, { "epoch": 0.41, "grad_norm": 3.0639102458953857, "learning_rate": 0.00012745994315820866, "loss": 2.3707, "step": 5398 }, { "epoch": 0.41, "grad_norm": 2.240514039993286, "learning_rate": 0.00012743687382463746, "loss": 1.8534, "step": 5399 }, { "epoch": 0.41, "grad_norm": 2.163194417953491, "learning_rate": 0.00012741380291191636, "loss": 1.7862, "step": 5400 }, { "epoch": 0.41, "grad_norm": 1.4822887182235718, "learning_rate": 0.0001273907304213732, "loss": 1.4747, "step": 5401 }, { "epoch": 0.41, "grad_norm": 1.8090825080871582, "learning_rate": 0.0001273676563543359, "loss": 0.8733, "step": 5402 }, { "epoch": 0.41, "grad_norm": 1.3519282341003418, "learning_rate": 0.0001273445807121326, "loss": 1.6775, "step": 5403 }, { "epoch": 0.41, "grad_norm": 1.3615690469741821, "learning_rate": 0.00012732150349609132, "loss": 1.6352, "step": 5404 }, { "epoch": 0.41, "grad_norm": 1.078413963317871, "learning_rate": 0.00012729842470754032, "loss": 1.7429, "step": 5405 }, { "epoch": 0.41, "grad_norm": 2.0434796810150146, "learning_rate": 0.000127275344347808, "loss": 1.3695, 
"step": 5406 }, { "epoch": 0.41, "grad_norm": 1.832636833190918, "learning_rate": 0.00012725226241822268, "loss": 1.7595, "step": 5407 }, { "epoch": 0.41, "grad_norm": 1.2626413106918335, "learning_rate": 0.00012722917892011288, "loss": 1.3248, "step": 5408 }, { "epoch": 0.41, "grad_norm": 0.9779356718063354, "learning_rate": 0.0001272060938548072, "loss": 1.4808, "step": 5409 }, { "epoch": 0.41, "grad_norm": 1.6669765710830688, "learning_rate": 0.0001271830072236343, "loss": 1.181, "step": 5410 }, { "epoch": 0.41, "grad_norm": 1.4729710817337036, "learning_rate": 0.00012715991902792294, "loss": 1.6588, "step": 5411 }, { "epoch": 0.41, "grad_norm": 1.1943432092666626, "learning_rate": 0.00012713682926900204, "loss": 1.5272, "step": 5412 }, { "epoch": 0.41, "grad_norm": 1.7346620559692383, "learning_rate": 0.00012711373794820053, "loss": 2.0751, "step": 5413 }, { "epoch": 0.41, "grad_norm": 1.6213217973709106, "learning_rate": 0.00012709064506684733, "loss": 1.6938, "step": 5414 }, { "epoch": 0.41, "grad_norm": 1.7698122262954712, "learning_rate": 0.00012706755062627173, "loss": 1.8838, "step": 5415 }, { "epoch": 0.41, "grad_norm": 1.0858066082000732, "learning_rate": 0.0001270444546278029, "loss": 0.7457, "step": 5416 }, { "epoch": 0.41, "grad_norm": 1.4937704801559448, "learning_rate": 0.00012702135707277006, "loss": 1.3603, "step": 5417 }, { "epoch": 0.41, "grad_norm": 1.218225359916687, "learning_rate": 0.00012699825796250272, "loss": 1.5879, "step": 5418 }, { "epoch": 0.41, "grad_norm": 1.1022862195968628, "learning_rate": 0.00012697515729833032, "loss": 1.472, "step": 5419 }, { "epoch": 0.41, "grad_norm": 1.5087465047836304, "learning_rate": 0.00012695205508158242, "loss": 1.7567, "step": 5420 }, { "epoch": 0.41, "grad_norm": 1.4952155351638794, "learning_rate": 0.0001269289513135887, "loss": 1.0973, "step": 5421 }, { "epoch": 0.41, "grad_norm": 1.840484619140625, "learning_rate": 0.00012690584599567894, "loss": 1.9086, "step": 5422 }, { "epoch": 0.41, 
"grad_norm": 1.8393964767456055, "learning_rate": 0.00012688273912918297, "loss": 1.5515, "step": 5423 }, { "epoch": 0.41, "grad_norm": 1.462272047996521, "learning_rate": 0.00012685963071543067, "loss": 1.6028, "step": 5424 }, { "epoch": 0.41, "grad_norm": 1.6814547777175903, "learning_rate": 0.00012683652075575218, "loss": 1.5728, "step": 5425 }, { "epoch": 0.41, "grad_norm": 1.2302521467208862, "learning_rate": 0.00012681340925147744, "loss": 1.1253, "step": 5426 }, { "epoch": 0.41, "grad_norm": 1.069972038269043, "learning_rate": 0.00012679029620393684, "loss": 1.5169, "step": 5427 }, { "epoch": 0.41, "grad_norm": 2.375659704208374, "learning_rate": 0.00012676718161446052, "loss": 1.2993, "step": 5428 }, { "epoch": 0.41, "grad_norm": 1.57599937915802, "learning_rate": 0.00012674406548437894, "loss": 1.9208, "step": 5429 }, { "epoch": 0.41, "grad_norm": 1.8036290407180786, "learning_rate": 0.00012672094781502252, "loss": 1.0558, "step": 5430 }, { "epoch": 0.41, "grad_norm": 1.649035930633545, "learning_rate": 0.00012669782860772185, "loss": 1.0397, "step": 5431 }, { "epoch": 0.41, "grad_norm": 1.4143136739730835, "learning_rate": 0.00012667470786380757, "loss": 1.4064, "step": 5432 }, { "epoch": 0.41, "grad_norm": 3.372917652130127, "learning_rate": 0.0001266515855846104, "loss": 1.5562, "step": 5433 }, { "epoch": 0.41, "grad_norm": 1.3808720111846924, "learning_rate": 0.00012662846177146112, "loss": 1.8837, "step": 5434 }, { "epoch": 0.41, "grad_norm": 0.9464218020439148, "learning_rate": 0.00012660533642569073, "loss": 1.2003, "step": 5435 }, { "epoch": 0.41, "grad_norm": 1.5188994407653809, "learning_rate": 0.00012658220954863019, "loss": 1.4803, "step": 5436 }, { "epoch": 0.41, "grad_norm": 1.0272657871246338, "learning_rate": 0.0001265590811416105, "loss": 1.0697, "step": 5437 }, { "epoch": 0.41, "grad_norm": 1.4485334157943726, "learning_rate": 0.00012653595120596298, "loss": 1.3448, "step": 5438 }, { "epoch": 0.42, "grad_norm": 1.3550595045089722, 
"learning_rate": 0.00012651281974301876, "loss": 1.4465, "step": 5439 }, { "epoch": 0.42, "grad_norm": 2.0151073932647705, "learning_rate": 0.00012648968675410928, "loss": 1.9464, "step": 5440 }, { "epoch": 0.42, "grad_norm": 1.100053071975708, "learning_rate": 0.0001264665522405659, "loss": 1.2129, "step": 5441 }, { "epoch": 0.42, "grad_norm": 1.6661112308502197, "learning_rate": 0.00012644341620372023, "loss": 1.5024, "step": 5442 }, { "epoch": 0.42, "grad_norm": 1.427386999130249, "learning_rate": 0.0001264202786449038, "loss": 1.521, "step": 5443 }, { "epoch": 0.42, "grad_norm": 2.2292957305908203, "learning_rate": 0.00012639713956544835, "loss": 1.4214, "step": 5444 }, { "epoch": 0.42, "grad_norm": 1.4864296913146973, "learning_rate": 0.0001263739989666857, "loss": 1.4904, "step": 5445 }, { "epoch": 0.42, "grad_norm": 1.2700287103652954, "learning_rate": 0.00012635085684994767, "loss": 1.1498, "step": 5446 }, { "epoch": 0.42, "grad_norm": 1.4109336137771606, "learning_rate": 0.00012632771321656624, "loss": 1.1814, "step": 5447 }, { "epoch": 0.42, "grad_norm": 1.7091130018234253, "learning_rate": 0.00012630456806787346, "loss": 1.2009, "step": 5448 }, { "epoch": 0.42, "grad_norm": 1.451223611831665, "learning_rate": 0.00012628142140520146, "loss": 0.8633, "step": 5449 }, { "epoch": 0.42, "grad_norm": 1.4273381233215332, "learning_rate": 0.00012625827322988245, "loss": 1.4421, "step": 5450 }, { "epoch": 0.42, "grad_norm": 1.5076720714569092, "learning_rate": 0.0001262351235432488, "loss": 1.2424, "step": 5451 }, { "epoch": 0.42, "grad_norm": 1.2845650911331177, "learning_rate": 0.00012621197234663283, "loss": 0.8047, "step": 5452 }, { "epoch": 0.42, "grad_norm": 1.0795350074768066, "learning_rate": 0.00012618881964136707, "loss": 1.0884, "step": 5453 }, { "epoch": 0.42, "grad_norm": 4.891432285308838, "learning_rate": 0.00012616566542878412, "loss": 1.4766, "step": 5454 }, { "epoch": 0.42, "grad_norm": 2.181894063949585, "learning_rate": 0.00012614250971021657, 
"loss": 1.3232, "step": 5455 }, { "epoch": 0.42, "grad_norm": 1.8338412046432495, "learning_rate": 0.0001261193524869972, "loss": 2.1761, "step": 5456 }, { "epoch": 0.42, "grad_norm": 1.4103732109069824, "learning_rate": 0.00012609619376045884, "loss": 1.5764, "step": 5457 }, { "epoch": 0.42, "grad_norm": 1.562341570854187, "learning_rate": 0.00012607303353193437, "loss": 1.5833, "step": 5458 }, { "epoch": 0.42, "grad_norm": 1.047044277191162, "learning_rate": 0.0001260498718027569, "loss": 0.8981, "step": 5459 }, { "epoch": 0.42, "grad_norm": 2.2438786029815674, "learning_rate": 0.0001260267085742594, "loss": 1.3482, "step": 5460 }, { "epoch": 0.42, "grad_norm": 1.762177586555481, "learning_rate": 0.00012600354384777513, "loss": 1.0841, "step": 5461 }, { "epoch": 0.42, "grad_norm": 1.4266256093978882, "learning_rate": 0.0001259803776246373, "loss": 1.4498, "step": 5462 }, { "epoch": 0.42, "grad_norm": 1.1860734224319458, "learning_rate": 0.0001259572099061793, "loss": 1.412, "step": 5463 }, { "epoch": 0.42, "grad_norm": 1.5781997442245483, "learning_rate": 0.0001259340406937345, "loss": 1.1089, "step": 5464 }, { "epoch": 0.42, "grad_norm": 2.984549045562744, "learning_rate": 0.00012591086998863652, "loss": 1.5175, "step": 5465 }, { "epoch": 0.42, "grad_norm": 1.6038258075714111, "learning_rate": 0.0001258876977922189, "loss": 1.4053, "step": 5466 }, { "epoch": 0.42, "grad_norm": 1.0514395236968994, "learning_rate": 0.00012586452410581533, "loss": 1.8708, "step": 5467 }, { "epoch": 0.42, "grad_norm": 1.2715007066726685, "learning_rate": 0.00012584134893075963, "loss": 1.1811, "step": 5468 }, { "epoch": 0.42, "grad_norm": 1.0483765602111816, "learning_rate": 0.00012581817226838563, "loss": 1.3153, "step": 5469 }, { "epoch": 0.42, "grad_norm": 1.3617109060287476, "learning_rate": 0.0001257949941200273, "loss": 1.3935, "step": 5470 }, { "epoch": 0.42, "grad_norm": 2.149139404296875, "learning_rate": 0.00012577181448701868, "loss": 1.8049, "step": 5471 }, { "epoch": 
0.42, "grad_norm": 1.5002493858337402, "learning_rate": 0.00012574863337069385, "loss": 1.3408, "step": 5472 }, { "epoch": 0.42, "grad_norm": 1.2915565967559814, "learning_rate": 0.00012572545077238703, "loss": 1.3884, "step": 5473 }, { "epoch": 0.42, "grad_norm": 1.1674963235855103, "learning_rate": 0.00012570226669343258, "loss": 1.8697, "step": 5474 }, { "epoch": 0.42, "grad_norm": 3.241379499435425, "learning_rate": 0.0001256790811351648, "loss": 1.4221, "step": 5475 }, { "epoch": 0.42, "grad_norm": 1.5208488702774048, "learning_rate": 0.00012565589409891816, "loss": 1.4978, "step": 5476 }, { "epoch": 0.42, "grad_norm": 1.378644347190857, "learning_rate": 0.00012563270558602725, "loss": 1.4794, "step": 5477 }, { "epoch": 0.42, "grad_norm": 1.967000961303711, "learning_rate": 0.00012560951559782666, "loss": 2.3158, "step": 5478 }, { "epoch": 0.42, "grad_norm": 1.7719554901123047, "learning_rate": 0.00012558632413565114, "loss": 1.3709, "step": 5479 }, { "epoch": 0.42, "grad_norm": 1.2962477207183838, "learning_rate": 0.00012556313120083546, "loss": 1.6982, "step": 5480 }, { "epoch": 0.42, "grad_norm": 0.7998774647712708, "learning_rate": 0.00012553993679471453, "loss": 1.0291, "step": 5481 }, { "epoch": 0.42, "grad_norm": 1.064042329788208, "learning_rate": 0.0001255167409186233, "loss": 1.5453, "step": 5482 }, { "epoch": 0.42, "grad_norm": 1.9416842460632324, "learning_rate": 0.0001254935435738969, "loss": 1.4488, "step": 5483 }, { "epoch": 0.42, "grad_norm": 3.1397416591644287, "learning_rate": 0.00012547034476187037, "loss": 1.2011, "step": 5484 }, { "epoch": 0.42, "grad_norm": 2.4579105377197266, "learning_rate": 0.00012544714448387893, "loss": 1.989, "step": 5485 }, { "epoch": 0.42, "grad_norm": 0.8828465342521667, "learning_rate": 0.000125423942741258, "loss": 1.0324, "step": 5486 }, { "epoch": 0.42, "grad_norm": 0.9253695011138916, "learning_rate": 0.0001254007395353429, "loss": 1.2604, "step": 5487 }, { "epoch": 0.42, "grad_norm": 3.071406602859497, 
"learning_rate": 0.00012537753486746908, "loss": 1.2934, "step": 5488 }, { "epoch": 0.42, "grad_norm": 1.3062291145324707, "learning_rate": 0.00012535432873897218, "loss": 1.3648, "step": 5489 }, { "epoch": 0.42, "grad_norm": 3.2363104820251465, "learning_rate": 0.00012533112115118778, "loss": 1.1197, "step": 5490 }, { "epoch": 0.42, "grad_norm": 1.5100181102752686, "learning_rate": 0.00012530791210545162, "loss": 2.0213, "step": 5491 }, { "epoch": 0.42, "grad_norm": 2.2377238273620605, "learning_rate": 0.00012528470160309957, "loss": 1.3857, "step": 5492 }, { "epoch": 0.42, "grad_norm": 1.0385780334472656, "learning_rate": 0.0001252614896454675, "loss": 1.4599, "step": 5493 }, { "epoch": 0.42, "grad_norm": 1.5938854217529297, "learning_rate": 0.00012523827623389132, "loss": 1.706, "step": 5494 }, { "epoch": 0.42, "grad_norm": 1.4053573608398438, "learning_rate": 0.0001252150613697072, "loss": 1.0845, "step": 5495 }, { "epoch": 0.42, "grad_norm": 0.8813590407371521, "learning_rate": 0.0001251918450542512, "loss": 1.0666, "step": 5496 }, { "epoch": 0.42, "grad_norm": 2.382370710372925, "learning_rate": 0.00012516862728885965, "loss": 1.7817, "step": 5497 }, { "epoch": 0.42, "grad_norm": 2.1586456298828125, "learning_rate": 0.00012514540807486877, "loss": 1.3394, "step": 5498 }, { "epoch": 0.42, "grad_norm": 1.0907317399978638, "learning_rate": 0.00012512218741361506, "loss": 1.7702, "step": 5499 }, { "epoch": 0.42, "grad_norm": 1.530403733253479, "learning_rate": 0.00012509896530643488, "loss": 1.5784, "step": 5500 }, { "epoch": 0.42, "grad_norm": 1.8546991348266602, "learning_rate": 0.00012507574175466487, "loss": 1.6752, "step": 5501 }, { "epoch": 0.42, "grad_norm": 3.5745677947998047, "learning_rate": 0.0001250525167596417, "loss": 1.2958, "step": 5502 }, { "epoch": 0.42, "grad_norm": 3.579622745513916, "learning_rate": 0.00012502929032270205, "loss": 2.1633, "step": 5503 }, { "epoch": 0.42, "grad_norm": 1.389306902885437, "learning_rate": 0.00012500606244518275, 
"loss": 1.3467, "step": 5504 }, { "epoch": 0.42, "grad_norm": 1.4844403266906738, "learning_rate": 0.0001249828331284207, "loss": 1.6345, "step": 5505 }, { "epoch": 0.42, "grad_norm": 1.8619270324707031, "learning_rate": 0.00012495960237375294, "loss": 2.4545, "step": 5506 }, { "epoch": 0.42, "grad_norm": 1.4074684381484985, "learning_rate": 0.00012493637018251645, "loss": 1.3145, "step": 5507 }, { "epoch": 0.42, "grad_norm": 1.0035040378570557, "learning_rate": 0.00012491313655604836, "loss": 1.2694, "step": 5508 }, { "epoch": 0.42, "grad_norm": 1.6003607511520386, "learning_rate": 0.000124889901495686, "loss": 1.3769, "step": 5509 }, { "epoch": 0.42, "grad_norm": 1.6417267322540283, "learning_rate": 0.0001248666650027666, "loss": 1.791, "step": 5510 }, { "epoch": 0.42, "grad_norm": 1.6765691041946411, "learning_rate": 0.00012484342707862756, "loss": 1.6674, "step": 5511 }, { "epoch": 0.42, "grad_norm": 1.2997276782989502, "learning_rate": 0.0001248201877246064, "loss": 1.3365, "step": 5512 }, { "epoch": 0.42, "grad_norm": 1.7661781311035156, "learning_rate": 0.00012479694694204067, "loss": 1.2457, "step": 5513 }, { "epoch": 0.42, "grad_norm": 2.637483596801758, "learning_rate": 0.00012477370473226796, "loss": 1.6224, "step": 5514 }, { "epoch": 0.42, "grad_norm": 1.5658316612243652, "learning_rate": 0.00012475046109662605, "loss": 1.6035, "step": 5515 }, { "epoch": 0.42, "grad_norm": 1.4263086318969727, "learning_rate": 0.00012472721603645274, "loss": 1.5022, "step": 5516 }, { "epoch": 0.42, "grad_norm": 1.1640081405639648, "learning_rate": 0.00012470396955308586, "loss": 1.5705, "step": 5517 }, { "epoch": 0.42, "grad_norm": 1.3701735734939575, "learning_rate": 0.0001246807216478634, "loss": 1.6544, "step": 5518 }, { "epoch": 0.42, "grad_norm": 1.307915449142456, "learning_rate": 0.0001246574723221235, "loss": 1.5818, "step": 5519 }, { "epoch": 0.42, "grad_norm": 1.7105365991592407, "learning_rate": 0.00012463422157720416, "loss": 1.3037, "step": 5520 }, { 
"epoch": 0.42, "grad_norm": 1.238426685333252, "learning_rate": 0.0001246109694144437, "loss": 1.7305, "step": 5521 }, { "epoch": 0.42, "grad_norm": 1.299996256828308, "learning_rate": 0.00012458771583518033, "loss": 1.1702, "step": 5522 }, { "epoch": 0.42, "grad_norm": 1.7547101974487305, "learning_rate": 0.0001245644608407525, "loss": 1.1638, "step": 5523 }, { "epoch": 0.42, "grad_norm": 1.1861836910247803, "learning_rate": 0.0001245412044324986, "loss": 0.9407, "step": 5524 }, { "epoch": 0.42, "grad_norm": 1.2727004289627075, "learning_rate": 0.00012451794661175723, "loss": 1.1812, "step": 5525 }, { "epoch": 0.42, "grad_norm": 2.9819633960723877, "learning_rate": 0.000124494687379867, "loss": 1.2951, "step": 5526 }, { "epoch": 0.42, "grad_norm": 1.3401825428009033, "learning_rate": 0.0001244714267381666, "loss": 1.3735, "step": 5527 }, { "epoch": 0.42, "grad_norm": 1.244678258895874, "learning_rate": 0.0001244481646879948, "loss": 0.8851, "step": 5528 }, { "epoch": 0.42, "grad_norm": 1.9931647777557373, "learning_rate": 0.00012442490123069049, "loss": 1.3761, "step": 5529 }, { "epoch": 0.42, "grad_norm": 2.373706579208374, "learning_rate": 0.0001244016363675926, "loss": 0.7325, "step": 5530 }, { "epoch": 0.42, "grad_norm": 1.4100030660629272, "learning_rate": 0.00012437837010004018, "loss": 1.623, "step": 5531 }, { "epoch": 0.42, "grad_norm": 1.0290892124176025, "learning_rate": 0.00012435510242937226, "loss": 1.428, "step": 5532 }, { "epoch": 0.42, "grad_norm": 1.5800021886825562, "learning_rate": 0.00012433183335692816, "loss": 1.0729, "step": 5533 }, { "epoch": 0.42, "grad_norm": 1.2082648277282715, "learning_rate": 0.00012430856288404705, "loss": 1.4699, "step": 5534 }, { "epoch": 0.42, "grad_norm": 5.391329765319824, "learning_rate": 0.0001242852910120683, "loss": 2.0661, "step": 5535 }, { "epoch": 0.42, "grad_norm": 2.5199549198150635, "learning_rate": 0.00012426201774233135, "loss": 1.4289, "step": 5536 }, { "epoch": 0.42, "grad_norm": 1.8044257164001465, 
"learning_rate": 0.00012423874307617572, "loss": 1.6043, "step": 5537 }, { "epoch": 0.42, "grad_norm": 1.2332589626312256, "learning_rate": 0.00012421546701494096, "loss": 1.0503, "step": 5538 }, { "epoch": 0.42, "grad_norm": 1.0843799114227295, "learning_rate": 0.00012419218955996676, "loss": 0.8612, "step": 5539 }, { "epoch": 0.42, "grad_norm": 1.227289080619812, "learning_rate": 0.00012416891071259293, "loss": 1.8556, "step": 5540 }, { "epoch": 0.42, "grad_norm": 1.8490993976593018, "learning_rate": 0.00012414563047415922, "loss": 1.6743, "step": 5541 }, { "epoch": 0.42, "grad_norm": 1.6543505191802979, "learning_rate": 0.00012412234884600558, "loss": 0.9009, "step": 5542 }, { "epoch": 0.42, "grad_norm": 1.378440499305725, "learning_rate": 0.00012409906582947202, "loss": 1.4259, "step": 5543 }, { "epoch": 0.42, "grad_norm": 1.9333502054214478, "learning_rate": 0.00012407578142589856, "loss": 1.2333, "step": 5544 }, { "epoch": 0.42, "grad_norm": 2.8706295490264893, "learning_rate": 0.00012405249563662537, "loss": 1.6175, "step": 5545 }, { "epoch": 0.42, "grad_norm": 2.3100783824920654, "learning_rate": 0.00012402920846299272, "loss": 1.6425, "step": 5546 }, { "epoch": 0.42, "grad_norm": 1.510009765625, "learning_rate": 0.00012400591990634086, "loss": 1.4271, "step": 5547 }, { "epoch": 0.42, "grad_norm": 1.114283561706543, "learning_rate": 0.00012398262996801023, "loss": 1.1599, "step": 5548 }, { "epoch": 0.42, "grad_norm": 3.085422992706299, "learning_rate": 0.00012395933864934127, "loss": 1.6588, "step": 5549 }, { "epoch": 0.42, "grad_norm": 1.3203495740890503, "learning_rate": 0.00012393604595167454, "loss": 1.1707, "step": 5550 }, { "epoch": 0.42, "grad_norm": 1.1772146224975586, "learning_rate": 0.00012391275187635068, "loss": 1.4645, "step": 5551 }, { "epoch": 0.42, "grad_norm": 1.7362430095672607, "learning_rate": 0.00012388945642471038, "loss": 1.0813, "step": 5552 }, { "epoch": 0.42, "grad_norm": 1.4962224960327148, "learning_rate": 
0.00012386615959809443, "loss": 1.1646, "step": 5553 }, { "epoch": 0.42, "grad_norm": 1.3976517915725708, "learning_rate": 0.00012384286139784372, "loss": 1.5601, "step": 5554 }, { "epoch": 0.42, "grad_norm": 1.8216527700424194, "learning_rate": 0.00012381956182529918, "loss": 1.2452, "step": 5555 }, { "epoch": 0.42, "grad_norm": 1.1176977157592773, "learning_rate": 0.00012379626088180175, "loss": 1.5531, "step": 5556 }, { "epoch": 0.42, "grad_norm": 0.9901458621025085, "learning_rate": 0.00012377295856869268, "loss": 1.1036, "step": 5557 }, { "epoch": 0.42, "grad_norm": 1.3200222253799438, "learning_rate": 0.00012374965488731312, "loss": 1.5046, "step": 5558 }, { "epoch": 0.42, "grad_norm": 2.2197048664093018, "learning_rate": 0.00012372634983900423, "loss": 1.8285, "step": 5559 }, { "epoch": 0.42, "grad_norm": 0.9970558881759644, "learning_rate": 0.00012370304342510747, "loss": 1.4969, "step": 5560 }, { "epoch": 0.42, "grad_norm": 2.0291457176208496, "learning_rate": 0.00012367973564696418, "loss": 1.771, "step": 5561 }, { "epoch": 0.42, "grad_norm": 1.5644028186798096, "learning_rate": 0.00012365642650591588, "loss": 1.685, "step": 5562 }, { "epoch": 0.42, "grad_norm": 1.2589964866638184, "learning_rate": 0.00012363311600330413, "loss": 1.5596, "step": 5563 }, { "epoch": 0.42, "grad_norm": 3.0599441528320312, "learning_rate": 0.00012360980414047066, "loss": 1.2558, "step": 5564 }, { "epoch": 0.42, "grad_norm": 3.109736680984497, "learning_rate": 0.00012358649091875708, "loss": 2.206, "step": 5565 }, { "epoch": 0.42, "grad_norm": 1.8467128276824951, "learning_rate": 0.00012356317633950526, "loss": 0.9595, "step": 5566 }, { "epoch": 0.42, "grad_norm": 1.5666464567184448, "learning_rate": 0.00012353986040405712, "loss": 1.298, "step": 5567 }, { "epoch": 0.42, "grad_norm": 1.3921395540237427, "learning_rate": 0.0001235165431137546, "loss": 1.7106, "step": 5568 }, { "epoch": 0.42, "grad_norm": 1.1883745193481445, "learning_rate": 0.00012349322446993972, "loss": 
1.301, "step": 5569 }, { "epoch": 0.43, "grad_norm": 1.5731282234191895, "learning_rate": 0.0001234699044739546, "loss": 2.1009, "step": 5570 }, { "epoch": 0.43, "grad_norm": 1.0139864683151245, "learning_rate": 0.00012344658312714153, "loss": 1.2682, "step": 5571 }, { "epoch": 0.43, "grad_norm": 1.0100895166397095, "learning_rate": 0.00012342326043084266, "loss": 1.343, "step": 5572 }, { "epoch": 0.43, "grad_norm": 1.465063452720642, "learning_rate": 0.00012339993638640044, "loss": 1.5692, "step": 5573 }, { "epoch": 0.43, "grad_norm": 1.537412166595459, "learning_rate": 0.00012337661099515727, "loss": 1.6658, "step": 5574 }, { "epoch": 0.43, "grad_norm": 1.2097480297088623, "learning_rate": 0.00012335328425845565, "loss": 1.3096, "step": 5575 }, { "epoch": 0.43, "grad_norm": 1.7976737022399902, "learning_rate": 0.00012332995617763816, "loss": 1.7671, "step": 5576 }, { "epoch": 0.43, "grad_norm": 0.9106283783912659, "learning_rate": 0.0001233066267540475, "loss": 1.1004, "step": 5577 }, { "epoch": 0.43, "grad_norm": 1.497208833694458, "learning_rate": 0.00012328329598902638, "loss": 1.2685, "step": 5578 }, { "epoch": 0.43, "grad_norm": 1.2219457626342773, "learning_rate": 0.00012325996388391766, "loss": 1.575, "step": 5579 }, { "epoch": 0.43, "grad_norm": 1.8758736848831177, "learning_rate": 0.0001232366304400642, "loss": 2.5707, "step": 5580 }, { "epoch": 0.43, "grad_norm": 1.0930486917495728, "learning_rate": 0.00012321329565880896, "loss": 1.7727, "step": 5581 }, { "epoch": 0.43, "grad_norm": 2.4867801666259766, "learning_rate": 0.00012318995954149506, "loss": 1.2497, "step": 5582 }, { "epoch": 0.43, "grad_norm": 1.2806391716003418, "learning_rate": 0.00012316662208946557, "loss": 1.378, "step": 5583 }, { "epoch": 0.43, "grad_norm": 1.634181022644043, "learning_rate": 0.00012314328330406368, "loss": 0.9219, "step": 5584 }, { "epoch": 0.43, "grad_norm": 1.4027873277664185, "learning_rate": 0.0001231199431866327, "loss": 0.795, "step": 5585 }, { "epoch": 0.43, 
"grad_norm": 1.6655824184417725, "learning_rate": 0.00012309660173851603, "loss": 1.7717, "step": 5586 }, { "epoch": 0.43, "grad_norm": 1.7966758012771606, "learning_rate": 0.00012307325896105707, "loss": 1.5989, "step": 5587 }, { "epoch": 0.43, "grad_norm": 1.723199486732483, "learning_rate": 0.00012304991485559926, "loss": 1.9823, "step": 5588 }, { "epoch": 0.43, "grad_norm": 2.337409019470215, "learning_rate": 0.0001230265694234863, "loss": 1.9054, "step": 5589 }, { "epoch": 0.43, "grad_norm": 3.1370158195495605, "learning_rate": 0.00012300322266606178, "loss": 2.1207, "step": 5590 }, { "epoch": 0.43, "grad_norm": 1.2510040998458862, "learning_rate": 0.00012297987458466947, "loss": 1.3283, "step": 5591 }, { "epoch": 0.43, "grad_norm": 0.9778848886489868, "learning_rate": 0.0001229565251806532, "loss": 1.656, "step": 5592 }, { "epoch": 0.43, "grad_norm": 1.0427697896957397, "learning_rate": 0.00012293317445535683, "loss": 1.6151, "step": 5593 }, { "epoch": 0.43, "grad_norm": 1.0327494144439697, "learning_rate": 0.0001229098224101243, "loss": 0.8661, "step": 5594 }, { "epoch": 0.43, "grad_norm": 0.8400288820266724, "learning_rate": 0.00012288646904629977, "loss": 1.3177, "step": 5595 }, { "epoch": 0.43, "grad_norm": 2.7399210929870605, "learning_rate": 0.0001228631143652273, "loss": 2.076, "step": 5596 }, { "epoch": 0.43, "grad_norm": 1.8075973987579346, "learning_rate": 0.000122839758368251, "loss": 1.4766, "step": 5597 }, { "epoch": 0.43, "grad_norm": 5.336585521697998, "learning_rate": 0.00012281640105671525, "loss": 2.1707, "step": 5598 }, { "epoch": 0.43, "grad_norm": 1.9665307998657227, "learning_rate": 0.00012279304243196436, "loss": 2.0129, "step": 5599 }, { "epoch": 0.43, "grad_norm": 1.365535020828247, "learning_rate": 0.00012276968249534274, "loss": 1.5718, "step": 5600 }, { "epoch": 0.43, "grad_norm": 1.2128726243972778, "learning_rate": 0.00012274632124819495, "loss": 1.2129, "step": 5601 }, { "epoch": 0.43, "grad_norm": 1.4089510440826416, 
"learning_rate": 0.0001227229586918655, "loss": 2.1514, "step": 5602 }, { "epoch": 0.43, "grad_norm": 0.9937930703163147, "learning_rate": 0.00012269959482769904, "loss": 1.2126, "step": 5603 }, { "epoch": 0.43, "grad_norm": 1.0786662101745605, "learning_rate": 0.00012267622965704032, "loss": 1.2883, "step": 5604 }, { "epoch": 0.43, "grad_norm": 1.6100890636444092, "learning_rate": 0.00012265286318123415, "loss": 1.7231, "step": 5605 }, { "epoch": 0.43, "grad_norm": 1.3162586688995361, "learning_rate": 0.00012262949540162537, "loss": 1.2705, "step": 5606 }, { "epoch": 0.43, "grad_norm": 1.4111188650131226, "learning_rate": 0.00012260612631955896, "loss": 1.3838, "step": 5607 }, { "epoch": 0.43, "grad_norm": 1.648836374282837, "learning_rate": 0.00012258275593637994, "loss": 0.8065, "step": 5608 }, { "epoch": 0.43, "grad_norm": 1.0085375308990479, "learning_rate": 0.00012255938425343338, "loss": 1.2784, "step": 5609 }, { "epoch": 0.43, "grad_norm": 1.4906355142593384, "learning_rate": 0.00012253601127206452, "loss": 1.2377, "step": 5610 }, { "epoch": 0.43, "grad_norm": 1.9785369634628296, "learning_rate": 0.00012251263699361855, "loss": 1.409, "step": 5611 }, { "epoch": 0.43, "grad_norm": 1.434516429901123, "learning_rate": 0.0001224892614194408, "loss": 1.8331, "step": 5612 }, { "epoch": 0.43, "grad_norm": 1.8941140174865723, "learning_rate": 0.00012246588455087668, "loss": 1.4418, "step": 5613 }, { "epoch": 0.43, "grad_norm": 1.5136929750442505, "learning_rate": 0.0001224425063892717, "loss": 1.3172, "step": 5614 }, { "epoch": 0.43, "grad_norm": 1.3466111421585083, "learning_rate": 0.00012241912693597133, "loss": 1.5336, "step": 5615 }, { "epoch": 0.43, "grad_norm": 1.89158034324646, "learning_rate": 0.00012239574619232125, "loss": 1.9672, "step": 5616 }, { "epoch": 0.43, "grad_norm": 1.0781142711639404, "learning_rate": 0.00012237236415966714, "loss": 1.0592, "step": 5617 }, { "epoch": 0.43, "grad_norm": 1.3949785232543945, "learning_rate": 
0.00012234898083935477, "loss": 1.3399, "step": 5618 }, { "epoch": 0.43, "grad_norm": 1.3448368310928345, "learning_rate": 0.00012232559623273, "loss": 1.6517, "step": 5619 }, { "epoch": 0.43, "grad_norm": 2.4211788177490234, "learning_rate": 0.00012230221034113874, "loss": 1.8358, "step": 5620 }, { "epoch": 0.43, "grad_norm": 1.298365831375122, "learning_rate": 0.00012227882316592697, "loss": 1.2822, "step": 5621 }, { "epoch": 0.43, "grad_norm": 1.511641025543213, "learning_rate": 0.00012225543470844075, "loss": 1.5433, "step": 5622 }, { "epoch": 0.43, "grad_norm": 2.206737995147705, "learning_rate": 0.00012223204497002622, "loss": 1.603, "step": 5623 }, { "epoch": 0.43, "grad_norm": 0.9113566279411316, "learning_rate": 0.00012220865395202966, "loss": 1.3552, "step": 5624 }, { "epoch": 0.43, "grad_norm": 1.0749419927597046, "learning_rate": 0.00012218526165579726, "loss": 1.8621, "step": 5625 }, { "epoch": 0.43, "grad_norm": 1.7807440757751465, "learning_rate": 0.00012216186808267546, "loss": 0.8502, "step": 5626 }, { "epoch": 0.43, "grad_norm": 1.9912128448486328, "learning_rate": 0.00012213847323401061, "loss": 1.3522, "step": 5627 }, { "epoch": 0.43, "grad_norm": 1.403438925743103, "learning_rate": 0.0001221150771111493, "loss": 1.1752, "step": 5628 }, { "epoch": 0.43, "grad_norm": 2.202784299850464, "learning_rate": 0.0001220916797154381, "loss": 1.5299, "step": 5629 }, { "epoch": 0.43, "grad_norm": 1.0378196239471436, "learning_rate": 0.00012206828104822363, "loss": 1.2897, "step": 5630 }, { "epoch": 0.43, "grad_norm": 1.6311687231063843, "learning_rate": 0.00012204488111085264, "loss": 1.9592, "step": 5631 }, { "epoch": 0.43, "grad_norm": 2.081082582473755, "learning_rate": 0.00012202147990467193, "loss": 1.5766, "step": 5632 }, { "epoch": 0.43, "grad_norm": 1.477632761001587, "learning_rate": 0.00012199807743102831, "loss": 0.5811, "step": 5633 }, { "epoch": 0.43, "grad_norm": 1.4362276792526245, "learning_rate": 0.00012197467369126888, "loss": 1.9197, 
"step": 5634 }, { "epoch": 0.43, "grad_norm": 1.9825575351715088, "learning_rate": 0.00012195126868674051, "loss": 1.9208, "step": 5635 }, { "epoch": 0.43, "grad_norm": 3.1041901111602783, "learning_rate": 0.00012192786241879033, "loss": 1.241, "step": 5636 }, { "epoch": 0.43, "grad_norm": 2.496711492538452, "learning_rate": 0.00012190445488876556, "loss": 1.123, "step": 5637 }, { "epoch": 0.43, "grad_norm": 0.9670531749725342, "learning_rate": 0.00012188104609801338, "loss": 1.406, "step": 5638 }, { "epoch": 0.43, "grad_norm": 1.1661205291748047, "learning_rate": 0.00012185763604788113, "loss": 1.4944, "step": 5639 }, { "epoch": 0.43, "grad_norm": 2.321664810180664, "learning_rate": 0.00012183422473971622, "loss": 1.6137, "step": 5640 }, { "epoch": 0.43, "grad_norm": 1.6292399168014526, "learning_rate": 0.000121810812174866, "loss": 1.3246, "step": 5641 }, { "epoch": 0.43, "grad_norm": 1.5616791248321533, "learning_rate": 0.00012178739835467813, "loss": 2.1324, "step": 5642 }, { "epoch": 0.43, "grad_norm": 1.1996780633926392, "learning_rate": 0.00012176398328050016, "loss": 1.357, "step": 5643 }, { "epoch": 0.43, "grad_norm": 4.795321941375732, "learning_rate": 0.00012174056695367968, "loss": 1.9581, "step": 5644 }, { "epoch": 0.43, "grad_norm": 1.1678228378295898, "learning_rate": 0.00012171714937556456, "loss": 1.2521, "step": 5645 }, { "epoch": 0.43, "grad_norm": 1.2613176107406616, "learning_rate": 0.00012169373054750255, "loss": 1.3607, "step": 5646 }, { "epoch": 0.43, "grad_norm": 1.079412579536438, "learning_rate": 0.00012167031047084155, "loss": 0.968, "step": 5647 }, { "epoch": 0.43, "grad_norm": 1.3198281526565552, "learning_rate": 0.00012164688914692953, "loss": 1.6876, "step": 5648 }, { "epoch": 0.43, "grad_norm": 2.3709254264831543, "learning_rate": 0.00012162346657711448, "loss": 1.6407, "step": 5649 }, { "epoch": 0.43, "grad_norm": 1.5700433254241943, "learning_rate": 0.00012160004276274453, "loss": 1.3504, "step": 5650 }, { "epoch": 0.43, 
"grad_norm": 1.0857442617416382, "learning_rate": 0.00012157661770516789, "loss": 1.6045, "step": 5651 }, { "epoch": 0.43, "grad_norm": 1.2773563861846924, "learning_rate": 0.00012155319140573278, "loss": 1.4124, "step": 5652 }, { "epoch": 0.43, "grad_norm": 1.1469943523406982, "learning_rate": 0.0001215297638657875, "loss": 1.459, "step": 5653 }, { "epoch": 0.43, "grad_norm": 2.7268893718719482, "learning_rate": 0.00012150633508668046, "loss": 1.3181, "step": 5654 }, { "epoch": 0.43, "grad_norm": 1.2828798294067383, "learning_rate": 0.00012148290506976012, "loss": 1.3204, "step": 5655 }, { "epoch": 0.43, "grad_norm": 1.224971055984497, "learning_rate": 0.00012145947381637499, "loss": 0.9268, "step": 5656 }, { "epoch": 0.43, "grad_norm": 2.4286136627197266, "learning_rate": 0.00012143604132787373, "loss": 1.5142, "step": 5657 }, { "epoch": 0.43, "grad_norm": 1.1535985469818115, "learning_rate": 0.00012141260760560497, "loss": 1.664, "step": 5658 }, { "epoch": 0.43, "grad_norm": 1.0065839290618896, "learning_rate": 0.00012138917265091742, "loss": 1.115, "step": 5659 }, { "epoch": 0.43, "grad_norm": 2.562692880630493, "learning_rate": 0.00012136573646515999, "loss": 1.7331, "step": 5660 }, { "epoch": 0.43, "grad_norm": 1.6028488874435425, "learning_rate": 0.00012134229904968148, "loss": 1.0586, "step": 5661 }, { "epoch": 0.43, "grad_norm": 1.0110385417938232, "learning_rate": 0.00012131886040583089, "loss": 1.213, "step": 5662 }, { "epoch": 0.43, "grad_norm": 1.2308675050735474, "learning_rate": 0.00012129542053495726, "loss": 1.9479, "step": 5663 }, { "epoch": 0.43, "grad_norm": 1.194256067276001, "learning_rate": 0.00012127197943840966, "loss": 1.6501, "step": 5664 }, { "epoch": 0.43, "grad_norm": 2.5068554878234863, "learning_rate": 0.00012124853711753727, "loss": 1.5043, "step": 5665 }, { "epoch": 0.43, "grad_norm": 1.3103874921798706, "learning_rate": 0.00012122509357368934, "loss": 1.4173, "step": 5666 }, { "epoch": 0.43, "grad_norm": 1.6785856485366821, 
"learning_rate": 0.00012120164880821518, "loss": 1.6366, "step": 5667 }, { "epoch": 0.43, "grad_norm": 2.4314544200897217, "learning_rate": 0.00012117820282246414, "loss": 2.9164, "step": 5668 }, { "epoch": 0.43, "grad_norm": 1.5080760717391968, "learning_rate": 0.00012115475561778573, "loss": 0.6625, "step": 5669 }, { "epoch": 0.43, "grad_norm": 2.19675350189209, "learning_rate": 0.00012113130719552943, "loss": 1.21, "step": 5670 }, { "epoch": 0.43, "grad_norm": 1.0403908491134644, "learning_rate": 0.00012110785755704482, "loss": 1.2661, "step": 5671 }, { "epoch": 0.43, "grad_norm": 1.3256020545959473, "learning_rate": 0.00012108440670368159, "loss": 1.592, "step": 5672 }, { "epoch": 0.43, "grad_norm": 2.099379301071167, "learning_rate": 0.00012106095463678946, "loss": 1.7927, "step": 5673 }, { "epoch": 0.43, "grad_norm": 1.23281729221344, "learning_rate": 0.00012103750135771823, "loss": 1.7783, "step": 5674 }, { "epoch": 0.43, "grad_norm": 2.129998207092285, "learning_rate": 0.00012101404686781776, "loss": 1.4586, "step": 5675 }, { "epoch": 0.43, "grad_norm": 3.1828925609588623, "learning_rate": 0.00012099059116843807, "loss": 0.9047, "step": 5676 }, { "epoch": 0.43, "grad_norm": 1.350395679473877, "learning_rate": 0.00012096713426092904, "loss": 0.8372, "step": 5677 }, { "epoch": 0.43, "grad_norm": 0.9592651724815369, "learning_rate": 0.00012094367614664084, "loss": 1.5763, "step": 5678 }, { "epoch": 0.43, "grad_norm": 1.0631903409957886, "learning_rate": 0.0001209202168269236, "loss": 1.3446, "step": 5679 }, { "epoch": 0.43, "grad_norm": 1.1879619359970093, "learning_rate": 0.00012089675630312754, "loss": 1.0261, "step": 5680 }, { "epoch": 0.43, "grad_norm": 1.469228744506836, "learning_rate": 0.00012087329457660293, "loss": 2.0974, "step": 5681 }, { "epoch": 0.43, "grad_norm": 2.0104525089263916, "learning_rate": 0.00012084983164870012, "loss": 1.5533, "step": 5682 }, { "epoch": 0.43, "grad_norm": 1.157353401184082, "learning_rate": 0.00012082636752076957, 
"loss": 1.8889, "step": 5683 }, { "epoch": 0.43, "grad_norm": 1.8132845163345337, "learning_rate": 0.00012080290219416177, "loss": 1.4238, "step": 5684 }, { "epoch": 0.43, "grad_norm": 1.1971354484558105, "learning_rate": 0.00012077943567022727, "loss": 1.0499, "step": 5685 }, { "epoch": 0.43, "grad_norm": 1.9345431327819824, "learning_rate": 0.00012075596795031672, "loss": 1.8455, "step": 5686 }, { "epoch": 0.43, "grad_norm": 0.7949820160865784, "learning_rate": 0.0001207324990357808, "loss": 0.9549, "step": 5687 }, { "epoch": 0.43, "grad_norm": 0.9917498826980591, "learning_rate": 0.00012070902892797029, "loss": 1.5273, "step": 5688 }, { "epoch": 0.43, "grad_norm": 1.71502685546875, "learning_rate": 0.00012068555762823604, "loss": 1.6836, "step": 5689 }, { "epoch": 0.43, "grad_norm": 1.912186861038208, "learning_rate": 0.00012066208513792896, "loss": 1.5862, "step": 5690 }, { "epoch": 0.43, "grad_norm": 6.577142238616943, "learning_rate": 0.00012063861145840003, "loss": 2.1441, "step": 5691 }, { "epoch": 0.43, "grad_norm": 1.460188865661621, "learning_rate": 0.00012061513659100024, "loss": 1.4654, "step": 5692 }, { "epoch": 0.43, "grad_norm": 1.6239447593688965, "learning_rate": 0.00012059166053708076, "loss": 2.0795, "step": 5693 }, { "epoch": 0.43, "grad_norm": 1.375468134880066, "learning_rate": 0.00012056818329799278, "loss": 1.0208, "step": 5694 }, { "epoch": 0.43, "grad_norm": 1.1118338108062744, "learning_rate": 0.00012054470487508751, "loss": 1.4219, "step": 5695 }, { "epoch": 0.43, "grad_norm": 1.5038669109344482, "learning_rate": 0.0001205212252697163, "loss": 1.7845, "step": 5696 }, { "epoch": 0.43, "grad_norm": 1.2359768152236938, "learning_rate": 0.00012049774448323052, "loss": 1.3315, "step": 5697 }, { "epoch": 0.43, "grad_norm": 5.80113410949707, "learning_rate": 0.0001204742625169816, "loss": 1.799, "step": 5698 }, { "epoch": 0.43, "grad_norm": 1.233830213546753, "learning_rate": 0.00012045077937232112, "loss": 1.6337, "step": 5699 }, { "epoch": 
0.43, "grad_norm": 0.9810218811035156, "learning_rate": 0.00012042729505060067, "loss": 1.2181, "step": 5700 }, { "epoch": 0.43, "grad_norm": 1.1238638162612915, "learning_rate": 0.0001204038095531718, "loss": 1.5422, "step": 5701 }, { "epoch": 0.44, "grad_norm": 1.685044288635254, "learning_rate": 0.00012038032288138636, "loss": 1.4052, "step": 5702 }, { "epoch": 0.44, "grad_norm": 1.4291187524795532, "learning_rate": 0.00012035683503659611, "loss": 1.708, "step": 5703 }, { "epoch": 0.44, "grad_norm": 1.3680840730667114, "learning_rate": 0.0001203333460201529, "loss": 1.3105, "step": 5704 }, { "epoch": 0.44, "grad_norm": 1.6682177782058716, "learning_rate": 0.00012030985583340861, "loss": 1.3984, "step": 5705 }, { "epoch": 0.44, "grad_norm": 1.427168369293213, "learning_rate": 0.00012028636447771531, "loss": 1.4665, "step": 5706 }, { "epoch": 0.44, "grad_norm": 2.026461362838745, "learning_rate": 0.00012026287195442503, "loss": 2.073, "step": 5707 }, { "epoch": 0.44, "grad_norm": 1.3745073080062866, "learning_rate": 0.0001202393782648899, "loss": 1.0656, "step": 5708 }, { "epoch": 0.44, "grad_norm": 1.6092119216918945, "learning_rate": 0.00012021588341046213, "loss": 1.0677, "step": 5709 }, { "epoch": 0.44, "grad_norm": 0.9814894795417786, "learning_rate": 0.00012019238739249397, "loss": 0.9182, "step": 5710 }, { "epoch": 0.44, "grad_norm": 1.2806237936019897, "learning_rate": 0.00012016889021233774, "loss": 1.3941, "step": 5711 }, { "epoch": 0.44, "grad_norm": 1.080013394355774, "learning_rate": 0.00012014539187134586, "loss": 1.3501, "step": 5712 }, { "epoch": 0.44, "grad_norm": 2.448563814163208, "learning_rate": 0.0001201218923708708, "loss": 2.0275, "step": 5713 }, { "epoch": 0.44, "grad_norm": 1.2455005645751953, "learning_rate": 0.0001200983917122651, "loss": 1.4842, "step": 5714 }, { "epoch": 0.44, "grad_norm": 1.9028080701828003, "learning_rate": 0.00012007488989688127, "loss": 1.7941, "step": 5715 }, { "epoch": 0.44, "grad_norm": 1.4107482433319092, 
"learning_rate": 0.0001200513869260721, "loss": 1.607, "step": 5716 }, { "epoch": 0.44, "grad_norm": 3.1651337146759033, "learning_rate": 0.00012002788280119024, "loss": 0.9631, "step": 5717 }, { "epoch": 0.44, "grad_norm": 2.512507915496826, "learning_rate": 0.0001200043775235885, "loss": 1.2291, "step": 5718 }, { "epoch": 0.44, "grad_norm": 1.1829203367233276, "learning_rate": 0.00011998087109461978, "loss": 1.4672, "step": 5719 }, { "epoch": 0.44, "grad_norm": 4.892756462097168, "learning_rate": 0.00011995736351563697, "loss": 1.6138, "step": 5720 }, { "epoch": 0.44, "grad_norm": 1.176554560661316, "learning_rate": 0.00011993385478799308, "loss": 1.0038, "step": 5721 }, { "epoch": 0.44, "grad_norm": 0.9675589203834534, "learning_rate": 0.00011991034491304118, "loss": 1.3101, "step": 5722 }, { "epoch": 0.44, "grad_norm": 1.4128113985061646, "learning_rate": 0.00011988683389213442, "loss": 1.2456, "step": 5723 }, { "epoch": 0.44, "grad_norm": 2.186217784881592, "learning_rate": 0.0001198633217266259, "loss": 1.2036, "step": 5724 }, { "epoch": 0.44, "grad_norm": 1.4385558366775513, "learning_rate": 0.000119839808417869, "loss": 0.9907, "step": 5725 }, { "epoch": 0.44, "grad_norm": 1.437969446182251, "learning_rate": 0.00011981629396721699, "loss": 1.1706, "step": 5726 }, { "epoch": 0.44, "grad_norm": 1.6251749992370605, "learning_rate": 0.00011979277837602326, "loss": 1.4506, "step": 5727 }, { "epoch": 0.44, "grad_norm": 1.28815758228302, "learning_rate": 0.00011976926164564127, "loss": 1.0745, "step": 5728 }, { "epoch": 0.44, "grad_norm": 1.4795676469802856, "learning_rate": 0.00011974574377742454, "loss": 1.5794, "step": 5729 }, { "epoch": 0.44, "grad_norm": 2.0621237754821777, "learning_rate": 0.00011972222477272663, "loss": 1.7243, "step": 5730 }, { "epoch": 0.44, "grad_norm": 2.611652374267578, "learning_rate": 0.00011969870463290127, "loss": 1.6294, "step": 5731 }, { "epoch": 0.44, "grad_norm": 1.4798803329467773, "learning_rate": 0.00011967518335930214, 
"loss": 1.1266, "step": 5732 }, { "epoch": 0.44, "grad_norm": 1.136101484298706, "learning_rate": 0.00011965166095328301, "loss": 1.4549, "step": 5733 }, { "epoch": 0.44, "grad_norm": 1.484019160270691, "learning_rate": 0.00011962813741619777, "loss": 1.2182, "step": 5734 }, { "epoch": 0.44, "grad_norm": 1.379972219467163, "learning_rate": 0.0001196046127494003, "loss": 1.2712, "step": 5735 }, { "epoch": 0.44, "grad_norm": 0.9849005341529846, "learning_rate": 0.00011958108695424452, "loss": 1.3622, "step": 5736 }, { "epoch": 0.44, "grad_norm": 1.0504893064498901, "learning_rate": 0.00011955756003208462, "loss": 1.7432, "step": 5737 }, { "epoch": 0.44, "grad_norm": 1.2427239418029785, "learning_rate": 0.00011953403198427462, "loss": 1.5163, "step": 5738 }, { "epoch": 0.44, "grad_norm": 1.567328929901123, "learning_rate": 0.00011951050281216866, "loss": 1.6202, "step": 5739 }, { "epoch": 0.44, "grad_norm": 1.1762170791625977, "learning_rate": 0.00011948697251712109, "loss": 1.3988, "step": 5740 }, { "epoch": 0.44, "grad_norm": 1.1807403564453125, "learning_rate": 0.00011946344110048611, "loss": 1.0752, "step": 5741 }, { "epoch": 0.44, "grad_norm": 2.2510783672332764, "learning_rate": 0.00011943990856361813, "loss": 1.3197, "step": 5742 }, { "epoch": 0.44, "grad_norm": 1.3068978786468506, "learning_rate": 0.00011941637490787159, "loss": 2.3213, "step": 5743 }, { "epoch": 0.44, "grad_norm": 1.14883553981781, "learning_rate": 0.00011939284013460096, "loss": 1.2437, "step": 5744 }, { "epoch": 0.44, "grad_norm": 1.2900184392929077, "learning_rate": 0.00011936930424516082, "loss": 1.7442, "step": 5745 }, { "epoch": 0.44, "grad_norm": 3.7542529106140137, "learning_rate": 0.00011934576724090582, "loss": 2.1762, "step": 5746 }, { "epoch": 0.44, "grad_norm": 1.6820182800292969, "learning_rate": 0.00011932222912319064, "loss": 1.5568, "step": 5747 }, { "epoch": 0.44, "grad_norm": 1.6664016246795654, "learning_rate": 0.00011929868989336997, "loss": 1.7083, "step": 5748 }, { 
"epoch": 0.44, "grad_norm": 1.4697507619857788, "learning_rate": 0.0001192751495527987, "loss": 1.5163, "step": 5749 }, { "epoch": 0.44, "grad_norm": 1.7717052698135376, "learning_rate": 0.0001192516081028317, "loss": 1.4056, "step": 5750 }, { "epoch": 0.44, "grad_norm": 1.233891248703003, "learning_rate": 0.00011922806554482389, "loss": 1.5464, "step": 5751 }, { "epoch": 0.44, "grad_norm": 1.740100383758545, "learning_rate": 0.00011920452188013029, "loss": 0.9898, "step": 5752 }, { "epoch": 0.44, "grad_norm": 1.2683831453323364, "learning_rate": 0.00011918097711010601, "loss": 1.9783, "step": 5753 }, { "epoch": 0.44, "grad_norm": 1.100399136543274, "learning_rate": 0.0001191574312361061, "loss": 1.2051, "step": 5754 }, { "epoch": 0.44, "grad_norm": 1.5718166828155518, "learning_rate": 0.00011913388425948584, "loss": 0.9739, "step": 5755 }, { "epoch": 0.44, "grad_norm": 2.033764123916626, "learning_rate": 0.00011911033618160052, "loss": 1.8603, "step": 5756 }, { "epoch": 0.44, "grad_norm": 1.7559657096862793, "learning_rate": 0.00011908678700380535, "loss": 1.7022, "step": 5757 }, { "epoch": 0.44, "grad_norm": 1.0544904470443726, "learning_rate": 0.0001190632367274558, "loss": 1.4353, "step": 5758 }, { "epoch": 0.44, "grad_norm": 1.5527094602584839, "learning_rate": 0.00011903968535390734, "loss": 1.476, "step": 5759 }, { "epoch": 0.44, "grad_norm": 1.298027515411377, "learning_rate": 0.00011901613288451546, "loss": 1.3355, "step": 5760 }, { "epoch": 0.44, "grad_norm": 2.3595776557922363, "learning_rate": 0.0001189925793206357, "loss": 1.3458, "step": 5761 }, { "epoch": 0.44, "grad_norm": 1.9329038858413696, "learning_rate": 0.00011896902466362377, "loss": 1.3134, "step": 5762 }, { "epoch": 0.44, "grad_norm": 1.397525429725647, "learning_rate": 0.00011894546891483531, "loss": 1.7626, "step": 5763 }, { "epoch": 0.44, "grad_norm": 0.8673954606056213, "learning_rate": 0.00011892191207562617, "loss": 0.6566, "step": 5764 }, { "epoch": 0.44, "grad_norm": 
1.6379640102386475, "learning_rate": 0.00011889835414735214, "loss": 1.3958, "step": 5765 }, { "epoch": 0.44, "grad_norm": 1.4584341049194336, "learning_rate": 0.00011887479513136907, "loss": 1.1288, "step": 5766 }, { "epoch": 0.44, "grad_norm": 1.366134524345398, "learning_rate": 0.00011885123502903301, "loss": 1.4458, "step": 5767 }, { "epoch": 0.44, "grad_norm": 2.038360118865967, "learning_rate": 0.0001188276738416999, "loss": 1.5572, "step": 5768 }, { "epoch": 0.44, "grad_norm": 1.7851502895355225, "learning_rate": 0.00011880411157072584, "loss": 1.883, "step": 5769 }, { "epoch": 0.44, "grad_norm": 1.2117531299591064, "learning_rate": 0.00011878054821746703, "loss": 1.1863, "step": 5770 }, { "epoch": 0.44, "grad_norm": 1.4874166250228882, "learning_rate": 0.0001187569837832796, "loss": 1.5281, "step": 5771 }, { "epoch": 0.44, "grad_norm": 0.9830332398414612, "learning_rate": 0.00011873341826951985, "loss": 1.6831, "step": 5772 }, { "epoch": 0.44, "grad_norm": 2.3251607418060303, "learning_rate": 0.00011870985167754409, "loss": 1.0596, "step": 5773 }, { "epoch": 0.44, "grad_norm": 4.3994269371032715, "learning_rate": 0.00011868628400870878, "loss": 1.9888, "step": 5774 }, { "epoch": 0.44, "grad_norm": 0.7775556445121765, "learning_rate": 0.00011866271526437031, "loss": 0.7998, "step": 5775 }, { "epoch": 0.44, "grad_norm": 0.981614351272583, "learning_rate": 0.00011863914544588521, "loss": 0.6887, "step": 5776 }, { "epoch": 0.44, "grad_norm": 1.6651197671890259, "learning_rate": 0.00011861557455461005, "loss": 1.4336, "step": 5777 }, { "epoch": 0.44, "grad_norm": 1.7665222883224487, "learning_rate": 0.00011859200259190151, "loss": 1.7828, "step": 5778 }, { "epoch": 0.44, "grad_norm": 4.1696062088012695, "learning_rate": 0.00011856842955911624, "loss": 2.5619, "step": 5779 }, { "epoch": 0.44, "grad_norm": 0.9573651552200317, "learning_rate": 0.00011854485545761108, "loss": 0.9852, "step": 5780 }, { "epoch": 0.44, "grad_norm": 1.18479585647583, "learning_rate": 
0.00011852128028874276, "loss": 1.2848, "step": 5781 }, { "epoch": 0.44, "grad_norm": 1.7206188440322876, "learning_rate": 0.00011849770405386824, "loss": 1.8681, "step": 5782 }, { "epoch": 0.44, "grad_norm": 1.4797677993774414, "learning_rate": 0.00011847412675434441, "loss": 1.6351, "step": 5783 }, { "epoch": 0.44, "grad_norm": 1.0824834108352661, "learning_rate": 0.00011845054839152833, "loss": 1.7609, "step": 5784 }, { "epoch": 0.44, "grad_norm": 0.8522696495056152, "learning_rate": 0.00011842696896677708, "loss": 1.3113, "step": 5785 }, { "epoch": 0.44, "grad_norm": 1.3057973384857178, "learning_rate": 0.00011840338848144769, "loss": 1.4563, "step": 5786 }, { "epoch": 0.44, "grad_norm": 1.0792821645736694, "learning_rate": 0.00011837980693689747, "loss": 1.5579, "step": 5787 }, { "epoch": 0.44, "grad_norm": 1.5056208372116089, "learning_rate": 0.00011835622433448361, "loss": 1.1841, "step": 5788 }, { "epoch": 0.44, "grad_norm": 1.2048996686935425, "learning_rate": 0.00011833264067556347, "loss": 0.9461, "step": 5789 }, { "epoch": 0.44, "grad_norm": 2.0745301246643066, "learning_rate": 0.00011830905596149437, "loss": 1.8845, "step": 5790 }, { "epoch": 0.44, "grad_norm": 1.3502744436264038, "learning_rate": 0.00011828547019363378, "loss": 1.6301, "step": 5791 }, { "epoch": 0.44, "grad_norm": 1.8528401851654053, "learning_rate": 0.00011826188337333918, "loss": 1.346, "step": 5792 }, { "epoch": 0.44, "grad_norm": 1.3278369903564453, "learning_rate": 0.00011823829550196817, "loss": 1.3518, "step": 5793 }, { "epoch": 0.44, "grad_norm": 1.1601215600967407, "learning_rate": 0.00011821470658087832, "loss": 1.6359, "step": 5794 }, { "epoch": 0.44, "grad_norm": 1.8298910856246948, "learning_rate": 0.0001181911166114273, "loss": 1.4459, "step": 5795 }, { "epoch": 0.44, "grad_norm": 2.251892328262329, "learning_rate": 0.00011816752559497288, "loss": 1.589, "step": 5796 }, { "epoch": 0.44, "grad_norm": 1.1237815618515015, "learning_rate": 0.00011814393353287287, "loss": 
2.2372, "step": 5797 }, { "epoch": 0.44, "grad_norm": 1.1965701580047607, "learning_rate": 0.0001181203404264851, "loss": 1.3994, "step": 5798 }, { "epoch": 0.44, "grad_norm": 1.3352171182632446, "learning_rate": 0.00011809674627716752, "loss": 1.1339, "step": 5799 }, { "epoch": 0.44, "grad_norm": 1.0747252702713013, "learning_rate": 0.00011807315108627806, "loss": 1.4497, "step": 5800 }, { "epoch": 0.44, "grad_norm": 1.1299198865890503, "learning_rate": 0.00011804955485517475, "loss": 1.4637, "step": 5801 }, { "epoch": 0.44, "grad_norm": 0.9700508713722229, "learning_rate": 0.00011802595758521578, "loss": 0.7184, "step": 5802 }, { "epoch": 0.44, "grad_norm": 1.8261804580688477, "learning_rate": 0.00011800235927775925, "loss": 0.9925, "step": 5803 }, { "epoch": 0.44, "grad_norm": 1.5696810483932495, "learning_rate": 0.00011797875993416334, "loss": 1.1956, "step": 5804 }, { "epoch": 0.44, "grad_norm": 1.9356526136398315, "learning_rate": 0.0001179551595557864, "loss": 1.122, "step": 5805 }, { "epoch": 0.44, "grad_norm": 13.561042785644531, "learning_rate": 0.00011793155814398674, "loss": 2.2517, "step": 5806 }, { "epoch": 0.44, "grad_norm": 1.7962429523468018, "learning_rate": 0.00011790795570012274, "loss": 1.3514, "step": 5807 }, { "epoch": 0.44, "grad_norm": 1.1698757410049438, "learning_rate": 0.0001178843522255529, "loss": 1.3225, "step": 5808 }, { "epoch": 0.44, "grad_norm": 1.1097861528396606, "learning_rate": 0.00011786074772163567, "loss": 1.1721, "step": 5809 }, { "epoch": 0.44, "grad_norm": 2.582007884979248, "learning_rate": 0.00011783714218972965, "loss": 1.5715, "step": 5810 }, { "epoch": 0.44, "grad_norm": 1.4436274766921997, "learning_rate": 0.00011781353563119352, "loss": 1.3215, "step": 5811 }, { "epoch": 0.44, "grad_norm": 1.5518574714660645, "learning_rate": 0.00011778992804738594, "loss": 1.5171, "step": 5812 }, { "epoch": 0.44, "grad_norm": 1.7166430950164795, "learning_rate": 0.00011776631943966565, "loss": 1.5036, "step": 5813 }, { "epoch": 
0.44, "grad_norm": 0.8729252219200134, "learning_rate": 0.00011774270980939148, "loss": 1.2375, "step": 5814 }, { "epoch": 0.44, "grad_norm": 1.2205482721328735, "learning_rate": 0.0001177190991579223, "loss": 1.2134, "step": 5815 }, { "epoch": 0.44, "grad_norm": 5.592175006866455, "learning_rate": 0.00011769548748661698, "loss": 1.234, "step": 5816 }, { "epoch": 0.44, "grad_norm": 1.1604701280593872, "learning_rate": 0.00011767187479683464, "loss": 1.1565, "step": 5817 }, { "epoch": 0.44, "grad_norm": 3.0561795234680176, "learning_rate": 0.00011764826108993419, "loss": 1.9944, "step": 5818 }, { "epoch": 0.44, "grad_norm": 1.924837350845337, "learning_rate": 0.00011762464636727476, "loss": 1.8211, "step": 5819 }, { "epoch": 0.44, "grad_norm": 1.4822856187820435, "learning_rate": 0.0001176010306302156, "loss": 1.6808, "step": 5820 }, { "epoch": 0.44, "grad_norm": 1.926737904548645, "learning_rate": 0.00011757741388011588, "loss": 1.4755, "step": 5821 }, { "epoch": 0.44, "grad_norm": 5.048051834106445, "learning_rate": 0.00011755379611833483, "loss": 1.4245, "step": 5822 }, { "epoch": 0.44, "grad_norm": 1.2720232009887695, "learning_rate": 0.00011753017734623186, "loss": 1.4587, "step": 5823 }, { "epoch": 0.44, "grad_norm": 1.1264115571975708, "learning_rate": 0.00011750655756516633, "loss": 1.254, "step": 5824 }, { "epoch": 0.44, "grad_norm": 1.6879667043685913, "learning_rate": 0.00011748293677649769, "loss": 1.7037, "step": 5825 }, { "epoch": 0.44, "grad_norm": 1.52594792842865, "learning_rate": 0.00011745931498158547, "loss": 1.3921, "step": 5826 }, { "epoch": 0.44, "grad_norm": 1.7453678846359253, "learning_rate": 0.00011743569218178927, "loss": 1.5451, "step": 5827 }, { "epoch": 0.44, "grad_norm": 1.104270339012146, "learning_rate": 0.00011741206837846862, "loss": 1.0133, "step": 5828 }, { "epoch": 0.44, "grad_norm": 1.9959317445755005, "learning_rate": 0.00011738844357298332, "loss": 1.582, "step": 5829 }, { "epoch": 0.44, "grad_norm": 1.1432123184204102, 
"learning_rate": 0.00011736481776669306, "loss": 1.4587, "step": 5830 }, { "epoch": 0.44, "grad_norm": 1.114980697631836, "learning_rate": 0.00011734119096095762, "loss": 1.6197, "step": 5831 }, { "epoch": 0.44, "grad_norm": 2.343292713165283, "learning_rate": 0.00011731756315713689, "loss": 1.6502, "step": 5832 }, { "epoch": 0.45, "grad_norm": 1.7981204986572266, "learning_rate": 0.00011729393435659077, "loss": 1.1082, "step": 5833 }, { "epoch": 0.45, "grad_norm": 2.169118881225586, "learning_rate": 0.00011727030456067924, "loss": 2.4096, "step": 5834 }, { "epoch": 0.45, "grad_norm": 1.0387108325958252, "learning_rate": 0.00011724667377076235, "loss": 1.7935, "step": 5835 }, { "epoch": 0.45, "grad_norm": 3.073873519897461, "learning_rate": 0.00011722304198820017, "loss": 1.4849, "step": 5836 }, { "epoch": 0.45, "grad_norm": 2.9751408100128174, "learning_rate": 0.00011719940921435282, "loss": 1.8817, "step": 5837 }, { "epoch": 0.45, "grad_norm": 1.3667649030685425, "learning_rate": 0.00011717577545058055, "loss": 1.2325, "step": 5838 }, { "epoch": 0.45, "grad_norm": 3.1043546199798584, "learning_rate": 0.0001171521406982436, "loss": 1.7543, "step": 5839 }, { "epoch": 0.45, "grad_norm": 2.010563850402832, "learning_rate": 0.00011712850495870225, "loss": 1.6355, "step": 5840 }, { "epoch": 0.45, "grad_norm": 1.3849667310714722, "learning_rate": 0.00011710486823331694, "loss": 1.7308, "step": 5841 }, { "epoch": 0.45, "grad_norm": 2.545238971710205, "learning_rate": 0.00011708123052344804, "loss": 2.1101, "step": 5842 }, { "epoch": 0.45, "grad_norm": 2.3972039222717285, "learning_rate": 0.00011705759183045606, "loss": 1.1519, "step": 5843 }, { "epoch": 0.45, "grad_norm": 1.8137922286987305, "learning_rate": 0.00011703395215570156, "loss": 1.6036, "step": 5844 }, { "epoch": 0.45, "grad_norm": 1.196013331413269, "learning_rate": 0.0001170103115005451, "loss": 1.8648, "step": 5845 }, { "epoch": 0.45, "grad_norm": 1.1073329448699951, "learning_rate": 0.0001169866698663474, 
"loss": 1.6488, "step": 5846 }, { "epoch": 0.45, "grad_norm": 1.3412739038467407, "learning_rate": 0.0001169630272544691, "loss": 1.618, "step": 5847 }, { "epoch": 0.45, "grad_norm": 1.7789276838302612, "learning_rate": 0.000116939383666271, "loss": 1.602, "step": 5848 }, { "epoch": 0.45, "grad_norm": 2.958662271499634, "learning_rate": 0.0001169157391031139, "loss": 1.4469, "step": 5849 }, { "epoch": 0.45, "grad_norm": 1.0491533279418945, "learning_rate": 0.00011689209356635877, "loss": 1.6568, "step": 5850 }, { "epoch": 0.45, "grad_norm": 1.3311073780059814, "learning_rate": 0.00011686844705736642, "loss": 1.2103, "step": 5851 }, { "epoch": 0.45, "grad_norm": 1.1595672369003296, "learning_rate": 0.00011684479957749792, "loss": 1.6161, "step": 5852 }, { "epoch": 0.45, "grad_norm": 1.6035913228988647, "learning_rate": 0.00011682115112811432, "loss": 1.3621, "step": 5853 }, { "epoch": 0.45, "grad_norm": 4.171332836151123, "learning_rate": 0.00011679750171057669, "loss": 1.5785, "step": 5854 }, { "epoch": 0.45, "grad_norm": 1.0168204307556152, "learning_rate": 0.00011677385132624621, "loss": 1.4042, "step": 5855 }, { "epoch": 0.45, "grad_norm": 1.3227742910385132, "learning_rate": 0.00011675019997648408, "loss": 1.371, "step": 5856 }, { "epoch": 0.45, "grad_norm": 2.335975170135498, "learning_rate": 0.00011672654766265156, "loss": 1.8022, "step": 5857 }, { "epoch": 0.45, "grad_norm": 1.2640730142593384, "learning_rate": 0.00011670289438611004, "loss": 2.4428, "step": 5858 }, { "epoch": 0.45, "grad_norm": 1.2228193283081055, "learning_rate": 0.00011667924014822085, "loss": 1.0242, "step": 5859 }, { "epoch": 0.45, "grad_norm": 1.4152746200561523, "learning_rate": 0.00011665558495034546, "loss": 1.5206, "step": 5860 }, { "epoch": 0.45, "grad_norm": 2.647914171218872, "learning_rate": 0.00011663192879384533, "loss": 2.1924, "step": 5861 }, { "epoch": 0.45, "grad_norm": 1.2288206815719604, "learning_rate": 0.00011660827168008199, "loss": 1.5051, "step": 5862 }, { "epoch": 
0.45, "grad_norm": 1.4235854148864746, "learning_rate": 0.00011658461361041708, "loss": 1.6533, "step": 5863 }, { "epoch": 0.45, "grad_norm": 1.198373556137085, "learning_rate": 0.0001165609545862123, "loss": 1.3723, "step": 5864 }, { "epoch": 0.45, "grad_norm": 1.1880245208740234, "learning_rate": 0.00011653729460882927, "loss": 1.2974, "step": 5865 }, { "epoch": 0.45, "grad_norm": 1.8537185192108154, "learning_rate": 0.00011651363367962979, "loss": 1.3502, "step": 5866 }, { "epoch": 0.45, "grad_norm": 1.5940042734146118, "learning_rate": 0.00011648997179997571, "loss": 1.8619, "step": 5867 }, { "epoch": 0.45, "grad_norm": 1.1829240322113037, "learning_rate": 0.0001164663089712289, "loss": 1.2786, "step": 5868 }, { "epoch": 0.45, "grad_norm": 1.3295111656188965, "learning_rate": 0.0001164426451947513, "loss": 1.087, "step": 5869 }, { "epoch": 0.45, "grad_norm": 1.0637342929840088, "learning_rate": 0.00011641898047190485, "loss": 1.3747, "step": 5870 }, { "epoch": 0.45, "grad_norm": 2.241842746734619, "learning_rate": 0.00011639531480405164, "loss": 1.3618, "step": 5871 }, { "epoch": 0.45, "grad_norm": 1.2300138473510742, "learning_rate": 0.00011637164819255371, "loss": 1.0263, "step": 5872 }, { "epoch": 0.45, "grad_norm": 1.5555310249328613, "learning_rate": 0.00011634798063877326, "loss": 1.7767, "step": 5873 }, { "epoch": 0.45, "grad_norm": 1.1036795377731323, "learning_rate": 0.00011632431214407252, "loss": 1.2506, "step": 5874 }, { "epoch": 0.45, "grad_norm": 1.001323938369751, "learning_rate": 0.00011630064270981367, "loss": 1.5962, "step": 5875 }, { "epoch": 0.45, "grad_norm": 1.4498238563537598, "learning_rate": 0.00011627697233735906, "loss": 1.5594, "step": 5876 }, { "epoch": 0.45, "grad_norm": 1.7643963098526, "learning_rate": 0.00011625330102807105, "loss": 1.1381, "step": 5877 }, { "epoch": 0.45, "grad_norm": 1.9146062135696411, "learning_rate": 0.0001162296287833121, "loss": 1.8397, "step": 5878 }, { "epoch": 0.45, "grad_norm": 1.3362253904342651, 
"learning_rate": 0.00011620595560444463, "loss": 0.9385, "step": 5879 }, { "epoch": 0.45, "grad_norm": 1.2705930471420288, "learning_rate": 0.00011618228149283117, "loss": 1.234, "step": 5880 }, { "epoch": 0.45, "grad_norm": 2.18796968460083, "learning_rate": 0.00011615860644983432, "loss": 1.1128, "step": 5881 }, { "epoch": 0.45, "grad_norm": 1.2819643020629883, "learning_rate": 0.00011613493047681671, "loss": 1.3982, "step": 5882 }, { "epoch": 0.45, "grad_norm": 1.9436068534851074, "learning_rate": 0.0001161112535751411, "loss": 1.2935, "step": 5883 }, { "epoch": 0.45, "grad_norm": 1.446242094039917, "learning_rate": 0.00011608757574617009, "loss": 0.9227, "step": 5884 }, { "epoch": 0.45, "grad_norm": 1.5238834619522095, "learning_rate": 0.00011606389699126656, "loss": 2.025, "step": 5885 }, { "epoch": 0.45, "grad_norm": 1.38045072555542, "learning_rate": 0.00011604021731179338, "loss": 1.3326, "step": 5886 }, { "epoch": 0.45, "grad_norm": 1.2905995845794678, "learning_rate": 0.00011601653670911339, "loss": 1.2152, "step": 5887 }, { "epoch": 0.45, "grad_norm": 1.3334646224975586, "learning_rate": 0.00011599285518458958, "loss": 0.8736, "step": 5888 }, { "epoch": 0.45, "grad_norm": 2.6099660396575928, "learning_rate": 0.00011596917273958497, "loss": 1.5273, "step": 5889 }, { "epoch": 0.45, "grad_norm": 1.4268900156021118, "learning_rate": 0.00011594548937546257, "loss": 1.3119, "step": 5890 }, { "epoch": 0.45, "grad_norm": 1.4900729656219482, "learning_rate": 0.00011592180509358554, "loss": 1.7198, "step": 5891 }, { "epoch": 0.45, "grad_norm": 0.8955259919166565, "learning_rate": 0.00011589811989531703, "loss": 0.9799, "step": 5892 }, { "epoch": 0.45, "grad_norm": 1.1454724073410034, "learning_rate": 0.00011587443378202028, "loss": 1.1373, "step": 5893 }, { "epoch": 0.45, "grad_norm": 1.2768940925598145, "learning_rate": 0.00011585074675505854, "loss": 1.6625, "step": 5894 }, { "epoch": 0.45, "grad_norm": 3.5566318035125732, "learning_rate": 
0.00011582705881579514, "loss": 1.2975, "step": 5895 }, { "epoch": 0.45, "grad_norm": 1.227869987487793, "learning_rate": 0.00011580336996559343, "loss": 0.9769, "step": 5896 }, { "epoch": 0.45, "grad_norm": 1.4639400243759155, "learning_rate": 0.00011577968020581691, "loss": 1.3951, "step": 5897 }, { "epoch": 0.45, "grad_norm": 1.6240507364273071, "learning_rate": 0.000115755989537829, "loss": 1.0157, "step": 5898 }, { "epoch": 0.45, "grad_norm": 1.4020684957504272, "learning_rate": 0.00011573229796299325, "loss": 1.2225, "step": 5899 }, { "epoch": 0.45, "grad_norm": 1.1822130680084229, "learning_rate": 0.00011570860548267323, "loss": 1.2994, "step": 5900 }, { "epoch": 0.45, "grad_norm": 2.593165874481201, "learning_rate": 0.00011568491209823265, "loss": 2.1586, "step": 5901 }, { "epoch": 0.45, "grad_norm": 1.4102007150650024, "learning_rate": 0.0001156612178110351, "loss": 1.7882, "step": 5902 }, { "epoch": 0.45, "grad_norm": 1.6114422082901, "learning_rate": 0.0001156375226224444, "loss": 1.1177, "step": 5903 }, { "epoch": 0.45, "grad_norm": 1.9612771272659302, "learning_rate": 0.00011561382653382431, "loss": 1.4731, "step": 5904 }, { "epoch": 0.45, "grad_norm": 1.6257970333099365, "learning_rate": 0.00011559012954653865, "loss": 0.751, "step": 5905 }, { "epoch": 0.45, "grad_norm": 1.598261833190918, "learning_rate": 0.00011556643166195138, "loss": 1.3624, "step": 5906 }, { "epoch": 0.45, "grad_norm": 2.3848626613616943, "learning_rate": 0.00011554273288142643, "loss": 1.6583, "step": 5907 }, { "epoch": 0.45, "grad_norm": 2.1134626865386963, "learning_rate": 0.00011551903320632776, "loss": 1.985, "step": 5908 }, { "epoch": 0.45, "grad_norm": 2.087327718734741, "learning_rate": 0.00011549533263801946, "loss": 1.2728, "step": 5909 }, { "epoch": 0.45, "grad_norm": 2.940077304840088, "learning_rate": 0.00011547163117786561, "loss": 1.2962, "step": 5910 }, { "epoch": 0.45, "grad_norm": 1.8834294080734253, "learning_rate": 0.00011544792882723043, "loss": 2.0441, 
"step": 5911 }, { "epoch": 0.45, "grad_norm": 1.0265378952026367, "learning_rate": 0.00011542422558747807, "loss": 1.1892, "step": 5912 }, { "epoch": 0.45, "grad_norm": 1.754533290863037, "learning_rate": 0.00011540052145997275, "loss": 1.5849, "step": 5913 }, { "epoch": 0.45, "grad_norm": 2.0266802310943604, "learning_rate": 0.00011537681644607888, "loss": 1.4373, "step": 5914 }, { "epoch": 0.45, "grad_norm": 1.1713197231292725, "learning_rate": 0.00011535311054716073, "loss": 1.2843, "step": 5915 }, { "epoch": 0.45, "grad_norm": 2.0641791820526123, "learning_rate": 0.00011532940376458277, "loss": 1.8957, "step": 5916 }, { "epoch": 0.45, "grad_norm": 1.1990175247192383, "learning_rate": 0.00011530569609970943, "loss": 1.2124, "step": 5917 }, { "epoch": 0.45, "grad_norm": 1.5343154668807983, "learning_rate": 0.00011528198755390523, "loss": 1.5422, "step": 5918 }, { "epoch": 0.45, "grad_norm": 1.1749005317687988, "learning_rate": 0.00011525827812853472, "loss": 0.7195, "step": 5919 }, { "epoch": 0.45, "grad_norm": 1.5652594566345215, "learning_rate": 0.00011523456782496253, "loss": 1.6125, "step": 5920 }, { "epoch": 0.45, "grad_norm": 1.2325904369354248, "learning_rate": 0.00011521085664455335, "loss": 1.8085, "step": 5921 }, { "epoch": 0.45, "grad_norm": 1.7484347820281982, "learning_rate": 0.00011518714458867184, "loss": 1.6512, "step": 5922 }, { "epoch": 0.45, "grad_norm": 2.5773983001708984, "learning_rate": 0.00011516343165868279, "loss": 1.2775, "step": 5923 }, { "epoch": 0.45, "grad_norm": 1.8330934047698975, "learning_rate": 0.00011513971785595103, "loss": 1.5207, "step": 5924 }, { "epoch": 0.45, "grad_norm": 1.660016655921936, "learning_rate": 0.00011511600318184141, "loss": 1.1898, "step": 5925 }, { "epoch": 0.45, "grad_norm": 1.9795054197311401, "learning_rate": 0.00011509228763771886, "loss": 1.8022, "step": 5926 }, { "epoch": 0.45, "grad_norm": 1.4826092720031738, "learning_rate": 0.00011506857122494831, "loss": 1.6526, "step": 5927 }, { "epoch": 0.45, 
"grad_norm": 1.077924132347107, "learning_rate": 0.00011504485394489481, "loss": 1.0057, "step": 5928 }, { "epoch": 0.45, "grad_norm": 1.266506552696228, "learning_rate": 0.00011502113579892344, "loss": 1.1852, "step": 5929 }, { "epoch": 0.45, "grad_norm": 2.339256525039673, "learning_rate": 0.00011499741678839928, "loss": 1.8736, "step": 5930 }, { "epoch": 0.45, "grad_norm": 1.0685882568359375, "learning_rate": 0.00011497369691468748, "loss": 0.9195, "step": 5931 }, { "epoch": 0.45, "grad_norm": 3.0165905952453613, "learning_rate": 0.00011494997617915333, "loss": 1.6256, "step": 5932 }, { "epoch": 0.45, "grad_norm": 5.230472564697266, "learning_rate": 0.00011492625458316202, "loss": 2.2805, "step": 5933 }, { "epoch": 0.45, "grad_norm": 3.2341487407684326, "learning_rate": 0.00011490253212807891, "loss": 1.7686, "step": 5934 }, { "epoch": 0.45, "grad_norm": 2.851362943649292, "learning_rate": 0.00011487880881526934, "loss": 2.2464, "step": 5935 }, { "epoch": 0.45, "grad_norm": 1.1031793355941772, "learning_rate": 0.00011485508464609873, "loss": 1.2904, "step": 5936 }, { "epoch": 0.45, "grad_norm": 1.8975493907928467, "learning_rate": 0.00011483135962193252, "loss": 1.3575, "step": 5937 }, { "epoch": 0.45, "grad_norm": 1.3753156661987305, "learning_rate": 0.00011480763374413626, "loss": 1.6489, "step": 5938 }, { "epoch": 0.45, "grad_norm": 1.1379879713058472, "learning_rate": 0.00011478390701407549, "loss": 1.6678, "step": 5939 }, { "epoch": 0.45, "grad_norm": 1.5552558898925781, "learning_rate": 0.00011476017943311583, "loss": 1.4405, "step": 5940 }, { "epoch": 0.45, "grad_norm": 1.2221901416778564, "learning_rate": 0.00011473645100262294, "loss": 1.092, "step": 5941 }, { "epoch": 0.45, "grad_norm": 1.0899771451950073, "learning_rate": 0.00011471272172396253, "loss": 1.6872, "step": 5942 }, { "epoch": 0.45, "grad_norm": 1.133545160293579, "learning_rate": 0.0001146889915985003, "loss": 1.213, "step": 5943 }, { "epoch": 0.45, "grad_norm": 1.115592122077942, 
"learning_rate": 0.00011466526062760217, "loss": 1.271, "step": 5944 }, { "epoch": 0.45, "grad_norm": 2.2790279388427734, "learning_rate": 0.00011464152881263388, "loss": 1.7644, "step": 5945 }, { "epoch": 0.45, "grad_norm": 1.174192190170288, "learning_rate": 0.00011461779615496138, "loss": 1.2213, "step": 5946 }, { "epoch": 0.45, "grad_norm": 2.5561482906341553, "learning_rate": 0.00011459406265595063, "loss": 1.7441, "step": 5947 }, { "epoch": 0.45, "grad_norm": 2.23005747795105, "learning_rate": 0.00011457032831696762, "loss": 1.6044, "step": 5948 }, { "epoch": 0.45, "grad_norm": 1.367485761642456, "learning_rate": 0.00011454659313937839, "loss": 1.0599, "step": 5949 }, { "epoch": 0.45, "grad_norm": 2.2965831756591797, "learning_rate": 0.00011452285712454904, "loss": 1.4856, "step": 5950 }, { "epoch": 0.45, "grad_norm": 2.29799485206604, "learning_rate": 0.00011449912027384572, "loss": 1.3747, "step": 5951 }, { "epoch": 0.45, "grad_norm": 2.4893085956573486, "learning_rate": 0.00011447538258863462, "loss": 1.4614, "step": 5952 }, { "epoch": 0.45, "grad_norm": 1.2745839357376099, "learning_rate": 0.00011445164407028198, "loss": 1.7613, "step": 5953 }, { "epoch": 0.45, "grad_norm": 1.362143874168396, "learning_rate": 0.0001144279047201541, "loss": 1.4057, "step": 5954 }, { "epoch": 0.45, "grad_norm": 1.8408766984939575, "learning_rate": 0.00011440416453961728, "loss": 1.2901, "step": 5955 }, { "epoch": 0.45, "grad_norm": 0.9736838936805725, "learning_rate": 0.00011438042353003794, "loss": 0.8459, "step": 5956 }, { "epoch": 0.45, "grad_norm": 1.2334150075912476, "learning_rate": 0.0001143566816927825, "loss": 1.1881, "step": 5957 }, { "epoch": 0.45, "grad_norm": 1.970848798751831, "learning_rate": 0.00011433293902921746, "loss": 0.85, "step": 5958 }, { "epoch": 0.45, "grad_norm": 2.562652111053467, "learning_rate": 0.00011430919554070931, "loss": 1.3154, "step": 5959 }, { "epoch": 0.45, "grad_norm": 1.1315631866455078, "learning_rate": 0.00011428545122862464, 
"loss": 0.7067, "step": 5960 }, { "epoch": 0.45, "grad_norm": 1.5431959629058838, "learning_rate": 0.00011426170609433004, "loss": 1.7019, "step": 5961 }, { "epoch": 0.45, "grad_norm": 1.433237910270691, "learning_rate": 0.00011423796013919226, "loss": 1.1304, "step": 5962 }, { "epoch": 0.45, "grad_norm": 1.3603284358978271, "learning_rate": 0.00011421421336457796, "loss": 1.0063, "step": 5963 }, { "epoch": 0.46, "grad_norm": 2.5992279052734375, "learning_rate": 0.00011419046577185388, "loss": 1.1672, "step": 5964 }, { "epoch": 0.46, "grad_norm": 1.7859623432159424, "learning_rate": 0.00011416671736238689, "loss": 1.3759, "step": 5965 }, { "epoch": 0.46, "grad_norm": 1.8190901279449463, "learning_rate": 0.00011414296813754381, "loss": 1.8202, "step": 5966 }, { "epoch": 0.46, "grad_norm": 1.388482689857483, "learning_rate": 0.00011411921809869156, "loss": 1.6739, "step": 5967 }, { "epoch": 0.46, "grad_norm": 1.3263579607009888, "learning_rate": 0.00011409546724719708, "loss": 1.8543, "step": 5968 }, { "epoch": 0.46, "grad_norm": 1.2242629528045654, "learning_rate": 0.00011407171558442738, "loss": 1.6121, "step": 5969 }, { "epoch": 0.46, "grad_norm": 2.338395595550537, "learning_rate": 0.00011404796311174945, "loss": 1.1349, "step": 5970 }, { "epoch": 0.46, "grad_norm": 2.9160187244415283, "learning_rate": 0.00011402420983053048, "loss": 2.0108, "step": 5971 }, { "epoch": 0.46, "grad_norm": 1.1610984802246094, "learning_rate": 0.00011400045574213756, "loss": 1.5526, "step": 5972 }, { "epoch": 0.46, "grad_norm": 1.1691508293151855, "learning_rate": 0.00011397670084793785, "loss": 1.9523, "step": 5973 }, { "epoch": 0.46, "grad_norm": 1.9186160564422607, "learning_rate": 0.00011395294514929861, "loss": 1.7126, "step": 5974 }, { "epoch": 0.46, "grad_norm": 1.1638020277023315, "learning_rate": 0.00011392918864758712, "loss": 1.3187, "step": 5975 }, { "epoch": 0.46, "grad_norm": 2.087029457092285, "learning_rate": 0.00011390543134417067, "loss": 2.0063, "step": 5976 }, { 
"epoch": 0.46, "grad_norm": 1.1630672216415405, "learning_rate": 0.00011388167324041669, "loss": 1.4128, "step": 5977 }, { "epoch": 0.46, "grad_norm": 1.235951542854309, "learning_rate": 0.0001138579143376925, "loss": 1.5748, "step": 5978 }, { "epoch": 0.46, "grad_norm": 1.5047166347503662, "learning_rate": 0.00011383415463736566, "loss": 1.8011, "step": 5979 }, { "epoch": 0.46, "grad_norm": 1.0744316577911377, "learning_rate": 0.00011381039414080365, "loss": 1.7287, "step": 5980 }, { "epoch": 0.46, "grad_norm": 1.2451046705245972, "learning_rate": 0.00011378663284937399, "loss": 0.9203, "step": 5981 }, { "epoch": 0.46, "grad_norm": 1.174317717552185, "learning_rate": 0.0001137628707644443, "loss": 1.3837, "step": 5982 }, { "epoch": 0.46, "grad_norm": 2.269871234893799, "learning_rate": 0.00011373910788738222, "loss": 1.4761, "step": 5983 }, { "epoch": 0.46, "grad_norm": 1.711721420288086, "learning_rate": 0.00011371534421955542, "loss": 1.5354, "step": 5984 }, { "epoch": 0.46, "grad_norm": 1.2511473894119263, "learning_rate": 0.00011369157976233171, "loss": 1.2956, "step": 5985 }, { "epoch": 0.46, "grad_norm": 1.6645349264144897, "learning_rate": 0.00011366781451707879, "loss": 1.3203, "step": 5986 }, { "epoch": 0.46, "grad_norm": 1.173511266708374, "learning_rate": 0.00011364404848516454, "loss": 1.871, "step": 5987 }, { "epoch": 0.46, "grad_norm": 3.150061845779419, "learning_rate": 0.0001136202816679568, "loss": 1.7578, "step": 5988 }, { "epoch": 0.46, "grad_norm": 1.1853909492492676, "learning_rate": 0.00011359651406682351, "loss": 1.4345, "step": 5989 }, { "epoch": 0.46, "grad_norm": 3.753466844558716, "learning_rate": 0.00011357274568313258, "loss": 2.332, "step": 5990 }, { "epoch": 0.46, "grad_norm": 1.8936835527420044, "learning_rate": 0.00011354897651825209, "loss": 1.1762, "step": 5991 }, { "epoch": 0.46, "grad_norm": 1.4091287851333618, "learning_rate": 0.00011352520657355006, "loss": 1.4096, "step": 5992 }, { "epoch": 0.46, "grad_norm": 
1.6172184944152832, "learning_rate": 0.00011350143585039456, "loss": 1.7897, "step": 5993 }, { "epoch": 0.46, "grad_norm": 1.2420837879180908, "learning_rate": 0.00011347766435015377, "loss": 1.0292, "step": 5994 }, { "epoch": 0.46, "grad_norm": 0.8922459483146667, "learning_rate": 0.00011345389207419588, "loss": 1.0689, "step": 5995 }, { "epoch": 0.46, "grad_norm": 1.329221248626709, "learning_rate": 0.00011343011902388907, "loss": 1.2137, "step": 5996 }, { "epoch": 0.46, "grad_norm": 1.7419462203979492, "learning_rate": 0.0001134063452006017, "loss": 1.2386, "step": 5997 }, { "epoch": 0.46, "grad_norm": 2.2657976150512695, "learning_rate": 0.00011338257060570199, "loss": 1.1389, "step": 5998 }, { "epoch": 0.46, "grad_norm": 1.4416353702545166, "learning_rate": 0.00011335879524055836, "loss": 1.1399, "step": 5999 }, { "epoch": 0.46, "grad_norm": 1.8164888620376587, "learning_rate": 0.00011333501910653925, "loss": 2.3532, "step": 6000 }, { "epoch": 0.46, "grad_norm": 1.5652554035186768, "learning_rate": 0.00011331124220501308, "loss": 2.2564, "step": 6001 }, { "epoch": 0.46, "grad_norm": 1.4006953239440918, "learning_rate": 0.00011328746453734831, "loss": 1.0717, "step": 6002 }, { "epoch": 0.46, "grad_norm": 1.238061547279358, "learning_rate": 0.00011326368610491354, "loss": 1.4526, "step": 6003 }, { "epoch": 0.46, "grad_norm": 1.061079978942871, "learning_rate": 0.00011323990690907733, "loss": 1.1157, "step": 6004 }, { "epoch": 0.46, "grad_norm": 1.3650280237197876, "learning_rate": 0.00011321612695120832, "loss": 1.4477, "step": 6005 }, { "epoch": 0.46, "grad_norm": 1.3774418830871582, "learning_rate": 0.00011319234623267516, "loss": 1.5823, "step": 6006 }, { "epoch": 0.46, "grad_norm": 1.3667634725570679, "learning_rate": 0.00011316856475484658, "loss": 0.9999, "step": 6007 }, { "epoch": 0.46, "grad_norm": 1.2576771974563599, "learning_rate": 0.00011314478251909133, "loss": 1.2031, "step": 6008 }, { "epoch": 0.46, "grad_norm": 1.1287518739700317, 
"learning_rate": 0.00011312099952677825, "loss": 1.5146, "step": 6009 }, { "epoch": 0.46, "grad_norm": 2.776000499725342, "learning_rate": 0.00011309721577927619, "loss": 1.4911, "step": 6010 }, { "epoch": 0.46, "grad_norm": 1.3815009593963623, "learning_rate": 0.00011307343127795398, "loss": 1.6124, "step": 6011 }, { "epoch": 0.46, "grad_norm": 1.188010573387146, "learning_rate": 0.00011304964602418062, "loss": 0.9144, "step": 6012 }, { "epoch": 0.46, "grad_norm": 1.0442415475845337, "learning_rate": 0.00011302586001932505, "loss": 1.3163, "step": 6013 }, { "epoch": 0.46, "grad_norm": 1.1313891410827637, "learning_rate": 0.0001130020732647563, "loss": 1.494, "step": 6014 }, { "epoch": 0.46, "grad_norm": 1.1230943202972412, "learning_rate": 0.00011297828576184344, "loss": 1.6746, "step": 6015 }, { "epoch": 0.46, "grad_norm": 4.370004653930664, "learning_rate": 0.00011295449751195557, "loss": 2.5987, "step": 6016 }, { "epoch": 0.46, "grad_norm": 1.101141095161438, "learning_rate": 0.00011293070851646184, "loss": 0.5551, "step": 6017 }, { "epoch": 0.46, "grad_norm": 1.1552791595458984, "learning_rate": 0.00011290691877673145, "loss": 1.7145, "step": 6018 }, { "epoch": 0.46, "grad_norm": 1.6662920713424683, "learning_rate": 0.00011288312829413363, "loss": 1.2339, "step": 6019 }, { "epoch": 0.46, "grad_norm": 0.9797711372375488, "learning_rate": 0.00011285933707003769, "loss": 1.0817, "step": 6020 }, { "epoch": 0.46, "grad_norm": 1.7660207748413086, "learning_rate": 0.00011283554510581292, "loss": 1.5402, "step": 6021 }, { "epoch": 0.46, "grad_norm": 1.3112616539001465, "learning_rate": 0.00011281175240282867, "loss": 2.0412, "step": 6022 }, { "epoch": 0.46, "grad_norm": 1.4955332279205322, "learning_rate": 0.00011278795896245436, "loss": 1.3656, "step": 6023 }, { "epoch": 0.46, "grad_norm": 2.0092275142669678, "learning_rate": 0.00011276416478605949, "loss": 1.7698, "step": 6024 }, { "epoch": 0.46, "grad_norm": 1.5318515300750732, "learning_rate": 
0.00011274036987501348, "loss": 0.9247, "step": 6025 }, { "epoch": 0.46, "grad_norm": 1.3253226280212402, "learning_rate": 0.00011271657423068587, "loss": 1.5028, "step": 6026 }, { "epoch": 0.46, "grad_norm": 1.2428276538848877, "learning_rate": 0.0001126927778544463, "loss": 1.4156, "step": 6027 }, { "epoch": 0.46, "grad_norm": 1.7658860683441162, "learning_rate": 0.00011266898074766433, "loss": 1.5932, "step": 6028 }, { "epoch": 0.46, "grad_norm": 1.2295204401016235, "learning_rate": 0.00011264518291170963, "loss": 1.219, "step": 6029 }, { "epoch": 0.46, "grad_norm": 1.304413080215454, "learning_rate": 0.00011262138434795191, "loss": 1.4709, "step": 6030 }, { "epoch": 0.46, "grad_norm": 1.2383009195327759, "learning_rate": 0.00011259758505776092, "loss": 1.4736, "step": 6031 }, { "epoch": 0.46, "grad_norm": 1.6920067071914673, "learning_rate": 0.00011257378504250642, "loss": 1.4233, "step": 6032 }, { "epoch": 0.46, "grad_norm": 1.2252709865570068, "learning_rate": 0.00011254998430355829, "loss": 1.3863, "step": 6033 }, { "epoch": 0.46, "grad_norm": 1.2413809299468994, "learning_rate": 0.00011252618284228638, "loss": 0.9029, "step": 6034 }, { "epoch": 0.46, "grad_norm": 1.770340919494629, "learning_rate": 0.00011250238066006056, "loss": 1.0846, "step": 6035 }, { "epoch": 0.46, "grad_norm": 1.8556060791015625, "learning_rate": 0.00011247857775825079, "loss": 1.7874, "step": 6036 }, { "epoch": 0.46, "grad_norm": 2.082300901412964, "learning_rate": 0.00011245477413822712, "loss": 0.9455, "step": 6037 }, { "epoch": 0.46, "grad_norm": 1.3097578287124634, "learning_rate": 0.00011243096980135954, "loss": 1.5937, "step": 6038 }, { "epoch": 0.46, "grad_norm": 1.0739048719406128, "learning_rate": 0.00011240716474901814, "loss": 1.4468, "step": 6039 }, { "epoch": 0.46, "grad_norm": 1.153100609779358, "learning_rate": 0.00011238335898257304, "loss": 1.2733, "step": 6040 }, { "epoch": 0.46, "grad_norm": 1.1559380292892456, "learning_rate": 0.00011235955250339437, "loss": 
1.1889, "step": 6041 }, { "epoch": 0.46, "grad_norm": 1.3794281482696533, "learning_rate": 0.00011233574531285236, "loss": 1.2972, "step": 6042 }, { "epoch": 0.46, "grad_norm": 0.9745721220970154, "learning_rate": 0.00011231193741231727, "loss": 1.6599, "step": 6043 }, { "epoch": 0.46, "grad_norm": 1.3227864503860474, "learning_rate": 0.00011228812880315931, "loss": 1.3083, "step": 6044 }, { "epoch": 0.46, "grad_norm": 2.466663122177124, "learning_rate": 0.00011226431948674889, "loss": 1.5575, "step": 6045 }, { "epoch": 0.46, "grad_norm": 4.409992694854736, "learning_rate": 0.00011224050946445628, "loss": 1.4121, "step": 6046 }, { "epoch": 0.46, "grad_norm": 2.8917407989501953, "learning_rate": 0.00011221669873765199, "loss": 1.3924, "step": 6047 }, { "epoch": 0.46, "grad_norm": 1.4245901107788086, "learning_rate": 0.00011219288730770638, "loss": 1.1709, "step": 6048 }, { "epoch": 0.46, "grad_norm": 1.3119663000106812, "learning_rate": 0.00011216907517598996, "loss": 1.1851, "step": 6049 }, { "epoch": 0.46, "grad_norm": 1.1139777898788452, "learning_rate": 0.00011214526234387329, "loss": 1.4817, "step": 6050 }, { "epoch": 0.46, "grad_norm": 3.44922137260437, "learning_rate": 0.00011212144881272688, "loss": 1.7148, "step": 6051 }, { "epoch": 0.46, "grad_norm": 0.9861377477645874, "learning_rate": 0.00011209763458392135, "loss": 1.592, "step": 6052 }, { "epoch": 0.46, "grad_norm": 3.3755745887756348, "learning_rate": 0.00011207381965882738, "loss": 2.36, "step": 6053 }, { "epoch": 0.46, "grad_norm": 1.2282110452651978, "learning_rate": 0.00011205000403881563, "loss": 1.5392, "step": 6054 }, { "epoch": 0.46, "grad_norm": 1.2576736211776733, "learning_rate": 0.0001120261877252568, "loss": 1.0794, "step": 6055 }, { "epoch": 0.46, "grad_norm": 1.0867451429367065, "learning_rate": 0.00011200237071952169, "loss": 1.2129, "step": 6056 }, { "epoch": 0.46, "grad_norm": 1.2307544946670532, "learning_rate": 0.00011197855302298115, "loss": 1.2293, "step": 6057 }, { "epoch": 
0.46, "grad_norm": 1.010214924812317, "learning_rate": 0.0001119547346370059, "loss": 1.29, "step": 6058 }, { "epoch": 0.46, "grad_norm": 2.2320525646209717, "learning_rate": 0.00011193091556296693, "loss": 2.6516, "step": 6059 }, { "epoch": 0.46, "grad_norm": 1.3282630443572998, "learning_rate": 0.00011190709580223515, "loss": 1.3603, "step": 6060 }, { "epoch": 0.46, "grad_norm": 1.7089418172836304, "learning_rate": 0.0001118832753561815, "loss": 1.7387, "step": 6061 }, { "epoch": 0.46, "grad_norm": 1.628678798675537, "learning_rate": 0.000111859454226177, "loss": 1.3997, "step": 6062 }, { "epoch": 0.46, "grad_norm": 2.30953311920166, "learning_rate": 0.00011183563241359267, "loss": 1.6338, "step": 6063 }, { "epoch": 0.46, "grad_norm": 1.5678757429122925, "learning_rate": 0.0001118118099197996, "loss": 1.737, "step": 6064 }, { "epoch": 0.46, "grad_norm": 1.558632493019104, "learning_rate": 0.00011178798674616892, "loss": 0.6495, "step": 6065 }, { "epoch": 0.46, "grad_norm": 1.2542566061019897, "learning_rate": 0.00011176416289407183, "loss": 1.7893, "step": 6066 }, { "epoch": 0.46, "grad_norm": 2.59071683883667, "learning_rate": 0.00011174033836487945, "loss": 1.9156, "step": 6067 }, { "epoch": 0.46, "grad_norm": 1.2604745626449585, "learning_rate": 0.00011171651315996308, "loss": 2.207, "step": 6068 }, { "epoch": 0.46, "grad_norm": 1.200278878211975, "learning_rate": 0.00011169268728069397, "loss": 1.4924, "step": 6069 }, { "epoch": 0.46, "grad_norm": 1.0104448795318604, "learning_rate": 0.00011166886072844342, "loss": 1.096, "step": 6070 }, { "epoch": 0.46, "grad_norm": 1.0522267818450928, "learning_rate": 0.00011164503350458286, "loss": 1.3772, "step": 6071 }, { "epoch": 0.46, "grad_norm": 1.1274312734603882, "learning_rate": 0.00011162120561048362, "loss": 1.4056, "step": 6072 }, { "epoch": 0.46, "grad_norm": 2.8445141315460205, "learning_rate": 0.00011159737704751709, "loss": 1.8484, "step": 6073 }, { "epoch": 0.46, "grad_norm": 1.2190419435501099, 
"learning_rate": 0.00011157354781705482, "loss": 1.0978, "step": 6074 }, { "epoch": 0.46, "grad_norm": 1.1819788217544556, "learning_rate": 0.00011154971792046831, "loss": 1.618, "step": 6075 }, { "epoch": 0.46, "grad_norm": 1.3215322494506836, "learning_rate": 0.0001115258873591291, "loss": 1.3461, "step": 6076 }, { "epoch": 0.46, "grad_norm": 1.522384762763977, "learning_rate": 0.00011150205613440876, "loss": 1.4705, "step": 6077 }, { "epoch": 0.46, "grad_norm": 2.4131624698638916, "learning_rate": 0.0001114782242476789, "loss": 1.6226, "step": 6078 }, { "epoch": 0.46, "grad_norm": 2.4589829444885254, "learning_rate": 0.0001114543917003112, "loss": 1.5035, "step": 6079 }, { "epoch": 0.46, "grad_norm": 2.457533359527588, "learning_rate": 0.00011143055849367738, "loss": 1.908, "step": 6080 }, { "epoch": 0.46, "grad_norm": 1.569649338722229, "learning_rate": 0.0001114067246291492, "loss": 1.3734, "step": 6081 }, { "epoch": 0.46, "grad_norm": 1.1566849946975708, "learning_rate": 0.00011138289010809835, "loss": 0.8831, "step": 6082 }, { "epoch": 0.46, "grad_norm": 1.0508455038070679, "learning_rate": 0.00011135905493189671, "loss": 0.9276, "step": 6083 }, { "epoch": 0.46, "grad_norm": 2.438366174697876, "learning_rate": 0.00011133521910191611, "loss": 0.7013, "step": 6084 }, { "epoch": 0.46, "grad_norm": 1.7070553302764893, "learning_rate": 0.00011131138261952845, "loss": 1.0517, "step": 6085 }, { "epoch": 0.46, "grad_norm": 1.4486273527145386, "learning_rate": 0.00011128754548610563, "loss": 1.0646, "step": 6086 }, { "epoch": 0.46, "grad_norm": 2.2847976684570312, "learning_rate": 0.00011126370770301966, "loss": 1.7438, "step": 6087 }, { "epoch": 0.46, "grad_norm": 1.4841240644454956, "learning_rate": 0.00011123986927164247, "loss": 1.0727, "step": 6088 }, { "epoch": 0.46, "grad_norm": 2.173295259475708, "learning_rate": 0.00011121603019334618, "loss": 1.766, "step": 6089 }, { "epoch": 0.46, "grad_norm": 1.7499140501022339, "learning_rate": 0.00011119219046950284, 
"loss": 2.3612, "step": 6090 }, { "epoch": 0.46, "grad_norm": 1.7472127676010132, "learning_rate": 0.00011116835010148452, "loss": 1.4179, "step": 6091 }, { "epoch": 0.46, "grad_norm": 2.0020039081573486, "learning_rate": 0.00011114450909066343, "loss": 1.3668, "step": 6092 }, { "epoch": 0.46, "grad_norm": 1.0443841218948364, "learning_rate": 0.00011112066743841172, "loss": 1.1489, "step": 6093 }, { "epoch": 0.46, "grad_norm": 1.0489921569824219, "learning_rate": 0.00011109682514610163, "loss": 1.1677, "step": 6094 }, { "epoch": 0.47, "grad_norm": 3.6305196285247803, "learning_rate": 0.00011107298221510543, "loss": 1.707, "step": 6095 }, { "epoch": 0.47, "grad_norm": 2.0209591388702393, "learning_rate": 0.00011104913864679538, "loss": 1.6068, "step": 6096 }, { "epoch": 0.47, "grad_norm": 1.1291230916976929, "learning_rate": 0.00011102529444254383, "loss": 1.0964, "step": 6097 }, { "epoch": 0.47, "grad_norm": 1.1382437944412231, "learning_rate": 0.00011100144960372318, "loss": 1.0862, "step": 6098 }, { "epoch": 0.47, "grad_norm": 2.0122945308685303, "learning_rate": 0.00011097760413170582, "loss": 1.1792, "step": 6099 }, { "epoch": 0.47, "grad_norm": 4.986934185028076, "learning_rate": 0.00011095375802786419, "loss": 1.8187, "step": 6100 }, { "epoch": 0.47, "grad_norm": 1.1209102869033813, "learning_rate": 0.00011092991129357075, "loss": 1.6186, "step": 6101 }, { "epoch": 0.47, "grad_norm": 1.0258212089538574, "learning_rate": 0.00011090606393019806, "loss": 0.9543, "step": 6102 }, { "epoch": 0.47, "grad_norm": 1.4603657722473145, "learning_rate": 0.00011088221593911862, "loss": 1.215, "step": 6103 }, { "epoch": 0.47, "grad_norm": 2.828179359436035, "learning_rate": 0.00011085836732170512, "loss": 1.2237, "step": 6104 }, { "epoch": 0.47, "grad_norm": 1.1688417196273804, "learning_rate": 0.00011083451807933008, "loss": 1.6308, "step": 6105 }, { "epoch": 0.47, "grad_norm": 1.5454661846160889, "learning_rate": 0.00011081066821336617, "loss": 1.8719, "step": 6106 }, { 
"epoch": 0.47, "grad_norm": 2.2055935859680176, "learning_rate": 0.00011078681772518613, "loss": 1.487, "step": 6107 }, { "epoch": 0.47, "grad_norm": 1.3226110935211182, "learning_rate": 0.00011076296661616268, "loss": 1.334, "step": 6108 }, { "epoch": 0.47, "grad_norm": 1.119396448135376, "learning_rate": 0.00011073911488766859, "loss": 1.5922, "step": 6109 }, { "epoch": 0.47, "grad_norm": 1.9786494970321655, "learning_rate": 0.00011071526254107667, "loss": 1.1437, "step": 6110 }, { "epoch": 0.47, "grad_norm": 3.4262032508850098, "learning_rate": 0.00011069140957775974, "loss": 2.1067, "step": 6111 }, { "epoch": 0.47, "grad_norm": 1.3915773630142212, "learning_rate": 0.00011066755599909064, "loss": 1.2384, "step": 6112 }, { "epoch": 0.47, "grad_norm": 2.5149099826812744, "learning_rate": 0.00011064370180644239, "loss": 2.0435, "step": 6113 }, { "epoch": 0.47, "grad_norm": 1.443051815032959, "learning_rate": 0.00011061984700118786, "loss": 0.7431, "step": 6114 }, { "epoch": 0.47, "grad_norm": 1.408818006515503, "learning_rate": 0.00011059599158470002, "loss": 1.6076, "step": 6115 }, { "epoch": 0.47, "grad_norm": 1.8487436771392822, "learning_rate": 0.00011057213555835191, "loss": 1.939, "step": 6116 }, { "epoch": 0.47, "grad_norm": 1.2591646909713745, "learning_rate": 0.00011054827892351655, "loss": 1.2319, "step": 6117 }, { "epoch": 0.47, "grad_norm": 0.731316864490509, "learning_rate": 0.00011052442168156713, "loss": 0.9967, "step": 6118 }, { "epoch": 0.47, "grad_norm": 0.8808165192604065, "learning_rate": 0.00011050056383387669, "loss": 1.2183, "step": 6119 }, { "epoch": 0.47, "grad_norm": 1.2435249090194702, "learning_rate": 0.00011047670538181834, "loss": 1.4878, "step": 6120 }, { "epoch": 0.47, "grad_norm": 1.4049510955810547, "learning_rate": 0.00011045284632676536, "loss": 1.2174, "step": 6121 }, { "epoch": 0.47, "grad_norm": 1.517120599746704, "learning_rate": 0.00011042898667009093, "loss": 1.8238, "step": 6122 }, { "epoch": 0.47, "grad_norm": 
1.2560362815856934, "learning_rate": 0.00011040512641316834, "loss": 1.1158, "step": 6123 }, { "epoch": 0.47, "grad_norm": 1.5110710859298706, "learning_rate": 0.00011038126555737085, "loss": 1.5203, "step": 6124 }, { "epoch": 0.47, "grad_norm": 1.824556827545166, "learning_rate": 0.00011035740410407182, "loss": 1.3623, "step": 6125 }, { "epoch": 0.47, "grad_norm": 1.5702282190322876, "learning_rate": 0.00011033354205464457, "loss": 1.2945, "step": 6126 }, { "epoch": 0.47, "grad_norm": 2.636125087738037, "learning_rate": 0.00011030967941046256, "loss": 2.0104, "step": 6127 }, { "epoch": 0.47, "grad_norm": 1.3838715553283691, "learning_rate": 0.00011028581617289921, "loss": 1.8279, "step": 6128 }, { "epoch": 0.47, "grad_norm": 1.676055908203125, "learning_rate": 0.00011026195234332793, "loss": 1.7043, "step": 6129 }, { "epoch": 0.47, "grad_norm": 1.1925486326217651, "learning_rate": 0.00011023808792312227, "loss": 0.9913, "step": 6130 }, { "epoch": 0.47, "grad_norm": 1.1202458143234253, "learning_rate": 0.00011021422291365575, "loss": 1.7397, "step": 6131 }, { "epoch": 0.47, "grad_norm": 1.4098315238952637, "learning_rate": 0.00011019035731630197, "loss": 1.3317, "step": 6132 }, { "epoch": 0.47, "grad_norm": 1.3706330060958862, "learning_rate": 0.00011016649113243447, "loss": 1.3538, "step": 6133 }, { "epoch": 0.47, "grad_norm": 1.2702078819274902, "learning_rate": 0.00011014262436342693, "loss": 1.1087, "step": 6134 }, { "epoch": 0.47, "grad_norm": 0.9466825723648071, "learning_rate": 0.000110118757010653, "loss": 1.3258, "step": 6135 }, { "epoch": 0.47, "grad_norm": 1.8730918169021606, "learning_rate": 0.00011009488907548642, "loss": 1.6285, "step": 6136 }, { "epoch": 0.47, "grad_norm": 2.2587192058563232, "learning_rate": 0.0001100710205593009, "loss": 1.2573, "step": 6137 }, { "epoch": 0.47, "grad_norm": 1.4745489358901978, "learning_rate": 0.00011004715146347018, "loss": 0.8089, "step": 6138 }, { "epoch": 0.47, "grad_norm": 1.7566899061203003, "learning_rate": 
0.00011002328178936811, "loss": 1.874, "step": 6139 }, { "epoch": 0.47, "grad_norm": 1.5442451238632202, "learning_rate": 0.00010999941153836853, "loss": 1.5701, "step": 6140 }, { "epoch": 0.47, "grad_norm": 2.6292660236358643, "learning_rate": 0.00010997554071184528, "loss": 1.0403, "step": 6141 }, { "epoch": 0.47, "grad_norm": 1.080254316329956, "learning_rate": 0.00010995166931117229, "loss": 0.7345, "step": 6142 }, { "epoch": 0.47, "grad_norm": 1.8212741613388062, "learning_rate": 0.00010992779733772347, "loss": 1.6567, "step": 6143 }, { "epoch": 0.47, "grad_norm": 1.6153638362884521, "learning_rate": 0.00010990392479287277, "loss": 1.4174, "step": 6144 }, { "epoch": 0.47, "grad_norm": 0.9252447485923767, "learning_rate": 0.00010988005167799427, "loss": 0.8582, "step": 6145 }, { "epoch": 0.47, "grad_norm": 1.4866955280303955, "learning_rate": 0.00010985617799446193, "loss": 0.8792, "step": 6146 }, { "epoch": 0.47, "grad_norm": 3.622668981552124, "learning_rate": 0.00010983230374364986, "loss": 0.7016, "step": 6147 }, { "epoch": 0.47, "grad_norm": 1.545378565788269, "learning_rate": 0.00010980842892693216, "loss": 1.4567, "step": 6148 }, { "epoch": 0.47, "grad_norm": 1.3819690942764282, "learning_rate": 0.00010978455354568293, "loss": 1.5662, "step": 6149 }, { "epoch": 0.47, "grad_norm": 0.9966606497764587, "learning_rate": 0.00010976067760127635, "loss": 1.4867, "step": 6150 }, { "epoch": 0.47, "grad_norm": 1.2798508405685425, "learning_rate": 0.00010973680109508666, "loss": 1.2688, "step": 6151 }, { "epoch": 0.47, "grad_norm": 1.8272812366485596, "learning_rate": 0.00010971292402848803, "loss": 1.1913, "step": 6152 }, { "epoch": 0.47, "grad_norm": 1.1968938112258911, "learning_rate": 0.00010968904640285473, "loss": 1.2574, "step": 6153 }, { "epoch": 0.47, "grad_norm": 1.1392593383789062, "learning_rate": 0.0001096651682195611, "loss": 1.5116, "step": 6154 }, { "epoch": 0.47, "grad_norm": 1.6400192975997925, "learning_rate": 0.00010964128947998142, "loss": 
0.9916, "step": 6155 }, { "epoch": 0.47, "grad_norm": 2.7397522926330566, "learning_rate": 0.00010961741018549007, "loss": 2.119, "step": 6156 }, { "epoch": 0.47, "grad_norm": 1.019266963005066, "learning_rate": 0.00010959353033746147, "loss": 1.3868, "step": 6157 }, { "epoch": 0.47, "grad_norm": 1.6932507753372192, "learning_rate": 0.00010956964993726997, "loss": 1.7552, "step": 6158 }, { "epoch": 0.47, "grad_norm": 1.4991956949234009, "learning_rate": 0.00010954576898629007, "loss": 1.1418, "step": 6159 }, { "epoch": 0.47, "grad_norm": 2.2523462772369385, "learning_rate": 0.00010952188748589625, "loss": 1.8668, "step": 6160 }, { "epoch": 0.47, "grad_norm": 1.0193480253219604, "learning_rate": 0.00010949800543746308, "loss": 0.6796, "step": 6161 }, { "epoch": 0.47, "grad_norm": 1.4589598178863525, "learning_rate": 0.00010947412284236499, "loss": 1.6528, "step": 6162 }, { "epoch": 0.47, "grad_norm": 2.401432752609253, "learning_rate": 0.00010945023970197666, "loss": 1.9841, "step": 6163 }, { "epoch": 0.47, "grad_norm": 1.5770561695098877, "learning_rate": 0.00010942635601767266, "loss": 1.3803, "step": 6164 }, { "epoch": 0.47, "grad_norm": 1.0641082525253296, "learning_rate": 0.00010940247179082766, "loss": 1.3281, "step": 6165 }, { "epoch": 0.47, "grad_norm": 1.0682114362716675, "learning_rate": 0.00010937858702281631, "loss": 0.9841, "step": 6166 }, { "epoch": 0.47, "grad_norm": 1.923858880996704, "learning_rate": 0.00010935470171501332, "loss": 1.6158, "step": 6167 }, { "epoch": 0.47, "grad_norm": 1.6077885627746582, "learning_rate": 0.00010933081586879341, "loss": 1.0777, "step": 6168 }, { "epoch": 0.47, "grad_norm": 1.9036999940872192, "learning_rate": 0.0001093069294855314, "loss": 0.6364, "step": 6169 }, { "epoch": 0.47, "grad_norm": 1.0183491706848145, "learning_rate": 0.00010928304256660209, "loss": 1.4893, "step": 6170 }, { "epoch": 0.47, "grad_norm": 0.9689347743988037, "learning_rate": 0.00010925915511338023, "loss": 0.8489, "step": 6171 }, { "epoch": 
0.47, "grad_norm": 1.4216586351394653, "learning_rate": 0.00010923526712724073, "loss": 1.7096, "step": 6172 }, { "epoch": 0.47, "grad_norm": 1.4723584651947021, "learning_rate": 0.0001092113786095585, "loss": 1.4534, "step": 6173 }, { "epoch": 0.47, "grad_norm": 1.5320508480072021, "learning_rate": 0.00010918748956170843, "loss": 1.2318, "step": 6174 }, { "epoch": 0.47, "grad_norm": 1.279924988746643, "learning_rate": 0.0001091635999850655, "loss": 1.1632, "step": 6175 }, { "epoch": 0.47, "grad_norm": 2.664663314819336, "learning_rate": 0.00010913970988100463, "loss": 1.8563, "step": 6176 }, { "epoch": 0.47, "grad_norm": 1.3579820394515991, "learning_rate": 0.00010911581925090088, "loss": 1.2606, "step": 6177 }, { "epoch": 0.47, "grad_norm": 1.035343050956726, "learning_rate": 0.00010909192809612933, "loss": 1.327, "step": 6178 }, { "epoch": 0.47, "grad_norm": 1.4307533502578735, "learning_rate": 0.00010906803641806502, "loss": 1.2919, "step": 6179 }, { "epoch": 0.47, "grad_norm": 1.0585452318191528, "learning_rate": 0.00010904414421808303, "loss": 1.8761, "step": 6180 }, { "epoch": 0.47, "grad_norm": 1.2315324544906616, "learning_rate": 0.00010902025149755852, "loss": 1.0067, "step": 6181 }, { "epoch": 0.47, "grad_norm": 1.6803690195083618, "learning_rate": 0.00010899635825786662, "loss": 1.2743, "step": 6182 }, { "epoch": 0.47, "grad_norm": 2.6334898471832275, "learning_rate": 0.00010897246450038256, "loss": 1.6682, "step": 6183 }, { "epoch": 0.47, "grad_norm": 1.4570642709732056, "learning_rate": 0.0001089485702264816, "loss": 1.3078, "step": 6184 }, { "epoch": 0.47, "grad_norm": 1.3842980861663818, "learning_rate": 0.00010892467543753888, "loss": 1.0199, "step": 6185 }, { "epoch": 0.47, "grad_norm": 1.485168695449829, "learning_rate": 0.00010890078013492976, "loss": 1.4041, "step": 6186 }, { "epoch": 0.47, "grad_norm": 2.2214419841766357, "learning_rate": 0.00010887688432002958, "loss": 1.6545, "step": 6187 }, { "epoch": 0.47, "grad_norm": 1.07343590259552, 
"learning_rate": 0.00010885298799421362, "loss": 1.8018, "step": 6188 }, { "epoch": 0.47, "grad_norm": 1.164033055305481, "learning_rate": 0.00010882909115885726, "loss": 1.8315, "step": 6189 }, { "epoch": 0.47, "grad_norm": 1.3156732320785522, "learning_rate": 0.00010880519381533592, "loss": 1.1155, "step": 6190 }, { "epoch": 0.47, "grad_norm": 1.2530723810195923, "learning_rate": 0.000108781295965025, "loss": 1.3672, "step": 6191 }, { "epoch": 0.47, "grad_norm": 1.6525115966796875, "learning_rate": 0.00010875739760930002, "loss": 1.1112, "step": 6192 }, { "epoch": 0.47, "grad_norm": 1.369551658630371, "learning_rate": 0.0001087334987495364, "loss": 1.4651, "step": 6193 }, { "epoch": 0.47, "grad_norm": 1.0916296243667603, "learning_rate": 0.00010870959938710972, "loss": 1.5402, "step": 6194 }, { "epoch": 0.47, "grad_norm": 1.0303581953048706, "learning_rate": 0.00010868569952339547, "loss": 1.6365, "step": 6195 }, { "epoch": 0.47, "grad_norm": 1.2153642177581787, "learning_rate": 0.00010866179915976924, "loss": 1.2506, "step": 6196 }, { "epoch": 0.47, "grad_norm": 1.1383283138275146, "learning_rate": 0.00010863789829760663, "loss": 0.6829, "step": 6197 }, { "epoch": 0.47, "grad_norm": 1.0425794124603271, "learning_rate": 0.00010861399693828331, "loss": 1.1295, "step": 6198 }, { "epoch": 0.47, "grad_norm": 2.057981014251709, "learning_rate": 0.00010859009508317492, "loss": 1.4698, "step": 6199 }, { "epoch": 0.47, "grad_norm": 2.59928822517395, "learning_rate": 0.0001085661927336571, "loss": 2.306, "step": 6200 }, { "epoch": 0.47, "grad_norm": 1.5776727199554443, "learning_rate": 0.00010854228989110565, "loss": 1.8066, "step": 6201 }, { "epoch": 0.47, "grad_norm": 1.5095187425613403, "learning_rate": 0.00010851838655689625, "loss": 1.362, "step": 6202 }, { "epoch": 0.47, "grad_norm": 1.6473785638809204, "learning_rate": 0.0001084944827324047, "loss": 0.7619, "step": 6203 }, { "epoch": 0.47, "grad_norm": 3.4356088638305664, "learning_rate": 0.00010847057841900681, 
"loss": 1.6636, "step": 6204 }, { "epoch": 0.47, "grad_norm": 1.4628461599349976, "learning_rate": 0.00010844667361807842, "loss": 0.9676, "step": 6205 }, { "epoch": 0.47, "grad_norm": 1.4806137084960938, "learning_rate": 0.00010842276833099534, "loss": 1.2178, "step": 6206 }, { "epoch": 0.47, "grad_norm": 1.1501374244689941, "learning_rate": 0.0001083988625591335, "loss": 1.3178, "step": 6207 }, { "epoch": 0.47, "grad_norm": 1.5512245893478394, "learning_rate": 0.00010837495630386882, "loss": 1.5526, "step": 6208 }, { "epoch": 0.47, "grad_norm": 1.2729791402816772, "learning_rate": 0.0001083510495665772, "loss": 1.4744, "step": 6209 }, { "epoch": 0.47, "grad_norm": 1.1095324754714966, "learning_rate": 0.00010832714234863466, "loss": 1.7838, "step": 6210 }, { "epoch": 0.47, "grad_norm": 1.5531595945358276, "learning_rate": 0.00010830323465141719, "loss": 1.249, "step": 6211 }, { "epoch": 0.47, "grad_norm": 1.5456243753433228, "learning_rate": 0.00010827932647630077, "loss": 1.4156, "step": 6212 }, { "epoch": 0.47, "grad_norm": 1.6702971458435059, "learning_rate": 0.00010825541782466149, "loss": 1.943, "step": 6213 }, { "epoch": 0.47, "grad_norm": 1.0470209121704102, "learning_rate": 0.00010823150869787542, "loss": 1.1528, "step": 6214 }, { "epoch": 0.47, "grad_norm": 1.2435452938079834, "learning_rate": 0.00010820759909731866, "loss": 1.5766, "step": 6215 }, { "epoch": 0.47, "grad_norm": 1.0122798681259155, "learning_rate": 0.00010818368902436738, "loss": 1.7415, "step": 6216 }, { "epoch": 0.47, "grad_norm": 1.3650566339492798, "learning_rate": 0.0001081597784803977, "loss": 0.8716, "step": 6217 }, { "epoch": 0.47, "grad_norm": 1.4831217527389526, "learning_rate": 0.00010813586746678583, "loss": 1.3846, "step": 6218 }, { "epoch": 0.47, "grad_norm": 1.3527950048446655, "learning_rate": 0.000108111955984908, "loss": 1.5002, "step": 6219 }, { "epoch": 0.47, "grad_norm": 2.485074043273926, "learning_rate": 0.00010808804403614043, "loss": 1.1732, "step": 6220 }, { 
"epoch": 0.47, "grad_norm": 3.3928802013397217, "learning_rate": 0.0001080641316218594, "loss": 1.5025, "step": 6221 }, { "epoch": 0.47, "grad_norm": 1.3991765975952148, "learning_rate": 0.0001080402187434412, "loss": 1.3817, "step": 6222 }, { "epoch": 0.47, "grad_norm": 1.740664005279541, "learning_rate": 0.00010801630540226216, "loss": 1.3103, "step": 6223 }, { "epoch": 0.47, "grad_norm": 5.2657551765441895, "learning_rate": 0.00010799239159969861, "loss": 1.9284, "step": 6224 }, { "epoch": 0.47, "grad_norm": 1.1531383991241455, "learning_rate": 0.00010796847733712696, "loss": 1.3677, "step": 6225 }, { "epoch": 0.48, "grad_norm": 1.7641987800598145, "learning_rate": 0.00010794456261592361, "loss": 1.1099, "step": 6226 }, { "epoch": 0.48, "grad_norm": 3.3140037059783936, "learning_rate": 0.00010792064743746498, "loss": 1.5335, "step": 6227 }, { "epoch": 0.48, "grad_norm": 1.6061865091323853, "learning_rate": 0.00010789673180312752, "loss": 2.3079, "step": 6228 }, { "epoch": 0.48, "grad_norm": 2.603733777999878, "learning_rate": 0.00010787281571428772, "loss": 1.4069, "step": 6229 }, { "epoch": 0.48, "grad_norm": 1.3606497049331665, "learning_rate": 0.00010784889917232206, "loss": 1.5247, "step": 6230 }, { "epoch": 0.48, "grad_norm": 1.4867589473724365, "learning_rate": 0.00010782498217860712, "loss": 1.3837, "step": 6231 }, { "epoch": 0.48, "grad_norm": 0.843737006187439, "learning_rate": 0.00010780106473451946, "loss": 1.31, "step": 6232 }, { "epoch": 0.48, "grad_norm": 1.2663817405700684, "learning_rate": 0.00010777714684143561, "loss": 1.761, "step": 6233 }, { "epoch": 0.48, "grad_norm": 2.92199969291687, "learning_rate": 0.00010775322850073223, "loss": 1.445, "step": 6234 }, { "epoch": 0.48, "grad_norm": 1.7447340488433838, "learning_rate": 0.00010772930971378596, "loss": 1.5821, "step": 6235 }, { "epoch": 0.48, "grad_norm": 1.179990530014038, "learning_rate": 0.00010770539048197346, "loss": 1.0243, "step": 6236 }, { "epoch": 0.48, "grad_norm": 
1.0064547061920166, "learning_rate": 0.00010768147080667139, "loss": 1.0399, "step": 6237 }, { "epoch": 0.48, "grad_norm": 3.8487887382507324, "learning_rate": 0.00010765755068925649, "loss": 2.0543, "step": 6238 }, { "epoch": 0.48, "grad_norm": 0.9372012615203857, "learning_rate": 0.0001076336301311055, "loss": 1.0111, "step": 6239 }, { "epoch": 0.48, "grad_norm": 1.1001914739608765, "learning_rate": 0.00010760970913359518, "loss": 1.6592, "step": 6240 }, { "epoch": 0.48, "grad_norm": 1.006653904914856, "learning_rate": 0.00010758578769810235, "loss": 1.1291, "step": 6241 }, { "epoch": 0.48, "grad_norm": 2.632192850112915, "learning_rate": 0.00010756186582600376, "loss": 1.9286, "step": 6242 }, { "epoch": 0.48, "grad_norm": 0.7163540124893188, "learning_rate": 0.0001075379435186763, "loss": 0.8132, "step": 6243 }, { "epoch": 0.48, "grad_norm": 1.1978068351745605, "learning_rate": 0.00010751402077749684, "loss": 1.4854, "step": 6244 }, { "epoch": 0.48, "grad_norm": 1.3181979656219482, "learning_rate": 0.00010749009760384226, "loss": 1.1229, "step": 6245 }, { "epoch": 0.48, "grad_norm": 1.5483436584472656, "learning_rate": 0.00010746617399908947, "loss": 1.8643, "step": 6246 }, { "epoch": 0.48, "grad_norm": 1.469643473625183, "learning_rate": 0.0001074422499646154, "loss": 1.3956, "step": 6247 }, { "epoch": 0.48, "grad_norm": 2.547907590866089, "learning_rate": 0.00010741832550179702, "loss": 1.6221, "step": 6248 }, { "epoch": 0.48, "grad_norm": 2.7705230712890625, "learning_rate": 0.00010739440061201139, "loss": 1.4377, "step": 6249 }, { "epoch": 0.48, "grad_norm": 1.3827121257781982, "learning_rate": 0.00010737047529663545, "loss": 2.039, "step": 6250 }, { "epoch": 0.48, "grad_norm": 3.054854393005371, "learning_rate": 0.00010734654955704622, "loss": 1.7315, "step": 6251 }, { "epoch": 0.48, "grad_norm": 1.304291009902954, "learning_rate": 0.0001073226233946208, "loss": 1.4513, "step": 6252 }, { "epoch": 0.48, "grad_norm": 1.5044198036193848, "learning_rate": 
0.0001072986968107363, "loss": 1.6825, "step": 6253 }, { "epoch": 0.48, "grad_norm": 1.353761911392212, "learning_rate": 0.00010727476980676983, "loss": 1.3745, "step": 6254 }, { "epoch": 0.48, "grad_norm": 1.6025265455245972, "learning_rate": 0.00010725084238409848, "loss": 1.8098, "step": 6255 }, { "epoch": 0.48, "grad_norm": 1.4333640336990356, "learning_rate": 0.00010722691454409943, "loss": 1.537, "step": 6256 }, { "epoch": 0.48, "grad_norm": 2.7109873294830322, "learning_rate": 0.00010720298628814991, "loss": 1.56, "step": 6257 }, { "epoch": 0.48, "grad_norm": 2.094747543334961, "learning_rate": 0.00010717905761762707, "loss": 1.3865, "step": 6258 }, { "epoch": 0.48, "grad_norm": 1.8793833255767822, "learning_rate": 0.00010715512853390817, "loss": 0.9105, "step": 6259 }, { "epoch": 0.48, "grad_norm": 1.1908761262893677, "learning_rate": 0.00010713119903837045, "loss": 1.1044, "step": 6260 }, { "epoch": 0.48, "grad_norm": 4.133609771728516, "learning_rate": 0.00010710726913239122, "loss": 1.9607, "step": 6261 }, { "epoch": 0.48, "grad_norm": 1.2075468301773071, "learning_rate": 0.00010708333881734776, "loss": 1.2021, "step": 6262 }, { "epoch": 0.48, "grad_norm": 1.3343462944030762, "learning_rate": 0.00010705940809461739, "loss": 1.5616, "step": 6263 }, { "epoch": 0.48, "grad_norm": 1.156516432762146, "learning_rate": 0.00010703547696557752, "loss": 1.3114, "step": 6264 }, { "epoch": 0.48, "grad_norm": 1.1630797386169434, "learning_rate": 0.00010701154543160541, "loss": 1.2145, "step": 6265 }, { "epoch": 0.48, "grad_norm": 1.3947426080703735, "learning_rate": 0.00010698761349407855, "loss": 1.3381, "step": 6266 }, { "epoch": 0.48, "grad_norm": 1.9020156860351562, "learning_rate": 0.00010696368115437436, "loss": 1.6633, "step": 6267 }, { "epoch": 0.48, "grad_norm": 1.2052934169769287, "learning_rate": 0.00010693974841387024, "loss": 1.7863, "step": 6268 }, { "epoch": 0.48, "grad_norm": 1.3442034721374512, "learning_rate": 0.0001069158152739437, "loss": 1.2127, 
"step": 6269 }, { "epoch": 0.48, "grad_norm": 1.3552123308181763, "learning_rate": 0.00010689188173597218, "loss": 0.8177, "step": 6270 }, { "epoch": 0.48, "grad_norm": 1.3658225536346436, "learning_rate": 0.0001068679478013332, "loss": 1.2209, "step": 6271 }, { "epoch": 0.48, "grad_norm": 1.5371191501617432, "learning_rate": 0.00010684401347140434, "loss": 1.1264, "step": 6272 }, { "epoch": 0.48, "grad_norm": 1.4994407892227173, "learning_rate": 0.00010682007874756314, "loss": 1.4714, "step": 6273 }, { "epoch": 0.48, "grad_norm": 2.15763258934021, "learning_rate": 0.00010679614363118717, "loss": 1.4945, "step": 6274 }, { "epoch": 0.48, "grad_norm": 0.8649325370788574, "learning_rate": 0.00010677220812365405, "loss": 0.9877, "step": 6275 }, { "epoch": 0.48, "grad_norm": 1.834121823310852, "learning_rate": 0.00010674827222634137, "loss": 1.1817, "step": 6276 }, { "epoch": 0.48, "grad_norm": 1.3314309120178223, "learning_rate": 0.00010672433594062679, "loss": 1.2244, "step": 6277 }, { "epoch": 0.48, "grad_norm": 1.5746185779571533, "learning_rate": 0.00010670039926788804, "loss": 1.8399, "step": 6278 }, { "epoch": 0.48, "grad_norm": 1.129408597946167, "learning_rate": 0.00010667646220950276, "loss": 1.4497, "step": 6279 }, { "epoch": 0.48, "grad_norm": 1.8567843437194824, "learning_rate": 0.00010665252476684864, "loss": 1.5147, "step": 6280 }, { "epoch": 0.48, "grad_norm": 1.3729479312896729, "learning_rate": 0.00010662858694130347, "loss": 1.3378, "step": 6281 }, { "epoch": 0.48, "grad_norm": 1.220494270324707, "learning_rate": 0.00010660464873424498, "loss": 0.9874, "step": 6282 }, { "epoch": 0.48, "grad_norm": 1.112293004989624, "learning_rate": 0.00010658071014705096, "loss": 1.5764, "step": 6283 }, { "epoch": 0.48, "grad_norm": 0.8422027826309204, "learning_rate": 0.00010655677118109922, "loss": 1.313, "step": 6284 }, { "epoch": 0.48, "grad_norm": 1.9221277236938477, "learning_rate": 0.00010653283183776759, "loss": 1.0076, "step": 6285 }, { "epoch": 0.48, 
"grad_norm": 2.7036476135253906, "learning_rate": 0.00010650889211843387, "loss": 1.2652, "step": 6286 }, { "epoch": 0.48, "grad_norm": 1.3646855354309082, "learning_rate": 0.000106484952024476, "loss": 2.0064, "step": 6287 }, { "epoch": 0.48, "grad_norm": 1.279917597770691, "learning_rate": 0.00010646101155727185, "loss": 1.6724, "step": 6288 }, { "epoch": 0.48, "grad_norm": 1.309093952178955, "learning_rate": 0.00010643707071819927, "loss": 1.4166, "step": 6289 }, { "epoch": 0.48, "grad_norm": 2.4954733848571777, "learning_rate": 0.00010641312950863626, "loss": 1.5207, "step": 6290 }, { "epoch": 0.48, "grad_norm": 1.473715901374817, "learning_rate": 0.00010638918792996075, "loss": 1.3464, "step": 6291 }, { "epoch": 0.48, "grad_norm": 2.4746313095092773, "learning_rate": 0.00010636524598355074, "loss": 2.0331, "step": 6292 }, { "epoch": 0.48, "grad_norm": 1.396408200263977, "learning_rate": 0.00010634130367078419, "loss": 1.237, "step": 6293 }, { "epoch": 0.48, "grad_norm": 1.157403826713562, "learning_rate": 0.00010631736099303915, "loss": 1.3001, "step": 6294 }, { "epoch": 0.48, "grad_norm": 1.2648625373840332, "learning_rate": 0.0001062934179516936, "loss": 1.529, "step": 6295 }, { "epoch": 0.48, "grad_norm": 1.3581730127334595, "learning_rate": 0.00010626947454812568, "loss": 1.7692, "step": 6296 }, { "epoch": 0.48, "grad_norm": 2.275383472442627, "learning_rate": 0.00010624553078371345, "loss": 1.7509, "step": 6297 }, { "epoch": 0.48, "grad_norm": 2.1266276836395264, "learning_rate": 0.00010622158665983495, "loss": 1.4269, "step": 6298 }, { "epoch": 0.48, "grad_norm": 1.0353583097457886, "learning_rate": 0.00010619764217786836, "loss": 1.143, "step": 6299 }, { "epoch": 0.48, "grad_norm": 1.3291534185409546, "learning_rate": 0.00010617369733919183, "loss": 1.4907, "step": 6300 }, { "epoch": 0.48, "grad_norm": 1.4006282091140747, "learning_rate": 0.0001061497521451835, "loss": 1.6469, "step": 6301 }, { "epoch": 0.48, "grad_norm": 1.1648443937301636, 
"learning_rate": 0.00010612580659722153, "loss": 1.3201, "step": 6302 }, { "epoch": 0.48, "grad_norm": 1.5983117818832397, "learning_rate": 0.00010610186069668418, "loss": 0.9175, "step": 6303 }, { "epoch": 0.48, "grad_norm": 2.1180832386016846, "learning_rate": 0.00010607791444494961, "loss": 1.2517, "step": 6304 }, { "epoch": 0.48, "grad_norm": 2.878462076187134, "learning_rate": 0.00010605396784339612, "loss": 1.2053, "step": 6305 }, { "epoch": 0.48, "grad_norm": 1.3982125520706177, "learning_rate": 0.00010603002089340197, "loss": 1.7572, "step": 6306 }, { "epoch": 0.48, "grad_norm": 2.0492324829101562, "learning_rate": 0.0001060060735963454, "loss": 1.7846, "step": 6307 }, { "epoch": 0.48, "grad_norm": 1.0775315761566162, "learning_rate": 0.00010598212595360477, "loss": 1.6508, "step": 6308 }, { "epoch": 0.48, "grad_norm": 1.3414156436920166, "learning_rate": 0.00010595817796655836, "loss": 1.4665, "step": 6309 }, { "epoch": 0.48, "grad_norm": 1.0212277173995972, "learning_rate": 0.00010593422963658452, "loss": 0.9142, "step": 6310 }, { "epoch": 0.48, "grad_norm": 1.0654096603393555, "learning_rate": 0.00010591028096506168, "loss": 1.3044, "step": 6311 }, { "epoch": 0.48, "grad_norm": 1.0739768743515015, "learning_rate": 0.00010588633195336812, "loss": 1.5588, "step": 6312 }, { "epoch": 0.48, "grad_norm": 4.199233055114746, "learning_rate": 0.0001058623826028823, "loss": 2.2553, "step": 6313 }, { "epoch": 0.48, "grad_norm": 1.2361353635787964, "learning_rate": 0.00010583843291498266, "loss": 0.9305, "step": 6314 }, { "epoch": 0.48, "grad_norm": 1.3161602020263672, "learning_rate": 0.00010581448289104758, "loss": 1.0534, "step": 6315 }, { "epoch": 0.48, "grad_norm": 4.159707546234131, "learning_rate": 0.00010579053253245561, "loss": 2.3015, "step": 6316 }, { "epoch": 0.48, "grad_norm": 1.2209627628326416, "learning_rate": 0.00010576658184058514, "loss": 1.6028, "step": 6317 }, { "epoch": 0.48, "grad_norm": 1.2374144792556763, "learning_rate": 
0.00010574263081681475, "loss": 0.9729, "step": 6318 }, { "epoch": 0.48, "grad_norm": 1.1006346940994263, "learning_rate": 0.00010571867946252287, "loss": 1.5402, "step": 6319 }, { "epoch": 0.48, "grad_norm": 1.2926362752914429, "learning_rate": 0.00010569472777908813, "loss": 1.573, "step": 6320 }, { "epoch": 0.48, "grad_norm": 1.996274709701538, "learning_rate": 0.00010567077576788904, "loss": 1.1802, "step": 6321 }, { "epoch": 0.48, "grad_norm": 1.0812591314315796, "learning_rate": 0.00010564682343030416, "loss": 1.3607, "step": 6322 }, { "epoch": 0.48, "grad_norm": 1.2377489805221558, "learning_rate": 0.00010562287076771214, "loss": 1.2051, "step": 6323 }, { "epoch": 0.48, "grad_norm": 0.8463954925537109, "learning_rate": 0.00010559891778149154, "loss": 1.3929, "step": 6324 }, { "epoch": 0.48, "grad_norm": 3.5900533199310303, "learning_rate": 0.00010557496447302102, "loss": 1.1867, "step": 6325 }, { "epoch": 0.48, "grad_norm": 1.1259018182754517, "learning_rate": 0.00010555101084367925, "loss": 1.2011, "step": 6326 }, { "epoch": 0.48, "grad_norm": 1.4121928215026855, "learning_rate": 0.00010552705689484481, "loss": 1.3256, "step": 6327 }, { "epoch": 0.48, "grad_norm": 1.9856261014938354, "learning_rate": 0.00010550310262789649, "loss": 1.8465, "step": 6328 }, { "epoch": 0.48, "grad_norm": 1.0439730882644653, "learning_rate": 0.00010547914804421295, "loss": 1.1095, "step": 6329 }, { "epoch": 0.48, "grad_norm": 2.1127967834472656, "learning_rate": 0.00010545519314517291, "loss": 0.9299, "step": 6330 }, { "epoch": 0.48, "grad_norm": 2.020728588104248, "learning_rate": 0.00010543123793215515, "loss": 1.2612, "step": 6331 }, { "epoch": 0.48, "grad_norm": 1.664035439491272, "learning_rate": 0.00010540728240653838, "loss": 1.8301, "step": 6332 }, { "epoch": 0.48, "grad_norm": 1.5754975080490112, "learning_rate": 0.0001053833265697014, "loss": 1.3693, "step": 6333 }, { "epoch": 0.48, "grad_norm": 1.3572715520858765, "learning_rate": 0.00010535937042302304, "loss": 
1.3088, "step": 6334 }, { "epoch": 0.48, "grad_norm": 1.5631927251815796, "learning_rate": 0.00010533541396788205, "loss": 1.7718, "step": 6335 }, { "epoch": 0.48, "grad_norm": 1.1042759418487549, "learning_rate": 0.0001053114572056573, "loss": 1.4602, "step": 6336 }, { "epoch": 0.48, "grad_norm": 1.1870183944702148, "learning_rate": 0.00010528750013772762, "loss": 1.1238, "step": 6337 }, { "epoch": 0.48, "grad_norm": 1.9958871603012085, "learning_rate": 0.0001052635427654719, "loss": 1.4802, "step": 6338 }, { "epoch": 0.48, "grad_norm": 1.450719952583313, "learning_rate": 0.00010523958509026901, "loss": 1.8178, "step": 6339 }, { "epoch": 0.48, "grad_norm": 1.320455551147461, "learning_rate": 0.00010521562711349788, "loss": 1.4941, "step": 6340 }, { "epoch": 0.48, "grad_norm": 3.2592062950134277, "learning_rate": 0.0001051916688365374, "loss": 1.3668, "step": 6341 }, { "epoch": 0.48, "grad_norm": 2.5716395378112793, "learning_rate": 0.00010516771026076647, "loss": 1.8047, "step": 6342 }, { "epoch": 0.48, "grad_norm": 1.4653555154800415, "learning_rate": 0.00010514375138756411, "loss": 1.0195, "step": 6343 }, { "epoch": 0.48, "grad_norm": 4.782590389251709, "learning_rate": 0.0001051197922183093, "loss": 1.0225, "step": 6344 }, { "epoch": 0.48, "grad_norm": 1.8502683639526367, "learning_rate": 0.00010509583275438095, "loss": 1.7703, "step": 6345 }, { "epoch": 0.48, "grad_norm": 1.810633659362793, "learning_rate": 0.00010507187299715815, "loss": 1.0899, "step": 6346 }, { "epoch": 0.48, "grad_norm": 1.577893853187561, "learning_rate": 0.00010504791294801986, "loss": 1.1309, "step": 6347 }, { "epoch": 0.48, "grad_norm": 1.7608925104141235, "learning_rate": 0.00010502395260834514, "loss": 1.9888, "step": 6348 }, { "epoch": 0.48, "grad_norm": 0.9350829720497131, "learning_rate": 0.00010499999197951305, "loss": 1.0986, "step": 6349 }, { "epoch": 0.48, "grad_norm": 1.291115164756775, "learning_rate": 0.00010497603106290266, "loss": 1.2711, "step": 6350 }, { "epoch": 0.48, 
"grad_norm": 2.0262277126312256, "learning_rate": 0.00010495206985989304, "loss": 1.7701, "step": 6351 }, { "epoch": 0.48, "grad_norm": 1.0642553567886353, "learning_rate": 0.00010492810837186333, "loss": 1.0954, "step": 6352 }, { "epoch": 0.48, "grad_norm": 2.3160550594329834, "learning_rate": 0.00010490414660019264, "loss": 1.6776, "step": 6353 }, { "epoch": 0.48, "grad_norm": 1.4545706510543823, "learning_rate": 0.00010488018454626007, "loss": 1.4219, "step": 6354 }, { "epoch": 0.48, "grad_norm": 1.4139208793640137, "learning_rate": 0.00010485622221144484, "loss": 1.5561, "step": 6355 }, { "epoch": 0.48, "grad_norm": 1.090855360031128, "learning_rate": 0.00010483225959712606, "loss": 1.2523, "step": 6356 }, { "epoch": 0.49, "grad_norm": 2.210751533508301, "learning_rate": 0.00010480829670468295, "loss": 1.1783, "step": 6357 }, { "epoch": 0.49, "grad_norm": 3.0735137462615967, "learning_rate": 0.00010478433353549473, "loss": 1.4003, "step": 6358 }, { "epoch": 0.49, "grad_norm": 1.4441266059875488, "learning_rate": 0.00010476037009094058, "loss": 1.4441, "step": 6359 }, { "epoch": 0.49, "grad_norm": 1.7288376092910767, "learning_rate": 0.0001047364063723997, "loss": 1.9043, "step": 6360 }, { "epoch": 0.49, "grad_norm": 1.496440052986145, "learning_rate": 0.00010471244238125142, "loss": 1.5532, "step": 6361 }, { "epoch": 0.49, "grad_norm": 1.2528226375579834, "learning_rate": 0.00010468847811887499, "loss": 1.537, "step": 6362 }, { "epoch": 0.49, "grad_norm": 1.6983036994934082, "learning_rate": 0.00010466451358664963, "loss": 1.4448, "step": 6363 }, { "epoch": 0.49, "grad_norm": 1.2110235691070557, "learning_rate": 0.00010464054878595472, "loss": 1.6271, "step": 6364 }, { "epoch": 0.49, "grad_norm": 1.3976130485534668, "learning_rate": 0.00010461658371816953, "loss": 1.3817, "step": 6365 }, { "epoch": 0.49, "grad_norm": 3.1458051204681396, "learning_rate": 0.00010459261838467336, "loss": 1.5065, "step": 6366 }, { "epoch": 0.49, "grad_norm": 1.7139164209365845, 
"learning_rate": 0.0001045686527868456, "loss": 1.5464, "step": 6367 }, { "epoch": 0.49, "grad_norm": 1.7657716274261475, "learning_rate": 0.00010454468692606561, "loss": 2.6068, "step": 6368 }, { "epoch": 0.49, "grad_norm": 1.6934552192687988, "learning_rate": 0.00010452072080371269, "loss": 1.1101, "step": 6369 }, { "epoch": 0.49, "grad_norm": 1.7212483882904053, "learning_rate": 0.00010449675442116634, "loss": 0.6209, "step": 6370 }, { "epoch": 0.49, "grad_norm": 2.1495535373687744, "learning_rate": 0.00010447278777980586, "loss": 1.9522, "step": 6371 }, { "epoch": 0.49, "grad_norm": 1.3121421337127686, "learning_rate": 0.00010444882088101074, "loss": 1.9068, "step": 6372 }, { "epoch": 0.49, "grad_norm": 1.3659155368804932, "learning_rate": 0.00010442485372616038, "loss": 1.1671, "step": 6373 }, { "epoch": 0.49, "grad_norm": 1.6339960098266602, "learning_rate": 0.00010440088631663422, "loss": 1.6841, "step": 6374 }, { "epoch": 0.49, "grad_norm": 1.9180340766906738, "learning_rate": 0.00010437691865381174, "loss": 1.3378, "step": 6375 }, { "epoch": 0.49, "grad_norm": 1.3191572427749634, "learning_rate": 0.00010435295073907241, "loss": 1.3084, "step": 6376 }, { "epoch": 0.49, "grad_norm": 2.0316944122314453, "learning_rate": 0.00010432898257379577, "loss": 1.7508, "step": 6377 }, { "epoch": 0.49, "grad_norm": 1.8446015119552612, "learning_rate": 0.00010430501415936122, "loss": 1.577, "step": 6378 }, { "epoch": 0.49, "grad_norm": 2.63284969329834, "learning_rate": 0.00010428104549714837, "loss": 0.9129, "step": 6379 }, { "epoch": 0.49, "grad_norm": 1.646402359008789, "learning_rate": 0.00010425707658853672, "loss": 2.1267, "step": 6380 }, { "epoch": 0.49, "grad_norm": 1.3842436075210571, "learning_rate": 0.0001042331074349058, "loss": 1.4859, "step": 6381 }, { "epoch": 0.49, "grad_norm": 1.2720608711242676, "learning_rate": 0.00010420913803763521, "loss": 1.8484, "step": 6382 }, { "epoch": 0.49, "grad_norm": 2.6079182624816895, "learning_rate": 
0.00010418516839810452, "loss": 1.1528, "step": 6383 }, { "epoch": 0.49, "grad_norm": 1.288496494293213, "learning_rate": 0.0001041611985176933, "loss": 1.0737, "step": 6384 }, { "epoch": 0.49, "grad_norm": 1.1920742988586426, "learning_rate": 0.00010413722839778117, "loss": 0.9692, "step": 6385 }, { "epoch": 0.49, "grad_norm": 1.5451197624206543, "learning_rate": 0.00010411325803974777, "loss": 2.1136, "step": 6386 }, { "epoch": 0.49, "grad_norm": 1.2547529935836792, "learning_rate": 0.00010408928744497268, "loss": 1.6157, "step": 6387 }, { "epoch": 0.49, "grad_norm": 2.0736825466156006, "learning_rate": 0.00010406531661483559, "loss": 1.9646, "step": 6388 }, { "epoch": 0.49, "grad_norm": 1.4890838861465454, "learning_rate": 0.00010404134555071612, "loss": 1.4905, "step": 6389 }, { "epoch": 0.49, "grad_norm": 2.8193438053131104, "learning_rate": 0.00010401737425399396, "loss": 1.1279, "step": 6390 }, { "epoch": 0.49, "grad_norm": 1.3118375539779663, "learning_rate": 0.00010399340272604884, "loss": 1.6643, "step": 6391 }, { "epoch": 0.49, "grad_norm": 0.8958210349082947, "learning_rate": 0.0001039694309682604, "loss": 1.3199, "step": 6392 }, { "epoch": 0.49, "grad_norm": 2.1228721141815186, "learning_rate": 0.00010394545898200836, "loss": 2.0667, "step": 6393 }, { "epoch": 0.49, "grad_norm": 0.7963722944259644, "learning_rate": 0.00010392148676867249, "loss": 3.2236, "step": 6394 }, { "epoch": 0.49, "grad_norm": 1.1509753465652466, "learning_rate": 0.00010389751432963248, "loss": 1.2974, "step": 6395 }, { "epoch": 0.49, "grad_norm": 1.6610236167907715, "learning_rate": 0.0001038735416662681, "loss": 1.6045, "step": 6396 }, { "epoch": 0.49, "grad_norm": 1.2575929164886475, "learning_rate": 0.0001038495687799591, "loss": 1.3, "step": 6397 }, { "epoch": 0.49, "grad_norm": 1.1308716535568237, "learning_rate": 0.00010382559567208527, "loss": 1.4535, "step": 6398 }, { "epoch": 0.49, "grad_norm": 1.288931965827942, "learning_rate": 0.00010380162234402642, "loss": 1.4307, 
"step": 6399 }, { "epoch": 0.49, "grad_norm": 3.647803544998169, "learning_rate": 0.00010377764879716234, "loss": 1.722, "step": 6400 }, { "epoch": 0.49, "grad_norm": 1.3319677114486694, "learning_rate": 0.00010375367503287283, "loss": 1.2437, "step": 6401 }, { "epoch": 0.49, "grad_norm": 1.7024058103561401, "learning_rate": 0.00010372970105253771, "loss": 1.0641, "step": 6402 }, { "epoch": 0.49, "grad_norm": 1.2849299907684326, "learning_rate": 0.00010370572685753688, "loss": 1.4335, "step": 6403 }, { "epoch": 0.49, "grad_norm": 1.054636001586914, "learning_rate": 0.00010368175244925011, "loss": 0.9399, "step": 6404 }, { "epoch": 0.49, "grad_norm": 1.1719392538070679, "learning_rate": 0.00010365777782905735, "loss": 1.6588, "step": 6405 }, { "epoch": 0.49, "grad_norm": 1.8624529838562012, "learning_rate": 0.0001036338029983384, "loss": 1.3632, "step": 6406 }, { "epoch": 0.49, "grad_norm": 1.3250470161437988, "learning_rate": 0.00010360982795847319, "loss": 1.4679, "step": 6407 }, { "epoch": 0.49, "grad_norm": 0.948027491569519, "learning_rate": 0.0001035858527108416, "loss": 0.8397, "step": 6408 }, { "epoch": 0.49, "grad_norm": 2.6283271312713623, "learning_rate": 0.00010356187725682359, "loss": 1.7436, "step": 6409 }, { "epoch": 0.49, "grad_norm": 0.8971397280693054, "learning_rate": 0.00010353790159779903, "loss": 1.0935, "step": 6410 }, { "epoch": 0.49, "grad_norm": 1.3533612489700317, "learning_rate": 0.00010351392573514789, "loss": 1.3496, "step": 6411 }, { "epoch": 0.49, "grad_norm": 2.185720205307007, "learning_rate": 0.00010348994967025012, "loss": 1.4117, "step": 6412 }, { "epoch": 0.49, "grad_norm": 1.8663378953933716, "learning_rate": 0.00010346597340448562, "loss": 1.7903, "step": 6413 }, { "epoch": 0.49, "grad_norm": 1.6806738376617432, "learning_rate": 0.00010344199693923447, "loss": 1.0415, "step": 6414 }, { "epoch": 0.49, "grad_norm": 1.3367353677749634, "learning_rate": 0.00010341802027587659, "loss": 1.8269, "step": 6415 }, { "epoch": 0.49, 
"grad_norm": 2.3522114753723145, "learning_rate": 0.00010339404341579195, "loss": 1.6891, "step": 6416 }, { "epoch": 0.49, "grad_norm": 1.2016899585723877, "learning_rate": 0.0001033700663603606, "loss": 2.0496, "step": 6417 }, { "epoch": 0.49, "grad_norm": 2.2492222785949707, "learning_rate": 0.00010334608911096256, "loss": 1.7023, "step": 6418 }, { "epoch": 0.49, "grad_norm": 1.0328017473220825, "learning_rate": 0.00010332211166897783, "loss": 1.7806, "step": 6419 }, { "epoch": 0.49, "grad_norm": 1.4095368385314941, "learning_rate": 0.00010329813403578646, "loss": 1.5382, "step": 6420 }, { "epoch": 0.49, "grad_norm": 1.3403563499450684, "learning_rate": 0.0001032741562127685, "loss": 1.063, "step": 6421 }, { "epoch": 0.49, "grad_norm": 0.9254189729690552, "learning_rate": 0.000103250178201304, "loss": 1.1053, "step": 6422 }, { "epoch": 0.49, "grad_norm": 1.2301853895187378, "learning_rate": 0.00010322620000277308, "loss": 1.3307, "step": 6423 }, { "epoch": 0.49, "grad_norm": 1.4562855958938599, "learning_rate": 0.00010320222161855579, "loss": 1.4407, "step": 6424 }, { "epoch": 0.49, "grad_norm": 1.146155595779419, "learning_rate": 0.00010317824305003219, "loss": 1.1266, "step": 6425 }, { "epoch": 0.49, "grad_norm": 1.8872063159942627, "learning_rate": 0.00010315426429858246, "loss": 1.6936, "step": 6426 }, { "epoch": 0.49, "grad_norm": 4.529325485229492, "learning_rate": 0.00010313028536558664, "loss": 2.0127, "step": 6427 }, { "epoch": 0.49, "grad_norm": 1.6702533960342407, "learning_rate": 0.00010310630625242492, "loss": 2.0617, "step": 6428 }, { "epoch": 0.49, "grad_norm": 1.1122488975524902, "learning_rate": 0.0001030823269604774, "loss": 1.6664, "step": 6429 }, { "epoch": 0.49, "grad_norm": 1.342384934425354, "learning_rate": 0.00010305834749112421, "loss": 1.5992, "step": 6430 }, { "epoch": 0.49, "grad_norm": 1.6489508152008057, "learning_rate": 0.00010303436784574551, "loss": 1.6999, "step": 6431 }, { "epoch": 0.49, "grad_norm": 1.9905751943588257, 
"learning_rate": 0.00010301038802572154, "loss": 1.3583, "step": 6432 }, { "epoch": 0.49, "grad_norm": 1.0782482624053955, "learning_rate": 0.00010298640803243238, "loss": 1.2916, "step": 6433 }, { "epoch": 0.49, "grad_norm": 1.5363678932189941, "learning_rate": 0.00010296242786725827, "loss": 1.6422, "step": 6434 }, { "epoch": 0.49, "grad_norm": 1.638309359550476, "learning_rate": 0.00010293844753157942, "loss": 1.3811, "step": 6435 }, { "epoch": 0.49, "grad_norm": 1.7672295570373535, "learning_rate": 0.00010291446702677599, "loss": 1.6836, "step": 6436 }, { "epoch": 0.49, "grad_norm": 1.5540319681167603, "learning_rate": 0.0001028904863542282, "loss": 1.7918, "step": 6437 }, { "epoch": 0.49, "grad_norm": 1.5870327949523926, "learning_rate": 0.00010286650551531631, "loss": 1.2237, "step": 6438 }, { "epoch": 0.49, "grad_norm": 1.1219744682312012, "learning_rate": 0.00010284252451142056, "loss": 1.0982, "step": 6439 }, { "epoch": 0.49, "grad_norm": 1.3625290393829346, "learning_rate": 0.00010281854334392112, "loss": 1.0035, "step": 6440 }, { "epoch": 0.49, "grad_norm": 1.546069622039795, "learning_rate": 0.00010279456201419833, "loss": 0.9136, "step": 6441 }, { "epoch": 0.49, "grad_norm": 1.335966944694519, "learning_rate": 0.0001027705805236324, "loss": 0.55, "step": 6442 }, { "epoch": 0.49, "grad_norm": 3.2733511924743652, "learning_rate": 0.00010274659887360364, "loss": 1.6063, "step": 6443 }, { "epoch": 0.49, "grad_norm": 2.7969512939453125, "learning_rate": 0.00010272261706549231, "loss": 1.7093, "step": 6444 }, { "epoch": 0.49, "grad_norm": 3.373081684112549, "learning_rate": 0.00010269863510067872, "loss": 0.8325, "step": 6445 }, { "epoch": 0.49, "grad_norm": 1.105101466178894, "learning_rate": 0.00010267465298054313, "loss": 1.1287, "step": 6446 }, { "epoch": 0.49, "grad_norm": 1.235404133796692, "learning_rate": 0.00010265067070646589, "loss": 1.6976, "step": 6447 }, { "epoch": 0.49, "grad_norm": 1.1059514284133911, "learning_rate": 0.00010262668827982731, 
"loss": 1.2514, "step": 6448 }, { "epoch": 0.49, "grad_norm": 1.4148634672164917, "learning_rate": 0.00010260270570200767, "loss": 1.199, "step": 6449 }, { "epoch": 0.49, "grad_norm": 2.1330082416534424, "learning_rate": 0.00010257872297438738, "loss": 1.1849, "step": 6450 }, { "epoch": 0.49, "grad_norm": 1.6056427955627441, "learning_rate": 0.00010255474009834674, "loss": 1.6615, "step": 6451 }, { "epoch": 0.49, "grad_norm": 2.3712081909179688, "learning_rate": 0.00010253075707526612, "loss": 1.6962, "step": 6452 }, { "epoch": 0.49, "grad_norm": 1.3290953636169434, "learning_rate": 0.00010250677390652586, "loss": 1.7298, "step": 6453 }, { "epoch": 0.49, "grad_norm": 1.106266975402832, "learning_rate": 0.00010248279059350634, "loss": 1.5529, "step": 6454 }, { "epoch": 0.49, "grad_norm": 1.7923921346664429, "learning_rate": 0.00010245880713758793, "loss": 1.3883, "step": 6455 }, { "epoch": 0.49, "grad_norm": 2.4060556888580322, "learning_rate": 0.00010243482354015103, "loss": 1.897, "step": 6456 }, { "epoch": 0.49, "grad_norm": 1.4107780456542969, "learning_rate": 0.00010241083980257604, "loss": 1.6998, "step": 6457 }, { "epoch": 0.49, "grad_norm": 2.674671173095703, "learning_rate": 0.00010238685592624335, "loss": 1.4014, "step": 6458 }, { "epoch": 0.49, "grad_norm": 1.179594874382019, "learning_rate": 0.00010236287191253338, "loss": 1.4467, "step": 6459 }, { "epoch": 0.49, "grad_norm": 4.978769779205322, "learning_rate": 0.00010233888776282649, "loss": 2.5617, "step": 6460 }, { "epoch": 0.49, "grad_norm": 1.8425557613372803, "learning_rate": 0.00010231490347850323, "loss": 1.8204, "step": 6461 }, { "epoch": 0.49, "grad_norm": 1.7888915538787842, "learning_rate": 0.0001022909190609439, "loss": 1.6191, "step": 6462 }, { "epoch": 0.49, "grad_norm": 1.8046461343765259, "learning_rate": 0.000102266934511529, "loss": 1.696, "step": 6463 }, { "epoch": 0.49, "grad_norm": 9.769966125488281, "learning_rate": 0.000102242949831639, "loss": 2.3933, "step": 6464 }, { "epoch": 
0.49, "grad_norm": 1.0609670877456665, "learning_rate": 0.00010221896502265433, "loss": 1.2801, "step": 6465 }, { "epoch": 0.49, "grad_norm": 1.850296974182129, "learning_rate": 0.00010219498008595547, "loss": 1.5831, "step": 6466 }, { "epoch": 0.49, "grad_norm": 1.1018011569976807, "learning_rate": 0.00010217099502292287, "loss": 1.3452, "step": 6467 }, { "epoch": 0.49, "grad_norm": 1.2826210260391235, "learning_rate": 0.00010214700983493702, "loss": 0.9464, "step": 6468 }, { "epoch": 0.49, "grad_norm": 1.0850149393081665, "learning_rate": 0.00010212302452337836, "loss": 1.256, "step": 6469 }, { "epoch": 0.49, "grad_norm": 1.4338934421539307, "learning_rate": 0.00010209903908962746, "loss": 0.8983, "step": 6470 }, { "epoch": 0.49, "grad_norm": 1.251304268836975, "learning_rate": 0.00010207505353506481, "loss": 1.2212, "step": 6471 }, { "epoch": 0.49, "grad_norm": 1.1848710775375366, "learning_rate": 0.00010205106786107086, "loss": 1.4368, "step": 6472 }, { "epoch": 0.49, "grad_norm": 1.2917470932006836, "learning_rate": 0.00010202708206902615, "loss": 1.366, "step": 6473 }, { "epoch": 0.49, "grad_norm": 1.460495114326477, "learning_rate": 0.00010200309616031123, "loss": 1.2837, "step": 6474 }, { "epoch": 0.49, "grad_norm": 1.2051150798797607, "learning_rate": 0.00010197911013630659, "loss": 1.4818, "step": 6475 }, { "epoch": 0.49, "grad_norm": 1.4267786741256714, "learning_rate": 0.0001019551239983928, "loss": 2.1996, "step": 6476 }, { "epoch": 0.49, "grad_norm": 1.9055510759353638, "learning_rate": 0.00010193113774795036, "loss": 1.8689, "step": 6477 }, { "epoch": 0.49, "grad_norm": 1.3906396627426147, "learning_rate": 0.0001019071513863598, "loss": 1.4558, "step": 6478 }, { "epoch": 0.49, "grad_norm": 1.4595857858657837, "learning_rate": 0.00010188316491500174, "loss": 1.4055, "step": 6479 }, { "epoch": 0.49, "grad_norm": 1.3207008838653564, "learning_rate": 0.00010185917833525669, "loss": 1.4536, "step": 6480 }, { "epoch": 0.49, "grad_norm": 1.8773040771484375, 
"learning_rate": 0.00010183519164850526, "loss": 1.4403, "step": 6481 }, { "epoch": 0.49, "grad_norm": 1.0902498960494995, "learning_rate": 0.00010181120485612796, "loss": 1.4123, "step": 6482 }, { "epoch": 0.49, "grad_norm": 0.9276440143585205, "learning_rate": 0.00010178721795950543, "loss": 1.7327, "step": 6483 }, { "epoch": 0.49, "grad_norm": 1.057550072669983, "learning_rate": 0.00010176323096001818, "loss": 1.3206, "step": 6484 }, { "epoch": 0.49, "grad_norm": 4.460856914520264, "learning_rate": 0.00010173924385904691, "loss": 1.683, "step": 6485 }, { "epoch": 0.49, "grad_norm": 1.5457838773727417, "learning_rate": 0.00010171525665797212, "loss": 1.7397, "step": 6486 }, { "epoch": 0.49, "grad_norm": 1.066497802734375, "learning_rate": 0.00010169126935817444, "loss": 1.5072, "step": 6487 }, { "epoch": 0.5, "grad_norm": 1.0548356771469116, "learning_rate": 0.00010166728196103445, "loss": 1.4574, "step": 6488 }, { "epoch": 0.5, "grad_norm": 1.2964743375778198, "learning_rate": 0.00010164329446793284, "loss": 1.1626, "step": 6489 }, { "epoch": 0.5, "grad_norm": 1.3290778398513794, "learning_rate": 0.00010161930688025017, "loss": 1.0732, "step": 6490 }, { "epoch": 0.5, "grad_norm": 1.341805338859558, "learning_rate": 0.00010159531919936707, "loss": 1.3881, "step": 6491 }, { "epoch": 0.5, "grad_norm": 1.147086501121521, "learning_rate": 0.00010157133142666416, "loss": 1.1581, "step": 6492 }, { "epoch": 0.5, "grad_norm": 1.3810808658599854, "learning_rate": 0.00010154734356352207, "loss": 1.076, "step": 6493 }, { "epoch": 0.5, "grad_norm": 1.3894498348236084, "learning_rate": 0.00010152335561132149, "loss": 1.7498, "step": 6494 }, { "epoch": 0.5, "grad_norm": 1.3303542137145996, "learning_rate": 0.00010149936757144305, "loss": 1.1219, "step": 6495 }, { "epoch": 0.5, "grad_norm": 1.3560097217559814, "learning_rate": 0.00010147537944526732, "loss": 1.3249, "step": 6496 }, { "epoch": 0.5, "grad_norm": 1.485142469406128, "learning_rate": 0.00010145139123417506, "loss": 
1.674, "step": 6497 }, { "epoch": 0.5, "grad_norm": 1.6724016666412354, "learning_rate": 0.00010142740293954687, "loss": 1.5573, "step": 6498 }, { "epoch": 0.5, "grad_norm": 2.0544705390930176, "learning_rate": 0.00010140341456276344, "loss": 1.6877, "step": 6499 }, { "epoch": 0.5, "grad_norm": 2.56963849067688, "learning_rate": 0.00010137942610520541, "loss": 2.0098, "step": 6500 }, { "epoch": 0.5, "grad_norm": 1.3404816389083862, "learning_rate": 0.0001013554375682535, "loss": 1.2151, "step": 6501 }, { "epoch": 0.5, "grad_norm": 1.3047996759414673, "learning_rate": 0.00010133144895328832, "loss": 1.4401, "step": 6502 }, { "epoch": 0.5, "grad_norm": 1.3623515367507935, "learning_rate": 0.00010130746026169063, "loss": 1.726, "step": 6503 }, { "epoch": 0.5, "grad_norm": 1.7026371955871582, "learning_rate": 0.00010128347149484108, "loss": 1.6961, "step": 6504 }, { "epoch": 0.5, "grad_norm": 3.073207139968872, "learning_rate": 0.00010125948265412033, "loss": 1.2242, "step": 6505 }, { "epoch": 0.5, "grad_norm": 5.331060886383057, "learning_rate": 0.00010123549374090912, "loss": 1.777, "step": 6506 }, { "epoch": 0.5, "grad_norm": 2.323439836502075, "learning_rate": 0.00010121150475658816, "loss": 1.4971, "step": 6507 }, { "epoch": 0.5, "grad_norm": 1.6647764444351196, "learning_rate": 0.00010118751570253813, "loss": 1.5626, "step": 6508 }, { "epoch": 0.5, "grad_norm": 1.459473729133606, "learning_rate": 0.00010116352658013973, "loss": 1.1546, "step": 6509 }, { "epoch": 0.5, "grad_norm": 1.2633275985717773, "learning_rate": 0.00010113953739077367, "loss": 1.5149, "step": 6510 }, { "epoch": 0.5, "grad_norm": 1.2999557256698608, "learning_rate": 0.00010111554813582066, "loss": 1.0668, "step": 6511 }, { "epoch": 0.5, "grad_norm": 1.351360559463501, "learning_rate": 0.00010109155881666148, "loss": 1.4431, "step": 6512 }, { "epoch": 0.5, "grad_norm": 1.1538339853286743, "learning_rate": 0.00010106756943467679, "loss": 1.7001, "step": 6513 }, { "epoch": 0.5, "grad_norm": 
2.776597738265991, "learning_rate": 0.00010104357999124735, "loss": 1.3611, "step": 6514 }, { "epoch": 0.5, "grad_norm": 2.38173508644104, "learning_rate": 0.00010101959048775387, "loss": 1.3041, "step": 6515 }, { "epoch": 0.5, "grad_norm": 1.132672905921936, "learning_rate": 0.0001009956009255771, "loss": 1.4842, "step": 6516 }, { "epoch": 0.5, "grad_norm": 1.6793826818466187, "learning_rate": 0.00010097161130609773, "loss": 1.5119, "step": 6517 }, { "epoch": 0.5, "grad_norm": 1.391869068145752, "learning_rate": 0.00010094762163069659, "loss": 1.4827, "step": 6518 }, { "epoch": 0.5, "grad_norm": 1.2719467878341675, "learning_rate": 0.00010092363190075437, "loss": 1.1548, "step": 6519 }, { "epoch": 0.5, "grad_norm": 1.3125487565994263, "learning_rate": 0.0001008996421176518, "loss": 1.1494, "step": 6520 }, { "epoch": 0.5, "grad_norm": 3.939373254776001, "learning_rate": 0.00010087565228276965, "loss": 1.3782, "step": 6521 }, { "epoch": 0.5, "grad_norm": 1.2008627653121948, "learning_rate": 0.0001008516623974887, "loss": 1.4392, "step": 6522 }, { "epoch": 0.5, "grad_norm": 1.4395569562911987, "learning_rate": 0.00010082767246318965, "loss": 1.8314, "step": 6523 }, { "epoch": 0.5, "grad_norm": 1.1214659214019775, "learning_rate": 0.00010080368248125332, "loss": 1.5835, "step": 6524 }, { "epoch": 0.5, "grad_norm": 2.1130318641662598, "learning_rate": 0.00010077969245306038, "loss": 1.6062, "step": 6525 }, { "epoch": 0.5, "grad_norm": 1.6542418003082275, "learning_rate": 0.0001007557023799917, "loss": 1.5016, "step": 6526 }, { "epoch": 0.5, "grad_norm": 1.420354962348938, "learning_rate": 0.00010073171226342799, "loss": 1.7316, "step": 6527 }, { "epoch": 0.5, "grad_norm": 1.353786826133728, "learning_rate": 0.00010070772210475003, "loss": 0.987, "step": 6528 }, { "epoch": 0.5, "grad_norm": 1.0678868293762207, "learning_rate": 0.00010068373190533862, "loss": 1.0919, "step": 6529 }, { "epoch": 0.5, "grad_norm": 2.3172383308410645, "learning_rate": 0.00010065974166657448, 
"loss": 1.5258, "step": 6530 }, { "epoch": 0.5, "grad_norm": 1.1590558290481567, "learning_rate": 0.00010063575138983838, "loss": 0.9739, "step": 6531 }, { "epoch": 0.5, "grad_norm": 1.0640017986297607, "learning_rate": 0.0001006117610765112, "loss": 1.4048, "step": 6532 }, { "epoch": 0.5, "grad_norm": 1.2834807634353638, "learning_rate": 0.00010058777072797361, "loss": 1.2966, "step": 6533 }, { "epoch": 0.5, "grad_norm": 1.5777204036712646, "learning_rate": 0.0001005637803456064, "loss": 1.6352, "step": 6534 }, { "epoch": 0.5, "grad_norm": 2.4488093852996826, "learning_rate": 0.00010053978993079045, "loss": 1.5431, "step": 6535 }, { "epoch": 0.5, "grad_norm": 1.7484335899353027, "learning_rate": 0.00010051579948490646, "loss": 0.9596, "step": 6536 }, { "epoch": 0.5, "grad_norm": 1.4749956130981445, "learning_rate": 0.00010049180900933524, "loss": 0.9491, "step": 6537 }, { "epoch": 0.5, "grad_norm": 1.6312205791473389, "learning_rate": 0.00010046781850545758, "loss": 1.0591, "step": 6538 }, { "epoch": 0.5, "grad_norm": 1.4074658155441284, "learning_rate": 0.00010044382797465428, "loss": 1.4175, "step": 6539 }, { "epoch": 0.5, "grad_norm": 1.3047207593917847, "learning_rate": 0.00010041983741830611, "loss": 1.6377, "step": 6540 }, { "epoch": 0.5, "grad_norm": 1.479372262954712, "learning_rate": 0.00010039584683779393, "loss": 1.0836, "step": 6541 }, { "epoch": 0.5, "grad_norm": 0.9117805361747742, "learning_rate": 0.00010037185623449846, "loss": 1.5329, "step": 6542 }, { "epoch": 0.5, "grad_norm": 2.726064920425415, "learning_rate": 0.00010034786560980049, "loss": 1.9655, "step": 6543 }, { "epoch": 0.5, "grad_norm": 2.290161609649658, "learning_rate": 0.00010032387496508089, "loss": 1.5875, "step": 6544 }, { "epoch": 0.5, "grad_norm": 1.264828085899353, "learning_rate": 0.00010029988430172041, "loss": 1.0513, "step": 6545 }, { "epoch": 0.5, "grad_norm": 2.117570161819458, "learning_rate": 0.00010027589362109986, "loss": 1.0881, "step": 6546 }, { "epoch": 0.5, 
"grad_norm": 1.8786550760269165, "learning_rate": 0.00010025190292460005, "loss": 1.6565, "step": 6547 }, { "epoch": 0.5, "grad_norm": 2.662513017654419, "learning_rate": 0.00010022791221360179, "loss": 1.0642, "step": 6548 }, { "epoch": 0.5, "grad_norm": 1.8201361894607544, "learning_rate": 0.00010020392148948585, "loss": 1.4419, "step": 6549 }, { "epoch": 0.5, "grad_norm": 1.5455631017684937, "learning_rate": 0.00010017993075363305, "loss": 1.5892, "step": 6550 }, { "epoch": 0.5, "grad_norm": 1.2932685613632202, "learning_rate": 0.00010015594000742425, "loss": 1.3012, "step": 6551 }, { "epoch": 0.5, "grad_norm": 0.9839116930961609, "learning_rate": 0.00010013194925224015, "loss": 1.0568, "step": 6552 }, { "epoch": 0.5, "grad_norm": 2.731921434402466, "learning_rate": 0.00010010795848946164, "loss": 2.5008, "step": 6553 }, { "epoch": 0.5, "grad_norm": 1.2407945394515991, "learning_rate": 0.00010008396772046948, "loss": 1.4289, "step": 6554 }, { "epoch": 0.5, "eval_loss": null, "eval_runtime": 303.0802, "eval_samples_per_second": 9.107, "eval_steps_per_second": 9.107, "step": 6554 }, { "epoch": 0.5, "grad_norm": 4.527648448944092, "learning_rate": 0.00010005997694664451, "loss": 2.2824, "step": 6555 }, { "epoch": 0.5, "grad_norm": 0.9281975030899048, "learning_rate": 0.00010003598616936754, "loss": 0.8014, "step": 6556 }, { "epoch": 0.5, "grad_norm": 1.1956651210784912, "learning_rate": 0.00010001199539001933, "loss": 1.1662, "step": 6557 }, { "epoch": 0.5, "grad_norm": 1.7743799686431885, "learning_rate": 9.998800460998072e-05, "loss": 1.7554, "step": 6558 }, { "epoch": 0.5, "grad_norm": 2.2998340129852295, "learning_rate": 9.99640138306325e-05, "loss": 0.9787, "step": 6559 }, { "epoch": 0.5, "grad_norm": 1.641343355178833, "learning_rate": 9.99400230533555e-05, "loss": 1.3305, "step": 6560 }, { "epoch": 0.5, "grad_norm": 1.3698173761367798, "learning_rate": 9.991603227953055e-05, "loss": 1.0458, "step": 6561 }, { "epoch": 0.5, "grad_norm": 1.2293579578399658, 
"learning_rate": 9.989204151053839e-05, "loss": 1.4238, "step": 6562 }, { "epoch": 0.5, "grad_norm": 2.119142532348633, "learning_rate": 9.986805074775985e-05, "loss": 0.854, "step": 6563 }, { "epoch": 0.5, "grad_norm": 2.2725770473480225, "learning_rate": 9.98440599925758e-05, "loss": 1.432, "step": 6564 }, { "epoch": 0.5, "grad_norm": 1.0948084592819214, "learning_rate": 9.982006924636697e-05, "loss": 1.1696, "step": 6565 }, { "epoch": 0.5, "grad_norm": 1.5961538553237915, "learning_rate": 9.979607851051416e-05, "loss": 1.2461, "step": 6566 }, { "epoch": 0.5, "grad_norm": 1.1090831756591797, "learning_rate": 9.977208778639825e-05, "loss": 1.3873, "step": 6567 }, { "epoch": 0.5, "grad_norm": 1.2983076572418213, "learning_rate": 9.974809707539996e-05, "loss": 1.2858, "step": 6568 }, { "epoch": 0.5, "grad_norm": 1.382012963294983, "learning_rate": 9.972410637890015e-05, "loss": 1.3871, "step": 6569 }, { "epoch": 0.5, "grad_norm": 1.1286240816116333, "learning_rate": 9.970011569827964e-05, "loss": 0.7628, "step": 6570 }, { "epoch": 0.5, "grad_norm": 1.2853790521621704, "learning_rate": 9.967612503491914e-05, "loss": 0.6572, "step": 6571 }, { "epoch": 0.5, "grad_norm": 1.4446614980697632, "learning_rate": 9.965213439019952e-05, "loss": 1.398, "step": 6572 }, { "epoch": 0.5, "grad_norm": 1.955332636833191, "learning_rate": 9.96281437655016e-05, "loss": 1.7416, "step": 6573 }, { "epoch": 0.5, "grad_norm": 2.117656946182251, "learning_rate": 9.96041531622061e-05, "loss": 1.4462, "step": 6574 }, { "epoch": 0.5, "grad_norm": 1.8139530420303345, "learning_rate": 9.95801625816939e-05, "loss": 1.8648, "step": 6575 }, { "epoch": 0.5, "grad_norm": 2.2180283069610596, "learning_rate": 9.955617202534576e-05, "loss": 2.0424, "step": 6576 }, { "epoch": 0.5, "grad_norm": 2.1447439193725586, "learning_rate": 9.953218149454243e-05, "loss": 1.9203, "step": 6577 }, { "epoch": 0.5, "grad_norm": 0.9186914563179016, "learning_rate": 9.950819099066477e-05, "loss": 1.1602, "step": 6578 }, { 
"epoch": 0.5, "grad_norm": 1.6741234064102173, "learning_rate": 9.948420051509357e-05, "loss": 1.2599, "step": 6579 }, { "epoch": 0.5, "grad_norm": 2.7483413219451904, "learning_rate": 9.946021006920959e-05, "loss": 1.2459, "step": 6580 }, { "epoch": 0.5, "grad_norm": 1.4988136291503906, "learning_rate": 9.943621965439358e-05, "loss": 1.6864, "step": 6581 }, { "epoch": 0.5, "grad_norm": 2.173563241958618, "learning_rate": 9.941222927202643e-05, "loss": 1.764, "step": 6582 }, { "epoch": 0.5, "grad_norm": 1.6715408563613892, "learning_rate": 9.938823892348883e-05, "loss": 1.7972, "step": 6583 }, { "epoch": 0.5, "grad_norm": 0.9565512537956238, "learning_rate": 9.936424861016161e-05, "loss": 1.0598, "step": 6584 }, { "epoch": 0.5, "grad_norm": 1.41102135181427, "learning_rate": 9.934025833342557e-05, "loss": 1.3444, "step": 6585 }, { "epoch": 0.5, "grad_norm": 1.1499526500701904, "learning_rate": 9.931626809466142e-05, "loss": 1.4471, "step": 6586 }, { "epoch": 0.5, "grad_norm": 1.541626214981079, "learning_rate": 9.929227789524997e-05, "loss": 1.5336, "step": 6587 }, { "epoch": 0.5, "grad_norm": 0.9019841551780701, "learning_rate": 9.926828773657204e-05, "loss": 1.4307, "step": 6588 }, { "epoch": 0.5, "grad_norm": 1.5769704580307007, "learning_rate": 9.924429762000832e-05, "loss": 1.5558, "step": 6589 }, { "epoch": 0.5, "grad_norm": 1.0981355905532837, "learning_rate": 9.922030754693962e-05, "loss": 1.3051, "step": 6590 }, { "epoch": 0.5, "grad_norm": 1.2558245658874512, "learning_rate": 9.919631751874675e-05, "loss": 1.1934, "step": 6591 }, { "epoch": 0.5, "grad_norm": 1.2636055946350098, "learning_rate": 9.917232753681037e-05, "loss": 1.6442, "step": 6592 }, { "epoch": 0.5, "grad_norm": 1.6036113500595093, "learning_rate": 9.914833760251133e-05, "loss": 1.5274, "step": 6593 }, { "epoch": 0.5, "grad_norm": 1.8930752277374268, "learning_rate": 9.91243477172304e-05, "loss": 2.056, "step": 6594 }, { "epoch": 0.5, "grad_norm": 1.9312207698822021, "learning_rate": 
9.910035788234822e-05, "loss": 1.0393, "step": 6595 }, { "epoch": 0.5, "grad_norm": 1.656327247619629, "learning_rate": 9.907636809924564e-05, "loss": 0.9868, "step": 6596 }, { "epoch": 0.5, "grad_norm": 1.8403855562210083, "learning_rate": 9.905237836930343e-05, "loss": 1.7072, "step": 6597 }, { "epoch": 0.5, "grad_norm": 1.2795575857162476, "learning_rate": 9.902838869390229e-05, "loss": 1.3697, "step": 6598 }, { "epoch": 0.5, "grad_norm": 1.1722168922424316, "learning_rate": 9.900439907442292e-05, "loss": 1.0681, "step": 6599 }, { "epoch": 0.5, "grad_norm": 1.4320099353790283, "learning_rate": 9.898040951224618e-05, "loss": 1.4559, "step": 6600 }, { "epoch": 0.5, "grad_norm": 1.6756943464279175, "learning_rate": 9.895642000875266e-05, "loss": 1.4211, "step": 6601 }, { "epoch": 0.5, "grad_norm": 3.156604051589966, "learning_rate": 9.893243056532323e-05, "loss": 1.614, "step": 6602 }, { "epoch": 0.5, "grad_norm": 1.5308876037597656, "learning_rate": 9.890844118333854e-05, "loss": 2.1086, "step": 6603 }, { "epoch": 0.5, "grad_norm": 1.0503031015396118, "learning_rate": 9.888445186417934e-05, "loss": 0.8346, "step": 6604 }, { "epoch": 0.5, "grad_norm": 1.5110753774642944, "learning_rate": 9.886046260922634e-05, "loss": 1.2283, "step": 6605 }, { "epoch": 0.5, "grad_norm": 1.2425025701522827, "learning_rate": 9.883647341986032e-05, "loss": 1.7009, "step": 6606 }, { "epoch": 0.5, "grad_norm": 1.2456469535827637, "learning_rate": 9.88124842974619e-05, "loss": 1.7395, "step": 6607 }, { "epoch": 0.5, "grad_norm": 2.4373929500579834, "learning_rate": 9.878849524341186e-05, "loss": 1.1194, "step": 6608 }, { "epoch": 0.5, "grad_norm": 1.8467577695846558, "learning_rate": 9.87645062590909e-05, "loss": 1.2475, "step": 6609 }, { "epoch": 0.5, "grad_norm": 1.5075223445892334, "learning_rate": 9.874051734587968e-05, "loss": 1.3274, "step": 6610 }, { "epoch": 0.5, "grad_norm": 1.077821969985962, "learning_rate": 9.871652850515893e-05, "loss": 0.9698, "step": 6611 }, { "epoch": 
0.5, "grad_norm": 0.9497612714767456, "learning_rate": 9.86925397383094e-05, "loss": 1.116, "step": 6612 }, { "epoch": 0.5, "grad_norm": 3.5102458000183105, "learning_rate": 9.86685510467117e-05, "loss": 1.8895, "step": 6613 }, { "epoch": 0.5, "grad_norm": 1.0524286031723022, "learning_rate": 9.864456243174652e-05, "loss": 1.3086, "step": 6614 }, { "epoch": 0.5, "grad_norm": 0.9593332409858704, "learning_rate": 9.862057389479462e-05, "loss": 1.2476, "step": 6615 }, { "epoch": 0.5, "grad_norm": 1.5842918157577515, "learning_rate": 9.859658543723659e-05, "loss": 1.143, "step": 6616 }, { "epoch": 0.5, "grad_norm": 1.6653352975845337, "learning_rate": 9.857259706045315e-05, "loss": 1.1476, "step": 6617 }, { "epoch": 0.5, "grad_norm": 1.2817462682724, "learning_rate": 9.854860876582499e-05, "loss": 1.0965, "step": 6618 }, { "epoch": 0.51, "grad_norm": 1.5285710096359253, "learning_rate": 9.852462055473269e-05, "loss": 1.184, "step": 6619 }, { "epoch": 0.51, "grad_norm": 2.576312303543091, "learning_rate": 9.850063242855699e-05, "loss": 1.16, "step": 6620 }, { "epoch": 0.51, "grad_norm": 1.9219595193862915, "learning_rate": 9.847664438867854e-05, "loss": 1.4472, "step": 6621 }, { "epoch": 0.51, "grad_norm": 2.44918155670166, "learning_rate": 9.845265643647795e-05, "loss": 1.5394, "step": 6622 }, { "epoch": 0.51, "grad_norm": 2.7961974143981934, "learning_rate": 9.842866857333585e-05, "loss": 1.2491, "step": 6623 }, { "epoch": 0.51, "grad_norm": 1.8332017660140991, "learning_rate": 9.840468080063299e-05, "loss": 1.6769, "step": 6624 }, { "epoch": 0.51, "grad_norm": 2.3929131031036377, "learning_rate": 9.838069311974986e-05, "loss": 1.0919, "step": 6625 }, { "epoch": 0.51, "grad_norm": 1.723301649093628, "learning_rate": 9.835670553206718e-05, "loss": 1.2979, "step": 6626 }, { "epoch": 0.51, "grad_norm": 2.056198835372925, "learning_rate": 9.833271803896553e-05, "loss": 1.7722, "step": 6627 }, { "epoch": 0.51, "grad_norm": 1.5831146240234375, "learning_rate": 
9.83087306418256e-05, "loss": 1.5557, "step": 6628 }, { "epoch": 0.51, "grad_norm": 1.4114927053451538, "learning_rate": 9.82847433420279e-05, "loss": 1.621, "step": 6629 }, { "epoch": 0.51, "grad_norm": 1.9076149463653564, "learning_rate": 9.826075614095311e-05, "loss": 1.5683, "step": 6630 }, { "epoch": 0.51, "grad_norm": 1.3132866621017456, "learning_rate": 9.823676903998184e-05, "loss": 1.2682, "step": 6631 }, { "epoch": 0.51, "grad_norm": 1.7459511756896973, "learning_rate": 9.821278204049459e-05, "loss": 1.8171, "step": 6632 }, { "epoch": 0.51, "grad_norm": 1.367043137550354, "learning_rate": 9.818879514387203e-05, "loss": 1.3149, "step": 6633 }, { "epoch": 0.51, "grad_norm": 4.584529399871826, "learning_rate": 9.816480835149478e-05, "loss": 2.2838, "step": 6634 }, { "epoch": 0.51, "grad_norm": 2.257565498352051, "learning_rate": 9.814082166474332e-05, "loss": 1.6922, "step": 6635 }, { "epoch": 0.51, "grad_norm": 2.2133066654205322, "learning_rate": 9.811683508499827e-05, "loss": 1.5999, "step": 6636 }, { "epoch": 0.51, "grad_norm": 1.3039947748184204, "learning_rate": 9.809284861364024e-05, "loss": 1.5042, "step": 6637 }, { "epoch": 0.51, "grad_norm": 1.6060452461242676, "learning_rate": 9.806886225204968e-05, "loss": 1.2476, "step": 6638 }, { "epoch": 0.51, "grad_norm": 1.5623114109039307, "learning_rate": 9.804487600160723e-05, "loss": 1.7336, "step": 6639 }, { "epoch": 0.51, "grad_norm": 1.5689196586608887, "learning_rate": 9.802088986369342e-05, "loss": 1.1645, "step": 6640 }, { "epoch": 0.51, "grad_norm": 1.1004096269607544, "learning_rate": 9.799690383968879e-05, "loss": 1.272, "step": 6641 }, { "epoch": 0.51, "grad_norm": 2.114583730697632, "learning_rate": 9.797291793097384e-05, "loss": 1.2672, "step": 6642 }, { "epoch": 0.51, "grad_norm": 1.405360221862793, "learning_rate": 9.794893213892917e-05, "loss": 2.1245, "step": 6643 }, { "epoch": 0.51, "grad_norm": 2.653167486190796, "learning_rate": 9.792494646493521e-05, "loss": 1.339, "step": 6644 }, { 
"epoch": 0.51, "grad_norm": 1.2277934551239014, "learning_rate": 9.790096091037253e-05, "loss": 1.7171, "step": 6645 }, { "epoch": 0.51, "grad_norm": 1.7658207416534424, "learning_rate": 9.787697547662166e-05, "loss": 1.5876, "step": 6646 }, { "epoch": 0.51, "grad_norm": 1.2821528911590576, "learning_rate": 9.785299016506302e-05, "loss": 1.8131, "step": 6647 }, { "epoch": 0.51, "grad_norm": 1.1711126565933228, "learning_rate": 9.782900497707714e-05, "loss": 1.7193, "step": 6648 }, { "epoch": 0.51, "grad_norm": 1.5579767227172852, "learning_rate": 9.780501991404456e-05, "loss": 1.0738, "step": 6649 }, { "epoch": 0.51, "grad_norm": 1.2714147567749023, "learning_rate": 9.778103497734569e-05, "loss": 1.5504, "step": 6650 }, { "epoch": 0.51, "grad_norm": 3.073651075363159, "learning_rate": 9.7757050168361e-05, "loss": 1.8018, "step": 6651 }, { "epoch": 0.51, "grad_norm": 1.9380784034729004, "learning_rate": 9.7733065488471e-05, "loss": 1.3234, "step": 6652 }, { "epoch": 0.51, "grad_norm": 1.3520359992980957, "learning_rate": 9.770908093905611e-05, "loss": 0.8977, "step": 6653 }, { "epoch": 0.51, "grad_norm": 1.280253529548645, "learning_rate": 9.768509652149679e-05, "loss": 1.3555, "step": 6654 }, { "epoch": 0.51, "grad_norm": 1.4451019763946533, "learning_rate": 9.766111223717352e-05, "loss": 0.897, "step": 6655 }, { "epoch": 0.51, "grad_norm": 2.0496246814727783, "learning_rate": 9.763712808746665e-05, "loss": 1.1638, "step": 6656 }, { "epoch": 0.51, "grad_norm": 1.210849642753601, "learning_rate": 9.761314407375664e-05, "loss": 1.9115, "step": 6657 }, { "epoch": 0.51, "grad_norm": 3.1147713661193848, "learning_rate": 9.758916019742397e-05, "loss": 0.8321, "step": 6658 }, { "epoch": 0.51, "grad_norm": 2.3579890727996826, "learning_rate": 9.756517645984899e-05, "loss": 1.4725, "step": 6659 }, { "epoch": 0.51, "grad_norm": 2.418222665786743, "learning_rate": 9.754119286241207e-05, "loss": 1.8189, "step": 6660 }, { "epoch": 0.51, "grad_norm": 1.015899896621704, 
"learning_rate": 9.751720940649369e-05, "loss": 1.5231, "step": 6661 }, { "epoch": 0.51, "grad_norm": 1.5619761943817139, "learning_rate": 9.749322609347415e-05, "loss": 0.9235, "step": 6662 }, { "epoch": 0.51, "grad_norm": 1.434499979019165, "learning_rate": 9.74692429247339e-05, "loss": 1.1658, "step": 6663 }, { "epoch": 0.51, "grad_norm": 1.973778247833252, "learning_rate": 9.74452599016533e-05, "loss": 1.8337, "step": 6664 }, { "epoch": 0.51, "grad_norm": 1.3386497497558594, "learning_rate": 9.742127702561263e-05, "loss": 2.144, "step": 6665 }, { "epoch": 0.51, "grad_norm": 0.9616943597793579, "learning_rate": 9.739729429799232e-05, "loss": 1.2213, "step": 6666 }, { "epoch": 0.51, "grad_norm": 2.4843127727508545, "learning_rate": 9.737331172017274e-05, "loss": 1.1848, "step": 6667 }, { "epoch": 0.51, "grad_norm": 2.1136155128479004, "learning_rate": 9.734932929353414e-05, "loss": 1.4044, "step": 6668 }, { "epoch": 0.51, "grad_norm": 1.9354429244995117, "learning_rate": 9.732534701945688e-05, "loss": 1.4473, "step": 6669 }, { "epoch": 0.51, "grad_norm": 4.035079002380371, "learning_rate": 9.730136489932133e-05, "loss": 1.4863, "step": 6670 }, { "epoch": 0.51, "grad_norm": 1.173321008682251, "learning_rate": 9.72773829345077e-05, "loss": 1.4972, "step": 6671 }, { "epoch": 0.51, "grad_norm": 0.9833093285560608, "learning_rate": 9.725340112639637e-05, "loss": 1.2831, "step": 6672 }, { "epoch": 0.51, "grad_norm": 2.215932846069336, "learning_rate": 9.722941947636761e-05, "loss": 1.1316, "step": 6673 }, { "epoch": 0.51, "grad_norm": 1.3765058517456055, "learning_rate": 9.72054379858017e-05, "loss": 1.3523, "step": 6674 }, { "epoch": 0.51, "grad_norm": 3.4746134281158447, "learning_rate": 9.718145665607888e-05, "loss": 1.2485, "step": 6675 }, { "epoch": 0.51, "grad_norm": 2.136070966720581, "learning_rate": 9.715747548857949e-05, "loss": 1.7592, "step": 6676 }, { "epoch": 0.51, "grad_norm": 1.266459345817566, "learning_rate": 9.71334944846837e-05, "loss": 1.327, 
"step": 6677 }, { "epoch": 0.51, "grad_norm": 1.8032679557800293, "learning_rate": 9.710951364577181e-05, "loss": 1.7097, "step": 6678 }, { "epoch": 0.51, "grad_norm": 1.6669975519180298, "learning_rate": 9.708553297322406e-05, "loss": 1.0364, "step": 6679 }, { "epoch": 0.51, "grad_norm": 1.3975598812103271, "learning_rate": 9.706155246842062e-05, "loss": 1.8706, "step": 6680 }, { "epoch": 0.51, "grad_norm": 2.0118072032928467, "learning_rate": 9.703757213274172e-05, "loss": 1.9303, "step": 6681 }, { "epoch": 0.51, "grad_norm": 1.633188247680664, "learning_rate": 9.701359196756764e-05, "loss": 1.9054, "step": 6682 }, { "epoch": 0.51, "grad_norm": 1.2017358541488647, "learning_rate": 9.698961197427849e-05, "loss": 1.3355, "step": 6683 }, { "epoch": 0.51, "grad_norm": 3.0125391483306885, "learning_rate": 9.696563215425448e-05, "loss": 2.5419, "step": 6684 }, { "epoch": 0.51, "grad_norm": 1.3258825540542603, "learning_rate": 9.694165250887584e-05, "loss": 1.5679, "step": 6685 }, { "epoch": 0.51, "grad_norm": 2.7415528297424316, "learning_rate": 9.691767303952264e-05, "loss": 1.9827, "step": 6686 }, { "epoch": 0.51, "grad_norm": 7.394689083099365, "learning_rate": 9.689369374757512e-05, "loss": 1.7701, "step": 6687 }, { "epoch": 0.51, "grad_norm": 1.1252208948135376, "learning_rate": 9.68697146344134e-05, "loss": 1.7536, "step": 6688 }, { "epoch": 0.51, "grad_norm": 2.0663578510284424, "learning_rate": 9.684573570141758e-05, "loss": 1.2123, "step": 6689 }, { "epoch": 0.51, "grad_norm": 1.498765230178833, "learning_rate": 9.682175694996781e-05, "loss": 1.6353, "step": 6690 }, { "epoch": 0.51, "grad_norm": 1.2229219675064087, "learning_rate": 9.679777838144425e-05, "loss": 1.0266, "step": 6691 }, { "epoch": 0.51, "grad_norm": 3.0803887844085693, "learning_rate": 9.677379999722694e-05, "loss": 2.0123, "step": 6692 }, { "epoch": 0.51, "grad_norm": 1.132120966911316, "learning_rate": 9.674982179869599e-05, "loss": 1.1923, "step": 6693 }, { "epoch": 0.51, "grad_norm": 
1.1969956159591675, "learning_rate": 9.672584378723154e-05, "loss": 1.2758, "step": 6694 }, { "epoch": 0.51, "grad_norm": 1.1353532075881958, "learning_rate": 9.670186596421357e-05, "loss": 1.8155, "step": 6695 }, { "epoch": 0.51, "grad_norm": 1.3889124393463135, "learning_rate": 9.66778883310222e-05, "loss": 1.7091, "step": 6696 }, { "epoch": 0.51, "grad_norm": 1.5438333749771118, "learning_rate": 9.66539108890375e-05, "loss": 1.6547, "step": 6697 }, { "epoch": 0.51, "grad_norm": 1.1990747451782227, "learning_rate": 9.662993363963941e-05, "loss": 1.834, "step": 6698 }, { "epoch": 0.51, "grad_norm": 2.1816303730010986, "learning_rate": 9.660595658420805e-05, "loss": 1.4857, "step": 6699 }, { "epoch": 0.51, "grad_norm": 1.3192329406738281, "learning_rate": 9.658197972412345e-05, "loss": 1.6685, "step": 6700 }, { "epoch": 0.51, "grad_norm": 2.556454658508301, "learning_rate": 9.655800306076556e-05, "loss": 1.4848, "step": 6701 }, { "epoch": 0.51, "grad_norm": 2.1422104835510254, "learning_rate": 9.653402659551437e-05, "loss": 1.748, "step": 6702 }, { "epoch": 0.51, "grad_norm": 1.499541997909546, "learning_rate": 9.651005032974994e-05, "loss": 2.069, "step": 6703 }, { "epoch": 0.51, "grad_norm": 1.5947009325027466, "learning_rate": 9.648607426485213e-05, "loss": 1.7983, "step": 6704 }, { "epoch": 0.51, "grad_norm": 3.2489922046661377, "learning_rate": 9.646209840220098e-05, "loss": 1.1701, "step": 6705 }, { "epoch": 0.51, "grad_norm": 1.9676095247268677, "learning_rate": 9.643812274317644e-05, "loss": 1.1752, "step": 6706 }, { "epoch": 0.51, "grad_norm": 3.564142942428589, "learning_rate": 9.641414728915842e-05, "loss": 1.3231, "step": 6707 }, { "epoch": 0.51, "grad_norm": 2.1885898113250732, "learning_rate": 9.639017204152682e-05, "loss": 1.3103, "step": 6708 }, { "epoch": 0.51, "grad_norm": 1.2244747877120972, "learning_rate": 9.636619700166164e-05, "loss": 0.9659, "step": 6709 }, { "epoch": 0.51, "grad_norm": 0.9488661885261536, "learning_rate": 
9.634222217094267e-05, "loss": 0.9773, "step": 6710 }, { "epoch": 0.51, "grad_norm": 1.8409714698791504, "learning_rate": 9.63182475507499e-05, "loss": 1.7421, "step": 6711 }, { "epoch": 0.51, "grad_norm": 2.584796905517578, "learning_rate": 9.629427314246317e-05, "loss": 2.0278, "step": 6712 }, { "epoch": 0.51, "grad_norm": 0.9674278497695923, "learning_rate": 9.62702989474623e-05, "loss": 1.768, "step": 6713 }, { "epoch": 0.51, "grad_norm": 1.9548982381820679, "learning_rate": 9.624632496712718e-05, "loss": 0.9996, "step": 6714 }, { "epoch": 0.51, "grad_norm": 2.0133864879608154, "learning_rate": 9.622235120283769e-05, "loss": 1.1521, "step": 6715 }, { "epoch": 0.51, "grad_norm": 1.3787189722061157, "learning_rate": 9.619837765597361e-05, "loss": 1.5436, "step": 6716 }, { "epoch": 0.51, "grad_norm": 1.2511078119277954, "learning_rate": 9.617440432791473e-05, "loss": 1.4805, "step": 6717 }, { "epoch": 0.51, "grad_norm": 1.3710119724273682, "learning_rate": 9.615043122004094e-05, "loss": 1.5614, "step": 6718 }, { "epoch": 0.51, "grad_norm": 1.6142889261245728, "learning_rate": 9.612645833373192e-05, "loss": 1.6801, "step": 6719 }, { "epoch": 0.51, "grad_norm": 1.6467279195785522, "learning_rate": 9.610248567036755e-05, "loss": 1.4873, "step": 6720 }, { "epoch": 0.51, "grad_norm": 3.181391477584839, "learning_rate": 9.607851323132756e-05, "loss": 1.5014, "step": 6721 }, { "epoch": 0.51, "grad_norm": 2.401761770248413, "learning_rate": 9.605454101799165e-05, "loss": 1.9684, "step": 6722 }, { "epoch": 0.51, "grad_norm": 1.9212312698364258, "learning_rate": 9.60305690317396e-05, "loss": 1.8921, "step": 6723 }, { "epoch": 0.51, "grad_norm": 1.288552165031433, "learning_rate": 9.600659727395119e-05, "loss": 1.3205, "step": 6724 }, { "epoch": 0.51, "grad_norm": 1.3080310821533203, "learning_rate": 9.598262574600605e-05, "loss": 1.0684, "step": 6725 }, { "epoch": 0.51, "grad_norm": 1.5453029870986938, "learning_rate": 9.595865444928389e-05, "loss": 1.5894, "step": 6726 }, 
{ "epoch": 0.51, "grad_norm": 1.2486631870269775, "learning_rate": 9.593468338516446e-05, "loss": 1.5823, "step": 6727 }, { "epoch": 0.51, "grad_norm": 1.2135587930679321, "learning_rate": 9.591071255502733e-05, "loss": 1.5632, "step": 6728 }, { "epoch": 0.51, "grad_norm": 2.274005889892578, "learning_rate": 9.588674196025225e-05, "loss": 1.7725, "step": 6729 }, { "epoch": 0.51, "grad_norm": 1.533021330833435, "learning_rate": 9.586277160221884e-05, "loss": 1.4821, "step": 6730 }, { "epoch": 0.51, "grad_norm": 4.668179988861084, "learning_rate": 9.583880148230672e-05, "loss": 1.9399, "step": 6731 }, { "epoch": 0.51, "grad_norm": 1.3242268562316895, "learning_rate": 9.581483160189549e-05, "loss": 1.7595, "step": 6732 }, { "epoch": 0.51, "grad_norm": 0.9784364700317383, "learning_rate": 9.579086196236482e-05, "loss": 1.5975, "step": 6733 }, { "epoch": 0.51, "grad_norm": 1.8921316862106323, "learning_rate": 9.576689256509421e-05, "loss": 2.0753, "step": 6734 }, { "epoch": 0.51, "grad_norm": 1.1031417846679688, "learning_rate": 9.574292341146332e-05, "loss": 1.0577, "step": 6735 }, { "epoch": 0.51, "grad_norm": 1.2085989713668823, "learning_rate": 9.571895450285168e-05, "loss": 1.4724, "step": 6736 }, { "epoch": 0.51, "grad_norm": 1.835673213005066, "learning_rate": 9.56949858406388e-05, "loss": 1.7916, "step": 6737 }, { "epoch": 0.51, "grad_norm": 2.675421953201294, "learning_rate": 9.567101742620425e-05, "loss": 1.0968, "step": 6738 }, { "epoch": 0.51, "grad_norm": 1.0765799283981323, "learning_rate": 9.56470492609276e-05, "loss": 1.6777, "step": 6739 }, { "epoch": 0.51, "grad_norm": 2.379431962966919, "learning_rate": 9.562308134618828e-05, "loss": 1.4392, "step": 6740 }, { "epoch": 0.51, "grad_norm": 2.0953316688537598, "learning_rate": 9.55991136833658e-05, "loss": 1.9845, "step": 6741 }, { "epoch": 0.51, "grad_norm": 1.3646618127822876, "learning_rate": 9.557514627383967e-05, "loss": 1.0387, "step": 6742 }, { "epoch": 0.51, "grad_norm": 1.4297206401824951, 
"learning_rate": 9.555117911898927e-05, "loss": 1.4345, "step": 6743 }, { "epoch": 0.51, "grad_norm": 1.0662137269973755, "learning_rate": 9.552721222019414e-05, "loss": 1.4316, "step": 6744 }, { "epoch": 0.51, "grad_norm": 1.1174224615097046, "learning_rate": 9.550324557883373e-05, "loss": 1.652, "step": 6745 }, { "epoch": 0.51, "grad_norm": 2.0972635746002197, "learning_rate": 9.547927919628732e-05, "loss": 1.3491, "step": 6746 }, { "epoch": 0.51, "grad_norm": 1.1203573942184448, "learning_rate": 9.545531307393441e-05, "loss": 1.47, "step": 6747 }, { "epoch": 0.51, "grad_norm": 1.0251307487487793, "learning_rate": 9.543134721315443e-05, "loss": 2.0866, "step": 6748 }, { "epoch": 0.51, "grad_norm": 1.2278648614883423, "learning_rate": 9.540738161532668e-05, "loss": 1.4932, "step": 6749 }, { "epoch": 0.52, "grad_norm": 2.1565589904785156, "learning_rate": 9.538341628183049e-05, "loss": 1.8, "step": 6750 }, { "epoch": 0.52, "grad_norm": 1.3753843307495117, "learning_rate": 9.535945121404531e-05, "loss": 1.4337, "step": 6751 }, { "epoch": 0.52, "grad_norm": 1.167726755142212, "learning_rate": 9.533548641335037e-05, "loss": 1.6192, "step": 6752 }, { "epoch": 0.52, "grad_norm": 1.838971734046936, "learning_rate": 9.531152188112505e-05, "loss": 1.7899, "step": 6753 }, { "epoch": 0.52, "grad_norm": 0.9520559906959534, "learning_rate": 9.528755761874862e-05, "loss": 1.6032, "step": 6754 }, { "epoch": 0.52, "grad_norm": 1.0701751708984375, "learning_rate": 9.526359362760032e-05, "loss": 1.153, "step": 6755 }, { "epoch": 0.52, "grad_norm": 1.632973074913025, "learning_rate": 9.523962990905946e-05, "loss": 1.3223, "step": 6756 }, { "epoch": 0.52, "grad_norm": 2.090667963027954, "learning_rate": 9.521566646450533e-05, "loss": 2.411, "step": 6757 }, { "epoch": 0.52, "grad_norm": 0.9646158814430237, "learning_rate": 9.519170329531708e-05, "loss": 1.2716, "step": 6758 }, { "epoch": 0.52, "grad_norm": 1.3494383096694946, "learning_rate": 9.516774040287394e-05, "loss": 1.7485, 
"step": 6759 }, { "epoch": 0.52, "grad_norm": 1.5768412351608276, "learning_rate": 9.514377778855521e-05, "loss": 1.7218, "step": 6760 }, { "epoch": 0.52, "grad_norm": 1.1162279844284058, "learning_rate": 9.511981545373994e-05, "loss": 0.9183, "step": 6761 }, { "epoch": 0.52, "grad_norm": 3.292351007461548, "learning_rate": 9.50958533998074e-05, "loss": 1.3209, "step": 6762 }, { "epoch": 0.52, "grad_norm": 1.3886139392852783, "learning_rate": 9.507189162813668e-05, "loss": 1.3858, "step": 6763 }, { "epoch": 0.52, "grad_norm": 1.2795439958572388, "learning_rate": 9.504793014010699e-05, "loss": 1.9661, "step": 6764 }, { "epoch": 0.52, "grad_norm": 1.1366322040557861, "learning_rate": 9.502396893709736e-05, "loss": 1.3062, "step": 6765 }, { "epoch": 0.52, "grad_norm": 1.1836137771606445, "learning_rate": 9.500000802048698e-05, "loss": 0.944, "step": 6766 }, { "epoch": 0.52, "grad_norm": 1.3561456203460693, "learning_rate": 9.497604739165488e-05, "loss": 1.5182, "step": 6767 }, { "epoch": 0.52, "grad_norm": 1.1422001123428345, "learning_rate": 9.495208705198016e-05, "loss": 1.268, "step": 6768 }, { "epoch": 0.52, "grad_norm": 2.0118815898895264, "learning_rate": 9.492812700284186e-05, "loss": 2.0188, "step": 6769 }, { "epoch": 0.52, "grad_norm": 2.5241479873657227, "learning_rate": 9.490416724561907e-05, "loss": 1.4038, "step": 6770 }, { "epoch": 0.52, "grad_norm": 1.1349809169769287, "learning_rate": 9.488020778169071e-05, "loss": 0.7948, "step": 6771 }, { "epoch": 0.52, "grad_norm": 1.8952709436416626, "learning_rate": 9.485624861243588e-05, "loss": 1.8162, "step": 6772 }, { "epoch": 0.52, "grad_norm": 1.7594425678253174, "learning_rate": 9.483228973923354e-05, "loss": 1.8111, "step": 6773 }, { "epoch": 0.52, "grad_norm": 0.9848475456237793, "learning_rate": 9.480833116346264e-05, "loss": 1.2256, "step": 6774 }, { "epoch": 0.52, "grad_norm": 1.5006258487701416, "learning_rate": 9.478437288650213e-05, "loss": 1.2322, "step": 6775 }, { "epoch": 0.52, "grad_norm": 
1.220366358757019, "learning_rate": 9.4760414909731e-05, "loss": 0.8794, "step": 6776 }, { "epoch": 0.52, "grad_norm": 2.397484540939331, "learning_rate": 9.473645723452812e-05, "loss": 1.3598, "step": 6777 }, { "epoch": 0.52, "grad_norm": 1.1228922605514526, "learning_rate": 9.471249986227238e-05, "loss": 1.141, "step": 6778 }, { "epoch": 0.52, "grad_norm": 0.9732511043548584, "learning_rate": 9.468854279434274e-05, "loss": 0.9648, "step": 6779 }, { "epoch": 0.52, "grad_norm": 1.3007456064224243, "learning_rate": 9.466458603211796e-05, "loss": 1.2444, "step": 6780 }, { "epoch": 0.52, "grad_norm": 4.83302640914917, "learning_rate": 9.464062957697699e-05, "loss": 1.85, "step": 6781 }, { "epoch": 0.52, "grad_norm": 2.3320751190185547, "learning_rate": 9.461667343029863e-05, "loss": 1.3083, "step": 6782 }, { "epoch": 0.52, "grad_norm": 1.5544788837432861, "learning_rate": 9.459271759346163e-05, "loss": 1.5373, "step": 6783 }, { "epoch": 0.52, "grad_norm": 1.5108381509780884, "learning_rate": 9.456876206784486e-05, "loss": 1.336, "step": 6784 }, { "epoch": 0.52, "grad_norm": 1.290866732597351, "learning_rate": 9.45448068548271e-05, "loss": 1.8121, "step": 6785 }, { "epoch": 0.52, "grad_norm": 1.645278811454773, "learning_rate": 9.452085195578707e-05, "loss": 1.3609, "step": 6786 }, { "epoch": 0.52, "grad_norm": 1.4153048992156982, "learning_rate": 9.449689737210352e-05, "loss": 1.4358, "step": 6787 }, { "epoch": 0.52, "grad_norm": 1.0660512447357178, "learning_rate": 9.447294310515521e-05, "loss": 0.8438, "step": 6788 }, { "epoch": 0.52, "grad_norm": 1.1617227792739868, "learning_rate": 9.444898915632079e-05, "loss": 1.7367, "step": 6789 }, { "epoch": 0.52, "grad_norm": 1.318830966949463, "learning_rate": 9.442503552697899e-05, "loss": 0.6268, "step": 6790 }, { "epoch": 0.52, "grad_norm": 1.3364166021347046, "learning_rate": 9.44010822185085e-05, "loss": 1.3303, "step": 6791 }, { "epoch": 0.52, "grad_norm": 2.069105625152588, "learning_rate": 9.437712923228788e-05, 
"loss": 1.0044, "step": 6792 }, { "epoch": 0.52, "grad_norm": 1.9960943460464478, "learning_rate": 9.435317656969582e-05, "loss": 1.7797, "step": 6793 }, { "epoch": 0.52, "grad_norm": 1.2272902727127075, "learning_rate": 9.432922423211099e-05, "loss": 1.7301, "step": 6794 }, { "epoch": 0.52, "grad_norm": 0.9827807545661926, "learning_rate": 9.43052722209119e-05, "loss": 1.4297, "step": 6795 }, { "epoch": 0.52, "grad_norm": 1.4459893703460693, "learning_rate": 9.428132053747712e-05, "loss": 1.5553, "step": 6796 }, { "epoch": 0.52, "grad_norm": 1.3639322519302368, "learning_rate": 9.42573691831853e-05, "loss": 1.1547, "step": 6797 }, { "epoch": 0.52, "grad_norm": 0.9839439988136292, "learning_rate": 9.423341815941487e-05, "loss": 1.476, "step": 6798 }, { "epoch": 0.52, "grad_norm": 1.6678684949874878, "learning_rate": 9.420946746754442e-05, "loss": 1.5148, "step": 6799 }, { "epoch": 0.52, "grad_norm": 1.8340065479278564, "learning_rate": 9.418551710895243e-05, "loss": 2.0211, "step": 6800 }, { "epoch": 0.52, "grad_norm": 2.282864570617676, "learning_rate": 9.416156708501738e-05, "loss": 1.4566, "step": 6801 }, { "epoch": 0.52, "grad_norm": 1.0961838960647583, "learning_rate": 9.413761739711771e-05, "loss": 0.9031, "step": 6802 }, { "epoch": 0.52, "grad_norm": 1.2881122827529907, "learning_rate": 9.411366804663191e-05, "loss": 1.3615, "step": 6803 }, { "epoch": 0.52, "grad_norm": 3.309981107711792, "learning_rate": 9.408971903493835e-05, "loss": 1.905, "step": 6804 }, { "epoch": 0.52, "grad_norm": 1.6143722534179688, "learning_rate": 9.406577036341548e-05, "loss": 1.897, "step": 6805 }, { "epoch": 0.52, "grad_norm": 2.3873395919799805, "learning_rate": 9.404182203344167e-05, "loss": 1.0594, "step": 6806 }, { "epoch": 0.52, "grad_norm": 1.448751449584961, "learning_rate": 9.401787404639524e-05, "loss": 1.5847, "step": 6807 }, { "epoch": 0.52, "grad_norm": 1.3161613941192627, "learning_rate": 9.399392640365459e-05, "loss": 1.4686, "step": 6808 }, { "epoch": 0.52, 
"grad_norm": 1.1026279926300049, "learning_rate": 9.396997910659807e-05, "loss": 0.904, "step": 6809 }, { "epoch": 0.52, "grad_norm": 1.3527764081954956, "learning_rate": 9.39460321566039e-05, "loss": 1.81, "step": 6810 }, { "epoch": 0.52, "grad_norm": 1.0788482427597046, "learning_rate": 9.392208555505039e-05, "loss": 1.4327, "step": 6811 }, { "epoch": 0.52, "grad_norm": 1.167508602142334, "learning_rate": 9.389813930331587e-05, "loss": 1.4918, "step": 6812 }, { "epoch": 0.52, "grad_norm": 1.2863065004348755, "learning_rate": 9.387419340277848e-05, "loss": 1.3816, "step": 6813 }, { "epoch": 0.52, "grad_norm": 1.9520930051803589, "learning_rate": 9.385024785481654e-05, "loss": 1.9257, "step": 6814 }, { "epoch": 0.52, "grad_norm": 1.3553411960601807, "learning_rate": 9.382630266080822e-05, "loss": 1.7191, "step": 6815 }, { "epoch": 0.52, "grad_norm": 1.0325983762741089, "learning_rate": 9.380235782213165e-05, "loss": 1.4635, "step": 6816 }, { "epoch": 0.52, "grad_norm": 1.290109395980835, "learning_rate": 9.377841334016506e-05, "loss": 1.5143, "step": 6817 }, { "epoch": 0.52, "grad_norm": 0.9877443909645081, "learning_rate": 9.37544692162866e-05, "loss": 1.4205, "step": 6818 }, { "epoch": 0.52, "grad_norm": 1.7308884859085083, "learning_rate": 9.373052545187434e-05, "loss": 1.4364, "step": 6819 }, { "epoch": 0.52, "grad_norm": 2.021898031234741, "learning_rate": 9.37065820483064e-05, "loss": 1.1694, "step": 6820 }, { "epoch": 0.52, "grad_norm": 1.359763741493225, "learning_rate": 9.36826390069609e-05, "loss": 1.6469, "step": 6821 }, { "epoch": 0.52, "grad_norm": 1.1068469285964966, "learning_rate": 9.365869632921582e-05, "loss": 1.0732, "step": 6822 }, { "epoch": 0.52, "grad_norm": 1.9559067487716675, "learning_rate": 9.363475401644927e-05, "loss": 1.6445, "step": 6823 }, { "epoch": 0.52, "grad_norm": 1.3037433624267578, "learning_rate": 9.361081207003927e-05, "loss": 1.2788, "step": 6824 }, { "epoch": 0.52, "grad_norm": 1.4253405332565308, "learning_rate": 
9.358687049136376e-05, "loss": 1.8453, "step": 6825 }, { "epoch": 0.52, "grad_norm": 0.9464484453201294, "learning_rate": 9.356292928180073e-05, "loss": 0.997, "step": 6826 }, { "epoch": 0.52, "grad_norm": 1.1509571075439453, "learning_rate": 9.35389884427282e-05, "loss": 1.1202, "step": 6827 }, { "epoch": 0.52, "grad_norm": 2.384227991104126, "learning_rate": 9.351504797552403e-05, "loss": 1.447, "step": 6828 }, { "epoch": 0.52, "grad_norm": 1.2330889701843262, "learning_rate": 9.349110788156614e-05, "loss": 1.5391, "step": 6829 }, { "epoch": 0.52, "grad_norm": 0.9652594327926636, "learning_rate": 9.346716816223245e-05, "loss": 1.3012, "step": 6830 }, { "epoch": 0.52, "grad_norm": 2.094520092010498, "learning_rate": 9.344322881890079e-05, "loss": 1.0474, "step": 6831 }, { "epoch": 0.52, "grad_norm": 1.4185642004013062, "learning_rate": 9.341928985294906e-05, "loss": 1.6456, "step": 6832 }, { "epoch": 0.52, "grad_norm": 1.0432995557785034, "learning_rate": 9.339535126575505e-05, "loss": 1.0894, "step": 6833 }, { "epoch": 0.52, "grad_norm": 1.1919095516204834, "learning_rate": 9.337141305869657e-05, "loss": 1.1859, "step": 6834 }, { "epoch": 0.52, "grad_norm": 3.717689275741577, "learning_rate": 9.334747523315137e-05, "loss": 1.409, "step": 6835 }, { "epoch": 0.52, "grad_norm": 1.0254559516906738, "learning_rate": 9.332353779049729e-05, "loss": 1.4722, "step": 6836 }, { "epoch": 0.52, "grad_norm": 1.1761326789855957, "learning_rate": 9.329960073211197e-05, "loss": 0.8859, "step": 6837 }, { "epoch": 0.52, "grad_norm": 3.0465571880340576, "learning_rate": 9.327566405937321e-05, "loss": 2.0767, "step": 6838 }, { "epoch": 0.52, "grad_norm": 0.9406437873840332, "learning_rate": 9.325172777365867e-05, "loss": 0.875, "step": 6839 }, { "epoch": 0.52, "grad_norm": 1.4587377309799194, "learning_rate": 9.322779187634598e-05, "loss": 1.6103, "step": 6840 }, { "epoch": 0.52, "grad_norm": 3.618734359741211, "learning_rate": 9.320385636881283e-05, "loss": 1.4429, "step": 6841 }, { 
"epoch": 0.52, "grad_norm": 1.9324402809143066, "learning_rate": 9.317992125243688e-05, "loss": 0.9038, "step": 6842 }, { "epoch": 0.52, "grad_norm": 1.151268720626831, "learning_rate": 9.315598652859567e-05, "loss": 1.2509, "step": 6843 }, { "epoch": 0.52, "grad_norm": 1.6418485641479492, "learning_rate": 9.313205219866679e-05, "loss": 1.5272, "step": 6844 }, { "epoch": 0.52, "grad_norm": 1.8083786964416504, "learning_rate": 9.310811826402786e-05, "loss": 1.485, "step": 6845 }, { "epoch": 0.52, "grad_norm": 1.3154678344726562, "learning_rate": 9.308418472605633e-05, "loss": 1.6594, "step": 6846 }, { "epoch": 0.52, "grad_norm": 5.393049240112305, "learning_rate": 9.306025158612977e-05, "loss": 1.8877, "step": 6847 }, { "epoch": 0.52, "grad_norm": 0.9501404166221619, "learning_rate": 9.303631884562569e-05, "loss": 1.1397, "step": 6848 }, { "epoch": 0.52, "grad_norm": 1.3291668891906738, "learning_rate": 9.301238650592146e-05, "loss": 1.4335, "step": 6849 }, { "epoch": 0.52, "grad_norm": 1.667364239692688, "learning_rate": 9.298845456839459e-05, "loss": 0.699, "step": 6850 }, { "epoch": 0.52, "grad_norm": 1.1772894859313965, "learning_rate": 9.296452303442253e-05, "loss": 1.353, "step": 6851 }, { "epoch": 0.52, "grad_norm": 1.4091047048568726, "learning_rate": 9.294059190538263e-05, "loss": 1.205, "step": 6852 }, { "epoch": 0.52, "grad_norm": 1.3849011659622192, "learning_rate": 9.291666118265225e-05, "loss": 1.9378, "step": 6853 }, { "epoch": 0.52, "grad_norm": 1.2816652059555054, "learning_rate": 9.289273086760882e-05, "loss": 1.2087, "step": 6854 }, { "epoch": 0.52, "grad_norm": 0.9871429800987244, "learning_rate": 9.286880096162956e-05, "loss": 1.4798, "step": 6855 }, { "epoch": 0.52, "grad_norm": 1.344515085220337, "learning_rate": 9.284487146609186e-05, "loss": 1.7447, "step": 6856 }, { "epoch": 0.52, "grad_norm": 1.7944356203079224, "learning_rate": 9.282094238237298e-05, "loss": 1.8954, "step": 6857 }, { "epoch": 0.52, "grad_norm": 1.0239123106002808, 
"learning_rate": 9.279701371185012e-05, "loss": 1.1245, "step": 6858 }, { "epoch": 0.52, "grad_norm": 1.909778356552124, "learning_rate": 9.277308545590056e-05, "loss": 2.2763, "step": 6859 }, { "epoch": 0.52, "grad_norm": 2.0082387924194336, "learning_rate": 9.274915761590156e-05, "loss": 0.8812, "step": 6860 }, { "epoch": 0.52, "grad_norm": 1.55515718460083, "learning_rate": 9.272523019323019e-05, "loss": 1.599, "step": 6861 }, { "epoch": 0.52, "grad_norm": 1.2831486463546753, "learning_rate": 9.270130318926371e-05, "loss": 1.4114, "step": 6862 }, { "epoch": 0.52, "grad_norm": 1.4619978666305542, "learning_rate": 9.267737660537923e-05, "loss": 0.9382, "step": 6863 }, { "epoch": 0.52, "grad_norm": 1.4496248960494995, "learning_rate": 9.265345044295382e-05, "loss": 1.5862, "step": 6864 }, { "epoch": 0.52, "grad_norm": 1.4253594875335693, "learning_rate": 9.262952470336458e-05, "loss": 0.8257, "step": 6865 }, { "epoch": 0.52, "grad_norm": 1.6754878759384155, "learning_rate": 9.260559938798865e-05, "loss": 0.4085, "step": 6866 }, { "epoch": 0.52, "grad_norm": 1.6382107734680176, "learning_rate": 9.258167449820299e-05, "loss": 1.3759, "step": 6867 }, { "epoch": 0.52, "grad_norm": 2.0046167373657227, "learning_rate": 9.255775003538462e-05, "loss": 1.3496, "step": 6868 }, { "epoch": 0.52, "grad_norm": 1.111085295677185, "learning_rate": 9.253382600091059e-05, "loss": 1.3454, "step": 6869 }, { "epoch": 0.52, "grad_norm": 0.9233832359313965, "learning_rate": 9.250990239615776e-05, "loss": 0.5901, "step": 6870 }, { "epoch": 0.52, "grad_norm": 1.13092041015625, "learning_rate": 9.248597922250318e-05, "loss": 1.0934, "step": 6871 }, { "epoch": 0.52, "grad_norm": 2.239114761352539, "learning_rate": 9.246205648132374e-05, "loss": 1.126, "step": 6872 }, { "epoch": 0.52, "grad_norm": 1.3112130165100098, "learning_rate": 9.243813417399627e-05, "loss": 1.2459, "step": 6873 }, { "epoch": 0.52, "grad_norm": 2.6282308101654053, "learning_rate": 9.241421230189767e-05, "loss": 1.2604, 
"step": 6874 }, { "epoch": 0.52, "grad_norm": 2.2011797428131104, "learning_rate": 9.239029086640484e-05, "loss": 2.0513, "step": 6875 }, { "epoch": 0.52, "grad_norm": 1.1082637310028076, "learning_rate": 9.236636986889453e-05, "loss": 0.8571, "step": 6876 }, { "epoch": 0.52, "grad_norm": 0.9122893214225769, "learning_rate": 9.234244931074352e-05, "loss": 1.4074, "step": 6877 }, { "epoch": 0.52, "grad_norm": 1.9478622674942017, "learning_rate": 9.231852919332865e-05, "loss": 1.782, "step": 6878 }, { "epoch": 0.52, "grad_norm": 3.140110492706299, "learning_rate": 9.229460951802657e-05, "loss": 1.3415, "step": 6879 }, { "epoch": 0.52, "grad_norm": 1.0427532196044922, "learning_rate": 9.227069028621406e-05, "loss": 1.1614, "step": 6880 }, { "epoch": 0.53, "grad_norm": 1.8158507347106934, "learning_rate": 9.22467714992678e-05, "loss": 1.1687, "step": 6881 }, { "epoch": 0.53, "grad_norm": 1.1672685146331787, "learning_rate": 9.222285315856441e-05, "loss": 1.2554, "step": 6882 }, { "epoch": 0.53, "grad_norm": 1.9183114767074585, "learning_rate": 9.219893526548057e-05, "loss": 1.7281, "step": 6883 }, { "epoch": 0.53, "grad_norm": 1.1819998025894165, "learning_rate": 9.217501782139291e-05, "loss": 1.5359, "step": 6884 }, { "epoch": 0.53, "grad_norm": 1.0686346292495728, "learning_rate": 9.215110082767797e-05, "loss": 1.2591, "step": 6885 }, { "epoch": 0.53, "grad_norm": 2.2518417835235596, "learning_rate": 9.212718428571231e-05, "loss": 1.1973, "step": 6886 }, { "epoch": 0.53, "grad_norm": 1.022303581237793, "learning_rate": 9.210326819687253e-05, "loss": 1.5311, "step": 6887 }, { "epoch": 0.53, "grad_norm": 2.6265087127685547, "learning_rate": 9.207935256253504e-05, "loss": 1.2579, "step": 6888 }, { "epoch": 0.53, "grad_norm": 1.5864355564117432, "learning_rate": 9.205543738407641e-05, "loss": 0.9057, "step": 6889 }, { "epoch": 0.53, "grad_norm": 1.477502703666687, "learning_rate": 9.203152266287306e-05, "loss": 1.1988, "step": 6890 }, { "epoch": 0.53, "grad_norm": 
2.7686431407928467, "learning_rate": 9.200760840030141e-05, "loss": 1.8863, "step": 6891 }, { "epoch": 0.53, "grad_norm": 0.8785578012466431, "learning_rate": 9.198369459773785e-05, "loss": 0.8942, "step": 6892 }, { "epoch": 0.53, "grad_norm": 1.3412750959396362, "learning_rate": 9.195978125655884e-05, "loss": 1.5525, "step": 6893 }, { "epoch": 0.53, "grad_norm": 1.4101481437683105, "learning_rate": 9.193586837814063e-05, "loss": 1.2908, "step": 6894 }, { "epoch": 0.53, "grad_norm": 1.6522443294525146, "learning_rate": 9.19119559638596e-05, "loss": 1.5435, "step": 6895 }, { "epoch": 0.53, "grad_norm": 0.9929361939430237, "learning_rate": 9.188804401509204e-05, "loss": 1.5734, "step": 6896 }, { "epoch": 0.53, "grad_norm": 1.296183705329895, "learning_rate": 9.186413253321418e-05, "loss": 1.5961, "step": 6897 }, { "epoch": 0.53, "grad_norm": 1.8177733421325684, "learning_rate": 9.18402215196023e-05, "loss": 1.1179, "step": 6898 }, { "epoch": 0.53, "grad_norm": 1.0959802865982056, "learning_rate": 9.181631097563263e-05, "loss": 1.6199, "step": 6899 }, { "epoch": 0.53, "grad_norm": 2.3813533782958984, "learning_rate": 9.179240090268136e-05, "loss": 1.8596, "step": 6900 }, { "epoch": 0.53, "grad_norm": 1.7392712831497192, "learning_rate": 9.17684913021246e-05, "loss": 1.3988, "step": 6901 }, { "epoch": 0.53, "grad_norm": 1.3967887163162231, "learning_rate": 9.174458217533852e-05, "loss": 1.6305, "step": 6902 }, { "epoch": 0.53, "grad_norm": 1.3785895109176636, "learning_rate": 9.172067352369927e-05, "loss": 1.6382, "step": 6903 }, { "epoch": 0.53, "grad_norm": 7.245920658111572, "learning_rate": 9.169676534858285e-05, "loss": 1.4801, "step": 6904 }, { "epoch": 0.53, "grad_norm": 0.8795868754386902, "learning_rate": 9.167285765136533e-05, "loss": 0.9285, "step": 6905 }, { "epoch": 0.53, "grad_norm": 2.5746266841888428, "learning_rate": 9.164895043342281e-05, "loss": 1.2607, "step": 6906 }, { "epoch": 0.53, "grad_norm": 1.8587901592254639, "learning_rate": 
9.162504369613119e-05, "loss": 1.7383, "step": 6907 }, { "epoch": 0.53, "grad_norm": 1.3347902297973633, "learning_rate": 9.16011374408665e-05, "loss": 1.9036, "step": 6908 }, { "epoch": 0.53, "grad_norm": 1.7036466598510742, "learning_rate": 9.15772316690047e-05, "loss": 1.3563, "step": 6909 }, { "epoch": 0.53, "grad_norm": 1.450120210647583, "learning_rate": 9.15533263819216e-05, "loss": 0.9926, "step": 6910 }, { "epoch": 0.53, "grad_norm": 1.4232197999954224, "learning_rate": 9.152942158099318e-05, "loss": 0.8194, "step": 6911 }, { "epoch": 0.53, "grad_norm": 1.25455641746521, "learning_rate": 9.150551726759532e-05, "loss": 0.833, "step": 6912 }, { "epoch": 0.53, "grad_norm": 1.6480259895324707, "learning_rate": 9.148161344310377e-05, "loss": 1.2705, "step": 6913 }, { "epoch": 0.53, "grad_norm": 1.70684015750885, "learning_rate": 9.145771010889436e-05, "loss": 1.3445, "step": 6914 }, { "epoch": 0.53, "grad_norm": 1.2772756814956665, "learning_rate": 9.143380726634291e-05, "loss": 1.3606, "step": 6915 }, { "epoch": 0.53, "grad_norm": 1.3348662853240967, "learning_rate": 9.140990491682511e-05, "loss": 1.298, "step": 6916 }, { "epoch": 0.53, "grad_norm": 1.1473407745361328, "learning_rate": 9.13860030617167e-05, "loss": 1.0054, "step": 6917 }, { "epoch": 0.53, "grad_norm": 1.989709734916687, "learning_rate": 9.136210170239338e-05, "loss": 2.155, "step": 6918 }, { "epoch": 0.53, "grad_norm": 1.3003215789794922, "learning_rate": 9.133820084023078e-05, "loss": 1.5113, "step": 6919 }, { "epoch": 0.53, "grad_norm": 1.3340505361557007, "learning_rate": 9.131430047660454e-05, "loss": 1.0321, "step": 6920 }, { "epoch": 0.53, "grad_norm": 1.521632194519043, "learning_rate": 9.129040061289032e-05, "loss": 1.2606, "step": 6921 }, { "epoch": 0.53, "grad_norm": 1.7115519046783447, "learning_rate": 9.126650125046361e-05, "loss": 2.0877, "step": 6922 }, { "epoch": 0.53, "grad_norm": 1.3797547817230225, "learning_rate": 9.124260239069999e-05, "loss": 1.7306, "step": 6923 }, { 
"epoch": 0.53, "grad_norm": 1.1836116313934326, "learning_rate": 9.1218704034975e-05, "loss": 1.4145, "step": 6924 }, { "epoch": 0.53, "grad_norm": 2.4488375186920166, "learning_rate": 9.119480618466409e-05, "loss": 2.0028, "step": 6925 }, { "epoch": 0.53, "grad_norm": 2.1458442211151123, "learning_rate": 9.117090884114276e-05, "loss": 2.3655, "step": 6926 }, { "epoch": 0.53, "grad_norm": 1.5121285915374756, "learning_rate": 9.114701200578642e-05, "loss": 1.1479, "step": 6927 }, { "epoch": 0.53, "grad_norm": 5.136110305786133, "learning_rate": 9.112311567997045e-05, "loss": 2.4524, "step": 6928 }, { "epoch": 0.53, "grad_norm": 3.970311164855957, "learning_rate": 9.109921986507023e-05, "loss": 1.9348, "step": 6929 }, { "epoch": 0.53, "grad_norm": 1.1286115646362305, "learning_rate": 9.107532456246114e-05, "loss": 1.5356, "step": 6930 }, { "epoch": 0.53, "grad_norm": 1.3133517503738403, "learning_rate": 9.105142977351843e-05, "loss": 1.4914, "step": 6931 }, { "epoch": 0.53, "grad_norm": 1.747212529182434, "learning_rate": 9.102753549961744e-05, "loss": 1.2096, "step": 6932 }, { "epoch": 0.53, "grad_norm": 2.214625597000122, "learning_rate": 9.10036417421334e-05, "loss": 1.9846, "step": 6933 }, { "epoch": 0.53, "grad_norm": 1.4181312322616577, "learning_rate": 9.09797485024415e-05, "loss": 0.8582, "step": 6934 }, { "epoch": 0.53, "grad_norm": 1.573657512664795, "learning_rate": 9.095585578191697e-05, "loss": 1.2448, "step": 6935 }, { "epoch": 0.53, "grad_norm": 1.9005612134933472, "learning_rate": 9.093196358193501e-05, "loss": 1.3073, "step": 6936 }, { "epoch": 0.53, "grad_norm": 2.7698869705200195, "learning_rate": 9.090807190387068e-05, "loss": 1.4981, "step": 6937 }, { "epoch": 0.53, "grad_norm": 2.447251796722412, "learning_rate": 9.08841807490991e-05, "loss": 1.0478, "step": 6938 }, { "epoch": 0.53, "grad_norm": 1.4073235988616943, "learning_rate": 9.08602901189954e-05, "loss": 1.4748, "step": 6939 }, { "epoch": 0.53, "grad_norm": 2.768834352493286, 
"learning_rate": 9.083640001493454e-05, "loss": 1.4438, "step": 6940 }, { "epoch": 0.53, "grad_norm": 1.393911600112915, "learning_rate": 9.08125104382916e-05, "loss": 1.8215, "step": 6941 }, { "epoch": 0.53, "grad_norm": 2.4614744186401367, "learning_rate": 9.078862139044156e-05, "loss": 1.1617, "step": 6942 }, { "epoch": 0.53, "grad_norm": 1.5450459718704224, "learning_rate": 9.076473287275929e-05, "loss": 1.5051, "step": 6943 }, { "epoch": 0.53, "grad_norm": 1.9252341985702515, "learning_rate": 9.074084488661977e-05, "loss": 1.1172, "step": 6944 }, { "epoch": 0.53, "grad_norm": 1.2748303413391113, "learning_rate": 9.071695743339795e-05, "loss": 1.1229, "step": 6945 }, { "epoch": 0.53, "grad_norm": 1.4719462394714355, "learning_rate": 9.069307051446862e-05, "loss": 1.7326, "step": 6946 }, { "epoch": 0.53, "grad_norm": 2.387294292449951, "learning_rate": 9.066918413120657e-05, "loss": 1.5128, "step": 6947 }, { "epoch": 0.53, "grad_norm": 1.090442180633545, "learning_rate": 9.064529828498673e-05, "loss": 1.6001, "step": 6948 }, { "epoch": 0.53, "grad_norm": 2.3936939239501953, "learning_rate": 9.062141297718371e-05, "loss": 1.9186, "step": 6949 }, { "epoch": 0.53, "grad_norm": 1.8266490697860718, "learning_rate": 9.059752820917236e-05, "loss": 1.009, "step": 6950 }, { "epoch": 0.53, "grad_norm": 1.9783276319503784, "learning_rate": 9.057364398232739e-05, "loss": 1.2751, "step": 6951 }, { "epoch": 0.53, "grad_norm": 1.4039421081542969, "learning_rate": 9.054976029802337e-05, "loss": 1.4297, "step": 6952 }, { "epoch": 0.53, "grad_norm": 1.3286864757537842, "learning_rate": 9.052587715763501e-05, "loss": 1.226, "step": 6953 }, { "epoch": 0.53, "grad_norm": 2.0036051273345947, "learning_rate": 9.050199456253697e-05, "loss": 1.1028, "step": 6954 }, { "epoch": 0.53, "grad_norm": 1.7431150674819946, "learning_rate": 9.047811251410376e-05, "loss": 1.4951, "step": 6955 }, { "epoch": 0.53, "grad_norm": 1.5911002159118652, "learning_rate": 9.045423101370994e-05, "loss": 
1.3688, "step": 6956 }, { "epoch": 0.53, "grad_norm": 1.6900050640106201, "learning_rate": 9.043035006273006e-05, "loss": 1.5966, "step": 6957 }, { "epoch": 0.53, "grad_norm": 1.3301291465759277, "learning_rate": 9.040646966253856e-05, "loss": 1.354, "step": 6958 }, { "epoch": 0.53, "grad_norm": 4.3540873527526855, "learning_rate": 9.038258981450994e-05, "loss": 1.9342, "step": 6959 }, { "epoch": 0.53, "grad_norm": 2.1969401836395264, "learning_rate": 9.03587105200186e-05, "loss": 1.5528, "step": 6960 }, { "epoch": 0.53, "grad_norm": 1.3775025606155396, "learning_rate": 9.033483178043894e-05, "loss": 1.1639, "step": 6961 }, { "epoch": 0.53, "grad_norm": 1.1937737464904785, "learning_rate": 9.031095359714527e-05, "loss": 1.3478, "step": 6962 }, { "epoch": 0.53, "grad_norm": 3.31142258644104, "learning_rate": 9.028707597151201e-05, "loss": 1.8145, "step": 6963 }, { "epoch": 0.53, "grad_norm": 2.08555269241333, "learning_rate": 9.026319890491336e-05, "loss": 1.0501, "step": 6964 }, { "epoch": 0.53, "grad_norm": 1.315968632698059, "learning_rate": 9.023932239872366e-05, "loss": 1.4868, "step": 6965 }, { "epoch": 0.53, "grad_norm": 1.1754276752471924, "learning_rate": 9.021544645431711e-05, "loss": 1.567, "step": 6966 }, { "epoch": 0.53, "grad_norm": 1.1214890480041504, "learning_rate": 9.019157107306787e-05, "loss": 1.3254, "step": 6967 }, { "epoch": 0.53, "grad_norm": 1.8021323680877686, "learning_rate": 9.016769625635013e-05, "loss": 1.7654, "step": 6968 }, { "epoch": 0.53, "grad_norm": 2.606816053390503, "learning_rate": 9.01438220055381e-05, "loss": 1.3725, "step": 6969 }, { "epoch": 0.53, "grad_norm": 1.0479519367218018, "learning_rate": 9.011994832200577e-05, "loss": 1.2583, "step": 6970 }, { "epoch": 0.53, "grad_norm": 1.1421891450881958, "learning_rate": 9.009607520712723e-05, "loss": 1.3402, "step": 6971 }, { "epoch": 0.53, "grad_norm": 2.9832873344421387, "learning_rate": 9.007220266227658e-05, "loss": 1.6492, "step": 6972 }, { "epoch": 0.53, "grad_norm": 
1.027570128440857, "learning_rate": 9.004833068882774e-05, "loss": 1.4805, "step": 6973 }, { "epoch": 0.53, "grad_norm": 1.4386909008026123, "learning_rate": 9.002445928815474e-05, "loss": 1.3261, "step": 6974 }, { "epoch": 0.53, "grad_norm": 2.3901309967041016, "learning_rate": 9.000058846163152e-05, "loss": 1.594, "step": 6975 }, { "epoch": 0.53, "grad_norm": 1.0120108127593994, "learning_rate": 8.997671821063191e-05, "loss": 1.3421, "step": 6976 }, { "epoch": 0.53, "grad_norm": 1.8084778785705566, "learning_rate": 8.995284853652983e-05, "loss": 1.581, "step": 6977 }, { "epoch": 0.53, "grad_norm": 2.078373908996582, "learning_rate": 8.992897944069914e-05, "loss": 1.9022, "step": 6978 }, { "epoch": 0.53, "grad_norm": 0.9712554216384888, "learning_rate": 8.990511092451361e-05, "loss": 1.0696, "step": 6979 }, { "epoch": 0.53, "grad_norm": 1.7017232179641724, "learning_rate": 8.9881242989347e-05, "loss": 1.6722, "step": 6980 }, { "epoch": 0.53, "grad_norm": 1.0701931715011597, "learning_rate": 8.98573756365731e-05, "loss": 0.8486, "step": 6981 }, { "epoch": 0.53, "grad_norm": 1.2292542457580566, "learning_rate": 8.983350886756555e-05, "loss": 1.3775, "step": 6982 }, { "epoch": 0.53, "grad_norm": 1.1669957637786865, "learning_rate": 8.980964268369807e-05, "loss": 1.1355, "step": 6983 }, { "epoch": 0.53, "grad_norm": 2.3390443325042725, "learning_rate": 8.97857770863443e-05, "loss": 1.4081, "step": 6984 }, { "epoch": 0.53, "grad_norm": 1.4527647495269775, "learning_rate": 8.976191207687775e-05, "loss": 1.4266, "step": 6985 }, { "epoch": 0.53, "grad_norm": 0.8955997824668884, "learning_rate": 8.973804765667208e-05, "loss": 0.8383, "step": 6986 }, { "epoch": 0.53, "grad_norm": 2.695444345474243, "learning_rate": 8.971418382710084e-05, "loss": 0.5017, "step": 6987 }, { "epoch": 0.53, "grad_norm": 1.5246998071670532, "learning_rate": 8.969032058953746e-05, "loss": 1.5278, "step": 6988 }, { "epoch": 0.53, "grad_norm": 3.0012547969818115, "learning_rate": 
8.966645794535543e-05, "loss": 1.5337, "step": 6989 }, { "epoch": 0.53, "grad_norm": 1.2173906564712524, "learning_rate": 8.964259589592823e-05, "loss": 1.2786, "step": 6990 }, { "epoch": 0.53, "grad_norm": 1.5068249702453613, "learning_rate": 8.961873444262917e-05, "loss": 1.9898, "step": 6991 }, { "epoch": 0.53, "grad_norm": 1.4625027179718018, "learning_rate": 8.959487358683167e-05, "loss": 1.4435, "step": 6992 }, { "epoch": 0.53, "grad_norm": 2.036191940307617, "learning_rate": 8.957101332990909e-05, "loss": 1.6273, "step": 6993 }, { "epoch": 0.53, "grad_norm": 6.487157821655273, "learning_rate": 8.954715367323468e-05, "loss": 1.8874, "step": 6994 }, { "epoch": 0.53, "grad_norm": 2.338987112045288, "learning_rate": 8.952329461818166e-05, "loss": 0.8368, "step": 6995 }, { "epoch": 0.53, "grad_norm": 3.643214464187622, "learning_rate": 8.949943616612338e-05, "loss": 1.587, "step": 6996 }, { "epoch": 0.53, "grad_norm": 1.4119529724121094, "learning_rate": 8.947557831843289e-05, "loss": 1.1268, "step": 6997 }, { "epoch": 0.53, "grad_norm": 1.8315207958221436, "learning_rate": 8.945172107648343e-05, "loss": 1.7498, "step": 6998 }, { "epoch": 0.53, "grad_norm": 1.2138341665267944, "learning_rate": 8.942786444164813e-05, "loss": 1.1262, "step": 6999 }, { "epoch": 0.53, "grad_norm": 1.4667415618896484, "learning_rate": 8.94040084153e-05, "loss": 1.2201, "step": 7000 }, { "epoch": 0.53, "grad_norm": 1.8428353071212769, "learning_rate": 8.938015299881216e-05, "loss": 1.0709, "step": 7001 }, { "epoch": 0.53, "grad_norm": 3.312852621078491, "learning_rate": 8.935629819355764e-05, "loss": 1.2547, "step": 7002 }, { "epoch": 0.53, "grad_norm": 1.9031457901000977, "learning_rate": 8.933244400090937e-05, "loss": 1.4431, "step": 7003 }, { "epoch": 0.53, "grad_norm": 3.5853781700134277, "learning_rate": 8.930859042224029e-05, "loss": 2.1564, "step": 7004 }, { "epoch": 0.53, "grad_norm": 1.4349048137664795, "learning_rate": 8.928473745892339e-05, "loss": 1.6341, "step": 7005 }, { 
"epoch": 0.53, "grad_norm": 4.02851676940918, "learning_rate": 8.926088511233143e-05, "loss": 3.0587, "step": 7006 }, { "epoch": 0.53, "grad_norm": 1.5843244791030884, "learning_rate": 8.923703338383733e-05, "loss": 1.6899, "step": 7007 }, { "epoch": 0.53, "grad_norm": 1.2948201894760132, "learning_rate": 8.921318227481391e-05, "loss": 1.39, "step": 7008 }, { "epoch": 0.53, "grad_norm": 2.283851146697998, "learning_rate": 8.918933178663386e-05, "loss": 2.1505, "step": 7009 }, { "epoch": 0.53, "grad_norm": 1.0197486877441406, "learning_rate": 8.916548192066995e-05, "loss": 1.4665, "step": 7010 }, { "epoch": 0.53, "grad_norm": 1.1845208406448364, "learning_rate": 8.914163267829493e-05, "loss": 1.3461, "step": 7011 }, { "epoch": 0.54, "grad_norm": 2.3001937866210938, "learning_rate": 8.911778406088139e-05, "loss": 1.5211, "step": 7012 }, { "epoch": 0.54, "grad_norm": 1.029245138168335, "learning_rate": 8.909393606980195e-05, "loss": 1.1263, "step": 7013 }, { "epoch": 0.54, "grad_norm": 1.0978771448135376, "learning_rate": 8.907008870642929e-05, "loss": 0.8609, "step": 7014 }, { "epoch": 0.54, "grad_norm": 1.5220329761505127, "learning_rate": 8.904624197213585e-05, "loss": 1.0832, "step": 7015 }, { "epoch": 0.54, "grad_norm": 1.465021014213562, "learning_rate": 8.90223958682942e-05, "loss": 1.5559, "step": 7016 }, { "epoch": 0.54, "grad_norm": 1.223508358001709, "learning_rate": 8.899855039627684e-05, "loss": 1.5992, "step": 7017 }, { "epoch": 0.54, "grad_norm": 0.9761934876441956, "learning_rate": 8.897470555745619e-05, "loss": 1.5519, "step": 7018 }, { "epoch": 0.54, "grad_norm": 2.3548760414123535, "learning_rate": 8.895086135320464e-05, "loss": 1.8689, "step": 7019 }, { "epoch": 0.54, "grad_norm": 2.2030441761016846, "learning_rate": 8.892701778489463e-05, "loss": 1.0129, "step": 7020 }, { "epoch": 0.54, "grad_norm": 1.4400614500045776, "learning_rate": 8.890317485389838e-05, "loss": 1.4471, "step": 7021 }, { "epoch": 0.54, "grad_norm": 1.219386100769043, 
"learning_rate": 8.88793325615883e-05, "loss": 1.556, "step": 7022 }, { "epoch": 0.54, "grad_norm": 1.064487338066101, "learning_rate": 8.885549090933661e-05, "loss": 1.1146, "step": 7023 }, { "epoch": 0.54, "grad_norm": 1.4765326976776123, "learning_rate": 8.883164989851549e-05, "loss": 1.8595, "step": 7024 }, { "epoch": 0.54, "grad_norm": 1.9758349657058716, "learning_rate": 8.880780953049718e-05, "loss": 1.5393, "step": 7025 }, { "epoch": 0.54, "grad_norm": 1.8926187753677368, "learning_rate": 8.878396980665384e-05, "loss": 1.7005, "step": 7026 }, { "epoch": 0.54, "grad_norm": 1.3787925243377686, "learning_rate": 8.876013072835755e-05, "loss": 1.3478, "step": 7027 }, { "epoch": 0.54, "grad_norm": 1.2193388938903809, "learning_rate": 8.873629229698037e-05, "loss": 1.5772, "step": 7028 }, { "epoch": 0.54, "grad_norm": 1.8250013589859009, "learning_rate": 8.87124545138944e-05, "loss": 0.8666, "step": 7029 }, { "epoch": 0.54, "grad_norm": 1.0899271965026855, "learning_rate": 8.868861738047158e-05, "loss": 0.9446, "step": 7030 }, { "epoch": 0.54, "grad_norm": 3.1564245223999023, "learning_rate": 8.866478089808391e-05, "loss": 1.2664, "step": 7031 }, { "epoch": 0.54, "grad_norm": 1.3754953145980835, "learning_rate": 8.864094506810334e-05, "loss": 1.7869, "step": 7032 }, { "epoch": 0.54, "grad_norm": 2.2403595447540283, "learning_rate": 8.861710989190167e-05, "loss": 1.628, "step": 7033 }, { "epoch": 0.54, "grad_norm": 1.1844921112060547, "learning_rate": 8.859327537085082e-05, "loss": 1.4742, "step": 7034 }, { "epoch": 0.54, "grad_norm": 1.579917550086975, "learning_rate": 8.85694415063226e-05, "loss": 1.682, "step": 7035 }, { "epoch": 0.54, "grad_norm": 1.5512118339538574, "learning_rate": 8.85456082996888e-05, "loss": 1.5176, "step": 7036 }, { "epoch": 0.54, "grad_norm": 1.350816249847412, "learning_rate": 8.85217757523211e-05, "loss": 1.3199, "step": 7037 }, { "epoch": 0.54, "grad_norm": 1.1867327690124512, "learning_rate": 8.849794386559125e-05, "loss": 1.1042, 
"step": 7038 }, { "epoch": 0.54, "grad_norm": 1.8867533206939697, "learning_rate": 8.847411264087094e-05, "loss": 2.1626, "step": 7039 }, { "epoch": 0.54, "grad_norm": 1.3858890533447266, "learning_rate": 8.84502820795317e-05, "loss": 1.9859, "step": 7040 }, { "epoch": 0.54, "grad_norm": 3.0823111534118652, "learning_rate": 8.842645218294516e-05, "loss": 1.3904, "step": 7041 }, { "epoch": 0.54, "grad_norm": 2.1224148273468018, "learning_rate": 8.840262295248293e-05, "loss": 0.7053, "step": 7042 }, { "epoch": 0.54, "grad_norm": 2.096487283706665, "learning_rate": 8.837879438951642e-05, "loss": 1.6642, "step": 7043 }, { "epoch": 0.54, "grad_norm": 1.80428946018219, "learning_rate": 8.835496649541716e-05, "loss": 1.6907, "step": 7044 }, { "epoch": 0.54, "grad_norm": 1.41941499710083, "learning_rate": 8.83311392715566e-05, "loss": 1.3391, "step": 7045 }, { "epoch": 0.54, "grad_norm": 1.556941270828247, "learning_rate": 8.830731271930605e-05, "loss": 1.0746, "step": 7046 }, { "epoch": 0.54, "grad_norm": 2.079442024230957, "learning_rate": 8.828348684003691e-05, "loss": 1.6343, "step": 7047 }, { "epoch": 0.54, "grad_norm": 1.8717467784881592, "learning_rate": 8.825966163512056e-05, "loss": 1.7029, "step": 7048 }, { "epoch": 0.54, "grad_norm": 1.371326208114624, "learning_rate": 8.823583710592819e-05, "loss": 1.5777, "step": 7049 }, { "epoch": 0.54, "grad_norm": 1.1014920473098755, "learning_rate": 8.821201325383106e-05, "loss": 0.7093, "step": 7050 }, { "epoch": 0.54, "grad_norm": 2.203552722930908, "learning_rate": 8.818819008020042e-05, "loss": 1.6786, "step": 7051 }, { "epoch": 0.54, "grad_norm": 1.3090753555297852, "learning_rate": 8.816436758640735e-05, "loss": 1.446, "step": 7052 }, { "epoch": 0.54, "grad_norm": 1.050431489944458, "learning_rate": 8.814054577382302e-05, "loss": 1.0157, "step": 7053 }, { "epoch": 0.54, "grad_norm": 1.8522416353225708, "learning_rate": 8.811672464381851e-05, "loss": 1.5506, "step": 7054 }, { "epoch": 0.54, "grad_norm": 
1.3514704704284668, "learning_rate": 8.809290419776488e-05, "loss": 1.289, "step": 7055 }, { "epoch": 0.54, "grad_norm": 1.3551995754241943, "learning_rate": 8.806908443703306e-05, "loss": 1.6743, "step": 7056 }, { "epoch": 0.54, "grad_norm": 1.8084640502929688, "learning_rate": 8.804526536299413e-05, "loss": 1.6508, "step": 7057 }, { "epoch": 0.54, "grad_norm": 1.3693475723266602, "learning_rate": 8.802144697701889e-05, "loss": 1.4139, "step": 7058 }, { "epoch": 0.54, "grad_norm": 1.950218677520752, "learning_rate": 8.799762928047832e-05, "loss": 1.7141, "step": 7059 }, { "epoch": 0.54, "grad_norm": 1.4046478271484375, "learning_rate": 8.797381227474324e-05, "loss": 1.2898, "step": 7060 }, { "epoch": 0.54, "grad_norm": 1.2924752235412598, "learning_rate": 8.794999596118441e-05, "loss": 1.1255, "step": 7061 }, { "epoch": 0.54, "grad_norm": 1.4134308099746704, "learning_rate": 8.792618034117263e-05, "loss": 0.9109, "step": 7062 }, { "epoch": 0.54, "grad_norm": 1.273790955543518, "learning_rate": 8.790236541607868e-05, "loss": 1.1395, "step": 7063 }, { "epoch": 0.54, "grad_norm": 4.013551235198975, "learning_rate": 8.787855118727315e-05, "loss": 2.718, "step": 7064 }, { "epoch": 0.54, "grad_norm": 1.159367561340332, "learning_rate": 8.785473765612672e-05, "loss": 1.4729, "step": 7065 }, { "epoch": 0.54, "grad_norm": 1.0105128288269043, "learning_rate": 8.783092482401005e-05, "loss": 1.3648, "step": 7066 }, { "epoch": 0.54, "grad_norm": 1.779990792274475, "learning_rate": 8.780711269229363e-05, "loss": 1.4807, "step": 7067 }, { "epoch": 0.54, "grad_norm": 2.2910168170928955, "learning_rate": 8.778330126234803e-05, "loss": 1.4473, "step": 7068 }, { "epoch": 0.54, "grad_norm": 1.4018869400024414, "learning_rate": 8.775949053554373e-05, "loss": 1.7712, "step": 7069 }, { "epoch": 0.54, "grad_norm": 1.1905629634857178, "learning_rate": 8.773568051325114e-05, "loss": 1.2217, "step": 7070 }, { "epoch": 0.54, "grad_norm": 3.2088277339935303, "learning_rate": 
8.771187119684068e-05, "loss": 2.085, "step": 7071 }, { "epoch": 0.54, "grad_norm": 1.0381826162338257, "learning_rate": 8.768806258768278e-05, "loss": 1.003, "step": 7072 }, { "epoch": 0.54, "grad_norm": 1.5937598943710327, "learning_rate": 8.766425468714767e-05, "loss": 1.3676, "step": 7073 }, { "epoch": 0.54, "grad_norm": 1.2509310245513916, "learning_rate": 8.764044749660564e-05, "loss": 1.0889, "step": 7074 }, { "epoch": 0.54, "grad_norm": 1.5834828615188599, "learning_rate": 8.7616641017427e-05, "loss": 0.8944, "step": 7075 }, { "epoch": 0.54, "grad_norm": 2.0418741703033447, "learning_rate": 8.759283525098187e-05, "loss": 1.453, "step": 7076 }, { "epoch": 0.54, "grad_norm": 1.5842875242233276, "learning_rate": 8.756903019864048e-05, "loss": 0.9636, "step": 7077 }, { "epoch": 0.54, "grad_norm": 1.3780577182769775, "learning_rate": 8.754522586177292e-05, "loss": 1.3366, "step": 7078 }, { "epoch": 0.54, "grad_norm": 1.3313826322555542, "learning_rate": 8.752142224174922e-05, "loss": 1.6766, "step": 7079 }, { "epoch": 0.54, "grad_norm": 2.9402968883514404, "learning_rate": 8.749761933993945e-05, "loss": 1.4824, "step": 7080 }, { "epoch": 0.54, "grad_norm": 1.8456617593765259, "learning_rate": 8.747381715771365e-05, "loss": 1.4932, "step": 7081 }, { "epoch": 0.54, "grad_norm": 1.4258660078048706, "learning_rate": 8.745001569644173e-05, "loss": 1.2484, "step": 7082 }, { "epoch": 0.54, "grad_norm": 1.215066909790039, "learning_rate": 8.742621495749356e-05, "loss": 1.6478, "step": 7083 }, { "epoch": 0.54, "grad_norm": 3.8743813037872314, "learning_rate": 8.740241494223911e-05, "loss": 1.6726, "step": 7084 }, { "epoch": 0.54, "grad_norm": 5.198531627655029, "learning_rate": 8.73786156520481e-05, "loss": 1.9752, "step": 7085 }, { "epoch": 0.54, "grad_norm": 0.9088334441184998, "learning_rate": 8.73548170882904e-05, "loss": 1.4405, "step": 7086 }, { "epoch": 0.54, "grad_norm": 2.423056125640869, "learning_rate": 8.73310192523357e-05, "loss": 1.7106, "step": 7087 }, { 
"epoch": 0.54, "grad_norm": 1.1312545537948608, "learning_rate": 8.730722214555374e-05, "loss": 0.8566, "step": 7088 }, { "epoch": 0.54, "grad_norm": 1.465254783630371, "learning_rate": 8.728342576931413e-05, "loss": 1.7172, "step": 7089 }, { "epoch": 0.54, "grad_norm": 1.4523674249649048, "learning_rate": 8.725963012498657e-05, "loss": 1.3852, "step": 7090 }, { "epoch": 0.54, "grad_norm": 1.0887877941131592, "learning_rate": 8.723583521394054e-05, "loss": 0.8, "step": 7091 }, { "epoch": 0.54, "grad_norm": 1.02394437789917, "learning_rate": 8.721204103754564e-05, "loss": 1.5036, "step": 7092 }, { "epoch": 0.54, "grad_norm": 1.2496073246002197, "learning_rate": 8.718824759717138e-05, "loss": 1.8721, "step": 7093 }, { "epoch": 0.54, "grad_norm": 1.9011280536651611, "learning_rate": 8.716445489418712e-05, "loss": 1.6807, "step": 7094 }, { "epoch": 0.54, "grad_norm": 1.847428560256958, "learning_rate": 8.714066292996232e-05, "loss": 1.5568, "step": 7095 }, { "epoch": 0.54, "grad_norm": 1.6391481161117554, "learning_rate": 8.711687170586638e-05, "loss": 1.4075, "step": 7096 }, { "epoch": 0.54, "grad_norm": 2.538524866104126, "learning_rate": 8.709308122326858e-05, "loss": 1.1696, "step": 7097 }, { "epoch": 0.54, "grad_norm": 3.2328696250915527, "learning_rate": 8.706929148353817e-05, "loss": 1.5582, "step": 7098 }, { "epoch": 0.54, "grad_norm": 1.270766258239746, "learning_rate": 8.704550248804448e-05, "loss": 0.6461, "step": 7099 }, { "epoch": 0.54, "grad_norm": 1.3658685684204102, "learning_rate": 8.702171423815658e-05, "loss": 1.0919, "step": 7100 }, { "epoch": 0.54, "grad_norm": 1.562434196472168, "learning_rate": 8.699792673524371e-05, "loss": 0.9991, "step": 7101 }, { "epoch": 0.54, "grad_norm": 1.8340799808502197, "learning_rate": 8.6974139980675e-05, "loss": 1.8543, "step": 7102 }, { "epoch": 0.54, "grad_norm": 1.9983022212982178, "learning_rate": 8.695035397581941e-05, "loss": 1.729, "step": 7103 }, { "epoch": 0.54, "grad_norm": 1.331162452697754, 
"learning_rate": 8.692656872204601e-05, "loss": 1.2367, "step": 7104 }, { "epoch": 0.54, "grad_norm": 1.8762974739074707, "learning_rate": 8.690278422072384e-05, "loss": 1.3144, "step": 7105 }, { "epoch": 0.54, "grad_norm": 1.7703722715377808, "learning_rate": 8.687900047322176e-05, "loss": 1.4208, "step": 7106 }, { "epoch": 0.54, "grad_norm": 3.4096667766571045, "learning_rate": 8.685521748090866e-05, "loss": 1.7372, "step": 7107 }, { "epoch": 0.54, "grad_norm": 1.7786650657653809, "learning_rate": 8.683143524515346e-05, "loss": 1.6163, "step": 7108 }, { "epoch": 0.54, "grad_norm": 1.0173360109329224, "learning_rate": 8.680765376732487e-05, "loss": 1.11, "step": 7109 }, { "epoch": 0.54, "grad_norm": 1.4125757217407227, "learning_rate": 8.678387304879172e-05, "loss": 1.3992, "step": 7110 }, { "epoch": 0.54, "grad_norm": 1.1702542304992676, "learning_rate": 8.676009309092272e-05, "loss": 0.9047, "step": 7111 }, { "epoch": 0.54, "grad_norm": 1.189848780632019, "learning_rate": 8.673631389508648e-05, "loss": 1.5355, "step": 7112 }, { "epoch": 0.54, "grad_norm": 2.6715199947357178, "learning_rate": 8.67125354626517e-05, "loss": 1.4945, "step": 7113 }, { "epoch": 0.54, "grad_norm": 1.9941984415054321, "learning_rate": 8.668875779498697e-05, "loss": 2.0086, "step": 7114 }, { "epoch": 0.54, "grad_norm": 0.9201160669326782, "learning_rate": 8.666498089346078e-05, "loss": 1.2, "step": 7115 }, { "epoch": 0.54, "grad_norm": 2.087719440460205, "learning_rate": 8.664120475944163e-05, "loss": 0.7439, "step": 7116 }, { "epoch": 0.54, "grad_norm": 1.5567818880081177, "learning_rate": 8.661742939429805e-05, "loss": 1.061, "step": 7117 }, { "epoch": 0.54, "grad_norm": 2.1332879066467285, "learning_rate": 8.659365479939834e-05, "loss": 1.7086, "step": 7118 }, { "epoch": 0.54, "grad_norm": 2.5566036701202393, "learning_rate": 8.656988097611094e-05, "loss": 1.1243, "step": 7119 }, { "epoch": 0.54, "grad_norm": 1.407946228981018, "learning_rate": 8.654610792580415e-05, "loss": 1.4832, 
"step": 7120 }, { "epoch": 0.54, "grad_norm": 3.4365813732147217, "learning_rate": 8.652233564984625e-05, "loss": 1.1461, "step": 7121 }, { "epoch": 0.54, "grad_norm": 1.9298384189605713, "learning_rate": 8.649856414960545e-05, "loss": 1.2008, "step": 7122 }, { "epoch": 0.54, "grad_norm": 2.719496250152588, "learning_rate": 8.647479342644999e-05, "loss": 2.3302, "step": 7123 }, { "epoch": 0.54, "grad_norm": 1.5913273096084595, "learning_rate": 8.645102348174792e-05, "loss": 1.7282, "step": 7124 }, { "epoch": 0.54, "grad_norm": 1.2564517259597778, "learning_rate": 8.642725431686743e-05, "loss": 1.3794, "step": 7125 }, { "epoch": 0.54, "grad_norm": 1.2643787860870361, "learning_rate": 8.640348593317654e-05, "loss": 1.9551, "step": 7126 }, { "epoch": 0.54, "grad_norm": 2.0273468494415283, "learning_rate": 8.637971833204322e-05, "loss": 2.1464, "step": 7127 }, { "epoch": 0.54, "grad_norm": 1.3153223991394043, "learning_rate": 8.635595151483545e-05, "loss": 1.614, "step": 7128 }, { "epoch": 0.54, "grad_norm": 1.581758737564087, "learning_rate": 8.633218548292122e-05, "loss": 1.4784, "step": 7129 }, { "epoch": 0.54, "grad_norm": 1.8460792303085327, "learning_rate": 8.630842023766831e-05, "loss": 1.5183, "step": 7130 }, { "epoch": 0.54, "grad_norm": 1.9895635843276978, "learning_rate": 8.628465578044456e-05, "loss": 1.4629, "step": 7131 }, { "epoch": 0.54, "grad_norm": 1.1187752485275269, "learning_rate": 8.626089211261782e-05, "loss": 0.9854, "step": 7132 }, { "epoch": 0.54, "grad_norm": 1.379210352897644, "learning_rate": 8.623712923555572e-05, "loss": 0.8475, "step": 7133 }, { "epoch": 0.54, "grad_norm": 1.235526442527771, "learning_rate": 8.621336715062603e-05, "loss": 1.1975, "step": 7134 }, { "epoch": 0.54, "grad_norm": 2.126804828643799, "learning_rate": 8.61896058591964e-05, "loss": 1.9236, "step": 7135 }, { "epoch": 0.54, "grad_norm": 2.115023374557495, "learning_rate": 8.616584536263434e-05, "loss": 1.1578, "step": 7136 }, { "epoch": 0.54, "grad_norm": 
1.1148654222488403, "learning_rate": 8.614208566230748e-05, "loss": 0.9569, "step": 7137 }, { "epoch": 0.54, "grad_norm": 1.8821176290512085, "learning_rate": 8.611832675958336e-05, "loss": 1.7656, "step": 7138 }, { "epoch": 0.54, "grad_norm": 2.9860570430755615, "learning_rate": 8.609456865582935e-05, "loss": 1.6287, "step": 7139 }, { "epoch": 0.54, "grad_norm": 1.0822184085845947, "learning_rate": 8.607081135241289e-05, "loss": 0.7822, "step": 7140 }, { "epoch": 0.54, "grad_norm": 2.155022621154785, "learning_rate": 8.604705485070143e-05, "loss": 1.2414, "step": 7141 }, { "epoch": 0.54, "grad_norm": 1.2854348421096802, "learning_rate": 8.602329915206217e-05, "loss": 1.5757, "step": 7142 }, { "epoch": 0.55, "grad_norm": 1.9162689447402954, "learning_rate": 8.599954425786247e-05, "loss": 1.1299, "step": 7143 }, { "epoch": 0.55, "grad_norm": 2.127687454223633, "learning_rate": 8.597579016946954e-05, "loss": 1.8037, "step": 7144 }, { "epoch": 0.55, "grad_norm": 1.3127212524414062, "learning_rate": 8.595203688825056e-05, "loss": 1.3167, "step": 7145 }, { "epoch": 0.55, "grad_norm": 1.3945938348770142, "learning_rate": 8.592828441557264e-05, "loss": 1.5675, "step": 7146 }, { "epoch": 0.55, "grad_norm": 1.4819387197494507, "learning_rate": 8.590453275280297e-05, "loss": 1.6364, "step": 7147 }, { "epoch": 0.55, "grad_norm": 1.5794702768325806, "learning_rate": 8.588078190130845e-05, "loss": 1.5962, "step": 7148 }, { "epoch": 0.55, "grad_norm": 1.1406948566436768, "learning_rate": 8.585703186245621e-05, "loss": 1.0214, "step": 7149 }, { "epoch": 0.55, "grad_norm": 1.4901812076568604, "learning_rate": 8.583328263761316e-05, "loss": 1.1003, "step": 7150 }, { "epoch": 0.55, "grad_norm": 1.0658860206604004, "learning_rate": 8.580953422814614e-05, "loss": 1.6838, "step": 7151 }, { "epoch": 0.55, "grad_norm": 1.2239137887954712, "learning_rate": 8.578578663542205e-05, "loss": 1.1345, "step": 7152 }, { "epoch": 0.55, "grad_norm": 4.313737392425537, "learning_rate": 
8.576203986080777e-05, "loss": 2.0928, "step": 7153 }, { "epoch": 0.55, "grad_norm": 1.249958872795105, "learning_rate": 8.573829390566997e-05, "loss": 1.1081, "step": 7154 }, { "epoch": 0.55, "grad_norm": 1.4496523141860962, "learning_rate": 8.571454877137539e-05, "loss": 1.6728, "step": 7155 }, { "epoch": 0.55, "grad_norm": 4.516353607177734, "learning_rate": 8.569080445929073e-05, "loss": 2.2171, "step": 7156 }, { "epoch": 0.55, "grad_norm": 1.680501937866211, "learning_rate": 8.566706097078256e-05, "loss": 1.4192, "step": 7157 }, { "epoch": 0.55, "grad_norm": 3.993795156478882, "learning_rate": 8.56433183072175e-05, "loss": 1.1641, "step": 7158 }, { "epoch": 0.55, "grad_norm": 1.4321571588516235, "learning_rate": 8.561957646996209e-05, "loss": 1.7131, "step": 7159 }, { "epoch": 0.55, "grad_norm": 1.0419385433197021, "learning_rate": 8.559583546038273e-05, "loss": 0.812, "step": 7160 }, { "epoch": 0.55, "grad_norm": 1.9435675144195557, "learning_rate": 8.557209527984592e-05, "loss": 1.7853, "step": 7161 }, { "epoch": 0.55, "grad_norm": 1.1642906665802002, "learning_rate": 8.554835592971803e-05, "loss": 1.7209, "step": 7162 }, { "epoch": 0.55, "grad_norm": 1.1616655588150024, "learning_rate": 8.552461741136542e-05, "loss": 1.9444, "step": 7163 }, { "epoch": 0.55, "grad_norm": 1.3394432067871094, "learning_rate": 8.550087972615429e-05, "loss": 1.9137, "step": 7164 }, { "epoch": 0.55, "grad_norm": 1.4087539911270142, "learning_rate": 8.5477142875451e-05, "loss": 1.5874, "step": 7165 }, { "epoch": 0.55, "grad_norm": 1.7230526208877563, "learning_rate": 8.545340686062165e-05, "loss": 1.3639, "step": 7166 }, { "epoch": 0.55, "grad_norm": 1.777618169784546, "learning_rate": 8.542967168303241e-05, "loss": 1.5136, "step": 7167 }, { "epoch": 0.55, "grad_norm": 0.9096748232841492, "learning_rate": 8.540593734404942e-05, "loss": 1.0613, "step": 7168 }, { "epoch": 0.55, "grad_norm": 1.038986325263977, "learning_rate": 8.538220384503866e-05, "loss": 0.6339, "step": 7169 }, { 
"epoch": 0.55, "grad_norm": 1.6858100891113281, "learning_rate": 8.535847118736613e-05, "loss": 0.8844, "step": 7170 }, { "epoch": 0.55, "grad_norm": 1.3083044290542603, "learning_rate": 8.533473937239788e-05, "loss": 0.9554, "step": 7171 }, { "epoch": 0.55, "grad_norm": 1.6847602128982544, "learning_rate": 8.531100840149972e-05, "loss": 1.7172, "step": 7172 }, { "epoch": 0.55, "grad_norm": 1.8236857652664185, "learning_rate": 8.528727827603749e-05, "loss": 2.6538, "step": 7173 }, { "epoch": 0.55, "grad_norm": 1.447519302368164, "learning_rate": 8.526354899737705e-05, "loss": 1.2606, "step": 7174 }, { "epoch": 0.55, "grad_norm": 1.0459808111190796, "learning_rate": 8.523982056688418e-05, "loss": 1.0379, "step": 7175 }, { "epoch": 0.55, "grad_norm": 1.3048683404922485, "learning_rate": 8.521609298592452e-05, "loss": 1.8369, "step": 7176 }, { "epoch": 0.55, "grad_norm": 1.036516547203064, "learning_rate": 8.519236625586374e-05, "loss": 1.9688, "step": 7177 }, { "epoch": 0.55, "grad_norm": 2.206679105758667, "learning_rate": 8.51686403780675e-05, "loss": 1.4048, "step": 7178 }, { "epoch": 0.55, "grad_norm": 2.200349807739258, "learning_rate": 8.514491535390129e-05, "loss": 1.7565, "step": 7179 }, { "epoch": 0.55, "grad_norm": 1.3236826658248901, "learning_rate": 8.512119118473067e-05, "loss": 1.3404, "step": 7180 }, { "epoch": 0.55, "grad_norm": 3.411055564880371, "learning_rate": 8.509746787192111e-05, "loss": 1.9476, "step": 7181 }, { "epoch": 0.55, "grad_norm": 1.829756259918213, "learning_rate": 8.507374541683799e-05, "loss": 2.1976, "step": 7182 }, { "epoch": 0.55, "grad_norm": 0.9594843983650208, "learning_rate": 8.505002382084667e-05, "loss": 0.9424, "step": 7183 }, { "epoch": 0.55, "grad_norm": 1.6800496578216553, "learning_rate": 8.502630308531253e-05, "loss": 2.119, "step": 7184 }, { "epoch": 0.55, "grad_norm": 2.228693723678589, "learning_rate": 8.500258321160073e-05, "loss": 0.977, "step": 7185 }, { "epoch": 0.55, "grad_norm": 1.1705986261367798, 
"learning_rate": 8.497886420107657e-05, "loss": 1.4104, "step": 7186 }, { "epoch": 0.55, "grad_norm": 1.670634150505066, "learning_rate": 8.49551460551052e-05, "loss": 1.3301, "step": 7187 }, { "epoch": 0.55, "grad_norm": 1.3889786005020142, "learning_rate": 8.49314287750517e-05, "loss": 1.4443, "step": 7188 }, { "epoch": 0.55, "grad_norm": 1.482606053352356, "learning_rate": 8.490771236228115e-05, "loss": 1.4403, "step": 7189 }, { "epoch": 0.55, "grad_norm": 1.8866466283798218, "learning_rate": 8.488399681815861e-05, "loss": 1.2828, "step": 7190 }, { "epoch": 0.55, "grad_norm": 1.6584296226501465, "learning_rate": 8.486028214404898e-05, "loss": 2.1788, "step": 7191 }, { "epoch": 0.55, "grad_norm": 1.0402904748916626, "learning_rate": 8.48365683413172e-05, "loss": 0.8406, "step": 7192 }, { "epoch": 0.55, "grad_norm": 1.1959997415542603, "learning_rate": 8.481285541132818e-05, "loss": 1.4974, "step": 7193 }, { "epoch": 0.55, "grad_norm": 1.8566778898239136, "learning_rate": 8.478914335544667e-05, "loss": 1.6753, "step": 7194 }, { "epoch": 0.55, "grad_norm": 1.1082561016082764, "learning_rate": 8.476543217503748e-05, "loss": 1.2756, "step": 7195 }, { "epoch": 0.55, "grad_norm": 1.414530873298645, "learning_rate": 8.474172187146532e-05, "loss": 1.5831, "step": 7196 }, { "epoch": 0.55, "grad_norm": 1.3186618089675903, "learning_rate": 8.47180124460948e-05, "loss": 1.9648, "step": 7197 }, { "epoch": 0.55, "grad_norm": 1.595874309539795, "learning_rate": 8.469430390029058e-05, "loss": 1.9217, "step": 7198 }, { "epoch": 0.55, "grad_norm": 1.538690209388733, "learning_rate": 8.467059623541725e-05, "loss": 0.9608, "step": 7199 }, { "epoch": 0.55, "grad_norm": 1.816169023513794, "learning_rate": 8.464688945283928e-05, "loss": 1.667, "step": 7200 }, { "epoch": 0.55, "grad_norm": 2.0609288215637207, "learning_rate": 8.462318355392112e-05, "loss": 1.1263, "step": 7201 }, { "epoch": 0.55, "grad_norm": 1.4262478351593018, "learning_rate": 8.459947854002726e-05, "loss": 1.7732, 
"step": 7202 }, { "epoch": 0.55, "grad_norm": 1.0369545221328735, "learning_rate": 8.457577441252195e-05, "loss": 1.3323, "step": 7203 }, { "epoch": 0.55, "grad_norm": 4.18186616897583, "learning_rate": 8.455207117276958e-05, "loss": 1.5642, "step": 7204 }, { "epoch": 0.55, "grad_norm": 2.7636499404907227, "learning_rate": 8.45283688221344e-05, "loss": 1.5388, "step": 7205 }, { "epoch": 0.55, "grad_norm": 3.233785629272461, "learning_rate": 8.450466736198055e-05, "loss": 1.4665, "step": 7206 }, { "epoch": 0.55, "grad_norm": 2.0939040184020996, "learning_rate": 8.448096679367225e-05, "loss": 1.7015, "step": 7207 }, { "epoch": 0.55, "grad_norm": 2.459627151489258, "learning_rate": 8.445726711857362e-05, "loss": 0.7228, "step": 7208 }, { "epoch": 0.55, "grad_norm": 3.4815049171447754, "learning_rate": 8.443356833804866e-05, "loss": 1.941, "step": 7209 }, { "epoch": 0.55, "grad_norm": 0.8572549223899841, "learning_rate": 8.440987045346134e-05, "loss": 1.0158, "step": 7210 }, { "epoch": 0.55, "grad_norm": 1.7849100828170776, "learning_rate": 8.438617346617574e-05, "loss": 2.3891, "step": 7211 }, { "epoch": 0.55, "grad_norm": 1.4710280895233154, "learning_rate": 8.436247737755562e-05, "loss": 0.8875, "step": 7212 }, { "epoch": 0.55, "grad_norm": 1.237311601638794, "learning_rate": 8.433878218896492e-05, "loss": 1.0174, "step": 7213 }, { "epoch": 0.55, "grad_norm": 1.6609036922454834, "learning_rate": 8.431508790176739e-05, "loss": 1.1153, "step": 7214 }, { "epoch": 0.55, "grad_norm": 2.5998647212982178, "learning_rate": 8.429139451732678e-05, "loss": 1.984, "step": 7215 }, { "epoch": 0.55, "grad_norm": 1.3244069814682007, "learning_rate": 8.426770203700676e-05, "loss": 1.5107, "step": 7216 }, { "epoch": 0.55, "grad_norm": 2.262698173522949, "learning_rate": 8.424401046217103e-05, "loss": 1.411, "step": 7217 }, { "epoch": 0.55, "grad_norm": 1.5089999437332153, "learning_rate": 8.42203197941831e-05, "loss": 1.3576, "step": 7218 }, { "epoch": 0.55, "grad_norm": 
2.051555871963501, "learning_rate": 8.419663003440657e-05, "loss": 1.411, "step": 7219 }, { "epoch": 0.55, "grad_norm": 1.1976685523986816, "learning_rate": 8.41729411842049e-05, "loss": 1.3964, "step": 7220 }, { "epoch": 0.55, "grad_norm": 1.8375048637390137, "learning_rate": 8.414925324494148e-05, "loss": 2.1325, "step": 7221 }, { "epoch": 0.55, "grad_norm": 2.0274157524108887, "learning_rate": 8.412556621797973e-05, "loss": 1.4664, "step": 7222 }, { "epoch": 0.55, "grad_norm": 1.1478923559188843, "learning_rate": 8.410188010468298e-05, "loss": 1.3573, "step": 7223 }, { "epoch": 0.55, "grad_norm": 1.740874171257019, "learning_rate": 8.407819490641448e-05, "loss": 0.8447, "step": 7224 }, { "epoch": 0.55, "grad_norm": 1.0683538913726807, "learning_rate": 8.405451062453744e-05, "loss": 1.2242, "step": 7225 }, { "epoch": 0.55, "grad_norm": 3.1149141788482666, "learning_rate": 8.403082726041508e-05, "loss": 1.268, "step": 7226 }, { "epoch": 0.55, "grad_norm": 1.7013696432113647, "learning_rate": 8.400714481541043e-05, "loss": 1.819, "step": 7227 }, { "epoch": 0.55, "grad_norm": 1.091660737991333, "learning_rate": 8.398346329088664e-05, "loss": 1.4979, "step": 7228 }, { "epoch": 0.55, "grad_norm": 1.4852280616760254, "learning_rate": 8.395978268820667e-05, "loss": 1.152, "step": 7229 }, { "epoch": 0.55, "grad_norm": 1.3612239360809326, "learning_rate": 8.393610300873345e-05, "loss": 1.1996, "step": 7230 }, { "epoch": 0.55, "grad_norm": 1.9725805521011353, "learning_rate": 8.391242425382991e-05, "loss": 1.3903, "step": 7231 }, { "epoch": 0.55, "grad_norm": 4.699340343475342, "learning_rate": 8.388874642485895e-05, "loss": 2.8505, "step": 7232 }, { "epoch": 0.55, "grad_norm": 1.8497340679168701, "learning_rate": 8.38650695231833e-05, "loss": 1.0303, "step": 7233 }, { "epoch": 0.55, "grad_norm": 1.2194347381591797, "learning_rate": 8.384139355016568e-05, "loss": 1.3032, "step": 7234 }, { "epoch": 0.55, "grad_norm": 2.02632999420166, "learning_rate": 8.381771850716887e-05, 
"loss": 2.0991, "step": 7235 }, { "epoch": 0.55, "grad_norm": 2.02557373046875, "learning_rate": 8.37940443955554e-05, "loss": 1.5623, "step": 7236 }, { "epoch": 0.55, "grad_norm": 3.067697048187256, "learning_rate": 8.377037121668794e-05, "loss": 2.2046, "step": 7237 }, { "epoch": 0.55, "grad_norm": 4.103646755218506, "learning_rate": 8.374669897192898e-05, "loss": 1.2673, "step": 7238 }, { "epoch": 0.55, "grad_norm": 1.049095630645752, "learning_rate": 8.372302766264097e-05, "loss": 0.8142, "step": 7239 }, { "epoch": 0.55, "grad_norm": 1.149672031402588, "learning_rate": 8.369935729018634e-05, "loss": 1.2854, "step": 7240 }, { "epoch": 0.55, "grad_norm": 1.2601724863052368, "learning_rate": 8.367568785592752e-05, "loss": 1.3195, "step": 7241 }, { "epoch": 0.55, "grad_norm": 1.2878286838531494, "learning_rate": 8.365201936122675e-05, "loss": 1.1495, "step": 7242 }, { "epoch": 0.55, "grad_norm": 1.7474948167800903, "learning_rate": 8.36283518074463e-05, "loss": 1.268, "step": 7243 }, { "epoch": 0.55, "grad_norm": 2.027554988861084, "learning_rate": 8.360468519594842e-05, "loss": 2.0351, "step": 7244 }, { "epoch": 0.55, "grad_norm": 1.8473975658416748, "learning_rate": 8.358101952809518e-05, "loss": 1.023, "step": 7245 }, { "epoch": 0.55, "grad_norm": 1.5150315761566162, "learning_rate": 8.355735480524874e-05, "loss": 1.6105, "step": 7246 }, { "epoch": 0.55, "grad_norm": 2.4856600761413574, "learning_rate": 8.353369102877111e-05, "loss": 1.8466, "step": 7247 }, { "epoch": 0.55, "grad_norm": 1.5891354084014893, "learning_rate": 8.35100282000243e-05, "loss": 1.8015, "step": 7248 }, { "epoch": 0.55, "grad_norm": 0.9282552599906921, "learning_rate": 8.348636632037022e-05, "loss": 1.3585, "step": 7249 }, { "epoch": 0.55, "grad_norm": 2.354928970336914, "learning_rate": 8.346270539117078e-05, "loss": 1.4166, "step": 7250 }, { "epoch": 0.55, "grad_norm": 1.5647965669631958, "learning_rate": 8.343904541378773e-05, "loss": 1.5528, "step": 7251 }, { "epoch": 0.55, 
"grad_norm": 1.2032650709152222, "learning_rate": 8.341538638958291e-05, "loss": 1.9745, "step": 7252 }, { "epoch": 0.55, "grad_norm": 0.8857683539390564, "learning_rate": 8.339172831991804e-05, "loss": 0.7767, "step": 7253 }, { "epoch": 0.55, "grad_norm": 2.4670944213867188, "learning_rate": 8.336807120615471e-05, "loss": 1.7766, "step": 7254 }, { "epoch": 0.55, "grad_norm": 1.292052149772644, "learning_rate": 8.334441504965455e-05, "loss": 1.4566, "step": 7255 }, { "epoch": 0.55, "grad_norm": 1.3232256174087524, "learning_rate": 8.332075985177916e-05, "loss": 1.6983, "step": 7256 }, { "epoch": 0.55, "grad_norm": 1.3033368587493896, "learning_rate": 8.329710561388999e-05, "loss": 1.6309, "step": 7257 }, { "epoch": 0.55, "grad_norm": 1.0650123357772827, "learning_rate": 8.327345233734842e-05, "loss": 1.5972, "step": 7258 }, { "epoch": 0.55, "grad_norm": 1.268547534942627, "learning_rate": 8.324980002351597e-05, "loss": 1.915, "step": 7259 }, { "epoch": 0.55, "grad_norm": 2.2205076217651367, "learning_rate": 8.322614867375383e-05, "loss": 0.95, "step": 7260 }, { "epoch": 0.55, "grad_norm": 1.681506872177124, "learning_rate": 8.320249828942333e-05, "loss": 2.0651, "step": 7261 }, { "epoch": 0.55, "grad_norm": 1.868424654006958, "learning_rate": 8.317884887188573e-05, "loss": 1.4352, "step": 7262 }, { "epoch": 0.55, "grad_norm": 1.1130647659301758, "learning_rate": 8.31552004225021e-05, "loss": 1.3408, "step": 7263 }, { "epoch": 0.55, "grad_norm": 1.0568182468414307, "learning_rate": 8.313155294263358e-05, "loss": 1.2633, "step": 7264 }, { "epoch": 0.55, "grad_norm": 3.553922414779663, "learning_rate": 8.310790643364127e-05, "loss": 1.3701, "step": 7265 }, { "epoch": 0.55, "grad_norm": 1.580688238143921, "learning_rate": 8.30842608968861e-05, "loss": 1.3223, "step": 7266 }, { "epoch": 0.55, "grad_norm": 2.073634386062622, "learning_rate": 8.306061633372902e-05, "loss": 1.6752, "step": 7267 }, { "epoch": 0.55, "grad_norm": 1.579892635345459, "learning_rate": 
8.303697274553094e-05, "loss": 0.789, "step": 7268 }, { "epoch": 0.55, "grad_norm": 1.2338612079620361, "learning_rate": 8.301333013365262e-05, "loss": 0.8055, "step": 7269 }, { "epoch": 0.55, "grad_norm": 1.6892198324203491, "learning_rate": 8.29896884994549e-05, "loss": 1.816, "step": 7270 }, { "epoch": 0.55, "grad_norm": 1.2873740196228027, "learning_rate": 8.296604784429847e-05, "loss": 1.6145, "step": 7271 }, { "epoch": 0.55, "grad_norm": 2.2120907306671143, "learning_rate": 8.294240816954395e-05, "loss": 1.4655, "step": 7272 }, { "epoch": 0.55, "grad_norm": 2.4210622310638428, "learning_rate": 8.291876947655196e-05, "loss": 1.5844, "step": 7273 }, { "epoch": 0.56, "grad_norm": 2.5289480686187744, "learning_rate": 8.28951317666831e-05, "loss": 1.4181, "step": 7274 }, { "epoch": 0.56, "grad_norm": 1.327802062034607, "learning_rate": 8.287149504129778e-05, "loss": 1.1169, "step": 7275 }, { "epoch": 0.56, "grad_norm": 2.1572606563568115, "learning_rate": 8.284785930175642e-05, "loss": 1.1136, "step": 7276 }, { "epoch": 0.56, "grad_norm": 1.439582347869873, "learning_rate": 8.28242245494195e-05, "loss": 1.521, "step": 7277 }, { "epoch": 0.56, "grad_norm": 1.0983657836914062, "learning_rate": 8.280059078564719e-05, "loss": 1.3121, "step": 7278 }, { "epoch": 0.56, "grad_norm": 2.3768715858459473, "learning_rate": 8.277695801179985e-05, "loss": 2.6057, "step": 7279 }, { "epoch": 0.56, "grad_norm": 1.2937604188919067, "learning_rate": 8.275332622923769e-05, "loss": 1.3651, "step": 7280 }, { "epoch": 0.56, "grad_norm": 1.6796133518218994, "learning_rate": 8.272969543932078e-05, "loss": 1.4808, "step": 7281 }, { "epoch": 0.56, "grad_norm": 2.641493320465088, "learning_rate": 8.270606564340924e-05, "loss": 1.9203, "step": 7282 }, { "epoch": 0.56, "grad_norm": 1.8162457942962646, "learning_rate": 8.268243684286316e-05, "loss": 1.5127, "step": 7283 }, { "epoch": 0.56, "grad_norm": 2.2147433757781982, "learning_rate": 8.26588090390424e-05, "loss": 1.3148, "step": 7284 }, { 
"epoch": 0.56, "grad_norm": 1.2174582481384277, "learning_rate": 8.263518223330697e-05, "loss": 0.8728, "step": 7285 }, { "epoch": 0.56, "grad_norm": 1.7498418092727661, "learning_rate": 8.261155642701673e-05, "loss": 1.3244, "step": 7286 }, { "epoch": 0.56, "grad_norm": 1.4015547037124634, "learning_rate": 8.258793162153138e-05, "loss": 1.7206, "step": 7287 }, { "epoch": 0.56, "grad_norm": 2.370286464691162, "learning_rate": 8.256430781821074e-05, "loss": 1.5505, "step": 7288 }, { "epoch": 0.56, "grad_norm": 1.9803071022033691, "learning_rate": 8.254068501841454e-05, "loss": 1.3596, "step": 7289 }, { "epoch": 0.56, "grad_norm": 1.2432299852371216, "learning_rate": 8.251706322350234e-05, "loss": 0.8154, "step": 7290 }, { "epoch": 0.56, "grad_norm": 1.1923372745513916, "learning_rate": 8.24934424348337e-05, "loss": 0.8787, "step": 7291 }, { "epoch": 0.56, "grad_norm": 1.262816309928894, "learning_rate": 8.246982265376819e-05, "loss": 1.3094, "step": 7292 }, { "epoch": 0.56, "grad_norm": 1.1599149703979492, "learning_rate": 8.244620388166519e-05, "loss": 1.157, "step": 7293 }, { "epoch": 0.56, "grad_norm": 2.5041472911834717, "learning_rate": 8.242258611988416e-05, "loss": 1.9377, "step": 7294 }, { "epoch": 0.56, "grad_norm": 1.8666352033615112, "learning_rate": 8.239896936978444e-05, "loss": 1.9962, "step": 7295 }, { "epoch": 0.56, "grad_norm": 1.1641230583190918, "learning_rate": 8.237535363272525e-05, "loss": 1.362, "step": 7296 }, { "epoch": 0.56, "grad_norm": 1.2399377822875977, "learning_rate": 8.235173891006584e-05, "loss": 1.2491, "step": 7297 }, { "epoch": 0.56, "grad_norm": 1.4444572925567627, "learning_rate": 8.232812520316542e-05, "loss": 1.587, "step": 7298 }, { "epoch": 0.56, "grad_norm": 1.3218294382095337, "learning_rate": 8.230451251338304e-05, "loss": 1.2859, "step": 7299 }, { "epoch": 0.56, "grad_norm": 1.1787958145141602, "learning_rate": 8.228090084207774e-05, "loss": 1.3407, "step": 7300 }, { "epoch": 0.56, "grad_norm": 1.706191897392273, 
"learning_rate": 8.225729019060856e-05, "loss": 0.7283, "step": 7301 }, { "epoch": 0.56, "grad_norm": 2.387233018875122, "learning_rate": 8.223368056033438e-05, "loss": 1.525, "step": 7302 }, { "epoch": 0.56, "grad_norm": 2.2355446815490723, "learning_rate": 8.221007195261407e-05, "loss": 0.932, "step": 7303 }, { "epoch": 0.56, "grad_norm": 2.0390279293060303, "learning_rate": 8.21864643688065e-05, "loss": 1.5484, "step": 7304 }, { "epoch": 0.56, "grad_norm": 0.948258101940155, "learning_rate": 8.216285781027036e-05, "loss": 1.2537, "step": 7305 }, { "epoch": 0.56, "grad_norm": 1.6600000858306885, "learning_rate": 8.213925227836434e-05, "loss": 1.3757, "step": 7306 }, { "epoch": 0.56, "grad_norm": 1.3603222370147705, "learning_rate": 8.211564777444716e-05, "loss": 1.2142, "step": 7307 }, { "epoch": 0.56, "grad_norm": 1.6371244192123413, "learning_rate": 8.209204429987727e-05, "loss": 1.3161, "step": 7308 }, { "epoch": 0.56, "grad_norm": 1.0745607614517212, "learning_rate": 8.206844185601327e-05, "loss": 1.1364, "step": 7309 }, { "epoch": 0.56, "grad_norm": 1.4599621295928955, "learning_rate": 8.20448404442136e-05, "loss": 1.8518, "step": 7310 }, { "epoch": 0.56, "grad_norm": 1.5760977268218994, "learning_rate": 8.202124006583668e-05, "loss": 0.9861, "step": 7311 }, { "epoch": 0.56, "grad_norm": 1.9531556367874146, "learning_rate": 8.199764072224077e-05, "loss": 1.4185, "step": 7312 }, { "epoch": 0.56, "grad_norm": 3.1219186782836914, "learning_rate": 8.197404241478423e-05, "loss": 1.6798, "step": 7313 }, { "epoch": 0.56, "grad_norm": 1.3112952709197998, "learning_rate": 8.195044514482527e-05, "loss": 1.4677, "step": 7314 }, { "epoch": 0.56, "grad_norm": 2.9793760776519775, "learning_rate": 8.192684891372198e-05, "loss": 1.6297, "step": 7315 }, { "epoch": 0.56, "grad_norm": 1.4005475044250488, "learning_rate": 8.190325372283249e-05, "loss": 1.2778, "step": 7316 }, { "epoch": 0.56, "grad_norm": 2.983182430267334, "learning_rate": 8.187965957351491e-05, "loss": 
1.3573, "step": 7317 }, { "epoch": 0.56, "grad_norm": 1.4398469924926758, "learning_rate": 8.185606646712714e-05, "loss": 1.5141, "step": 7318 }, { "epoch": 0.56, "grad_norm": 1.0324642658233643, "learning_rate": 8.18324744050271e-05, "loss": 0.5944, "step": 7319 }, { "epoch": 0.56, "grad_norm": 1.5754457712173462, "learning_rate": 8.180888338857271e-05, "loss": 1.2166, "step": 7320 }, { "epoch": 0.56, "grad_norm": 2.615051507949829, "learning_rate": 8.178529341912169e-05, "loss": 1.266, "step": 7321 }, { "epoch": 0.56, "grad_norm": 3.495025157928467, "learning_rate": 8.176170449803184e-05, "loss": 1.9059, "step": 7322 }, { "epoch": 0.56, "grad_norm": 1.078049898147583, "learning_rate": 8.173811662666083e-05, "loss": 1.2034, "step": 7323 }, { "epoch": 0.56, "grad_norm": 1.0651999711990356, "learning_rate": 8.171452980636623e-05, "loss": 0.8179, "step": 7324 }, { "epoch": 0.56, "grad_norm": 1.5769175291061401, "learning_rate": 8.169094403850562e-05, "loss": 1.8467, "step": 7325 }, { "epoch": 0.56, "grad_norm": 4.71586275100708, "learning_rate": 8.166735932443655e-05, "loss": 1.4539, "step": 7326 }, { "epoch": 0.56, "grad_norm": 1.790998935699463, "learning_rate": 8.16437756655164e-05, "loss": 1.3112, "step": 7327 }, { "epoch": 0.56, "grad_norm": 1.0861502885818481, "learning_rate": 8.162019306310254e-05, "loss": 1.2655, "step": 7328 }, { "epoch": 0.56, "grad_norm": 1.6481339931488037, "learning_rate": 8.159661151855232e-05, "loss": 1.2467, "step": 7329 }, { "epoch": 0.56, "grad_norm": 2.354688882827759, "learning_rate": 8.157303103322296e-05, "loss": 1.665, "step": 7330 }, { "epoch": 0.56, "grad_norm": 1.6561334133148193, "learning_rate": 8.154945160847168e-05, "loss": 0.9853, "step": 7331 }, { "epoch": 0.56, "grad_norm": 1.762544870376587, "learning_rate": 8.152587324565561e-05, "loss": 1.7438, "step": 7332 }, { "epoch": 0.56, "grad_norm": 1.7255772352218628, "learning_rate": 8.150229594613178e-05, "loss": 1.3943, "step": 7333 }, { "epoch": 0.56, "grad_norm": 
2.6696462631225586, "learning_rate": 8.147871971125723e-05, "loss": 1.4987, "step": 7334 }, { "epoch": 0.56, "grad_norm": 1.6065218448638916, "learning_rate": 8.145514454238895e-05, "loss": 1.5522, "step": 7335 }, { "epoch": 0.56, "grad_norm": 1.4303655624389648, "learning_rate": 8.143157044088377e-05, "loss": 1.3356, "step": 7336 }, { "epoch": 0.56, "grad_norm": 1.433875560760498, "learning_rate": 8.14079974080985e-05, "loss": 1.2569, "step": 7337 }, { "epoch": 0.56, "grad_norm": 1.6823828220367432, "learning_rate": 8.138442544538998e-05, "loss": 1.696, "step": 7338 }, { "epoch": 0.56, "grad_norm": 1.5089447498321533, "learning_rate": 8.13608545541148e-05, "loss": 1.2869, "step": 7339 }, { "epoch": 0.56, "grad_norm": 2.45263409614563, "learning_rate": 8.133728473562971e-05, "loss": 2.2416, "step": 7340 }, { "epoch": 0.56, "grad_norm": 0.9991543889045715, "learning_rate": 8.131371599129125e-05, "loss": 1.2581, "step": 7341 }, { "epoch": 0.56, "grad_norm": 1.0499528646469116, "learning_rate": 8.129014832245592e-05, "loss": 1.2973, "step": 7342 }, { "epoch": 0.56, "grad_norm": 3.108720541000366, "learning_rate": 8.126658173048016e-05, "loss": 1.6666, "step": 7343 }, { "epoch": 0.56, "grad_norm": 1.4560562372207642, "learning_rate": 8.124301621672044e-05, "loss": 1.6364, "step": 7344 }, { "epoch": 0.56, "grad_norm": 1.5690627098083496, "learning_rate": 8.1219451782533e-05, "loss": 0.9224, "step": 7345 }, { "epoch": 0.56, "grad_norm": 1.362204670906067, "learning_rate": 8.119588842927416e-05, "loss": 1.1645, "step": 7346 }, { "epoch": 0.56, "grad_norm": 2.363097906112671, "learning_rate": 8.117232615830014e-05, "loss": 1.5707, "step": 7347 }, { "epoch": 0.56, "grad_norm": 1.5055902004241943, "learning_rate": 8.114876497096701e-05, "loss": 1.3025, "step": 7348 }, { "epoch": 0.56, "grad_norm": 1.7957637310028076, "learning_rate": 8.112520486863091e-05, "loss": 1.2116, "step": 7349 }, { "epoch": 0.56, "grad_norm": 1.3626514673233032, "learning_rate": 
8.110164585264789e-05, "loss": 1.6574, "step": 7350 }, { "epoch": 0.56, "grad_norm": 2.817379951477051, "learning_rate": 8.107808792437386e-05, "loss": 1.8196, "step": 7351 }, { "epoch": 0.56, "grad_norm": 1.915683627128601, "learning_rate": 8.105453108516467e-05, "loss": 1.728, "step": 7352 }, { "epoch": 0.56, "grad_norm": 1.1261426210403442, "learning_rate": 8.103097533637627e-05, "loss": 1.9705, "step": 7353 }, { "epoch": 0.56, "grad_norm": 2.037238597869873, "learning_rate": 8.100742067936431e-05, "loss": 1.9172, "step": 7354 }, { "epoch": 0.56, "grad_norm": 4.861247539520264, "learning_rate": 8.098386711548458e-05, "loss": 2.6245, "step": 7355 }, { "epoch": 0.56, "grad_norm": 1.5445139408111572, "learning_rate": 8.09603146460927e-05, "loss": 0.966, "step": 7356 }, { "epoch": 0.56, "grad_norm": 2.892512083053589, "learning_rate": 8.093676327254421e-05, "loss": 2.4712, "step": 7357 }, { "epoch": 0.56, "grad_norm": 1.523794412612915, "learning_rate": 8.091321299619466e-05, "loss": 1.1809, "step": 7358 }, { "epoch": 0.56, "grad_norm": 1.0993475914001465, "learning_rate": 8.088966381839953e-05, "loss": 1.3161, "step": 7359 }, { "epoch": 0.56, "grad_norm": 1.5268574953079224, "learning_rate": 8.086611574051417e-05, "loss": 1.4708, "step": 7360 }, { "epoch": 0.56, "grad_norm": 3.7551467418670654, "learning_rate": 8.08425687638939e-05, "loss": 1.4924, "step": 7361 }, { "epoch": 0.56, "grad_norm": 3.040389060974121, "learning_rate": 8.081902288989404e-05, "loss": 1.5138, "step": 7362 }, { "epoch": 0.56, "grad_norm": 1.2577282190322876, "learning_rate": 8.079547811986972e-05, "loss": 1.6521, "step": 7363 }, { "epoch": 0.56, "grad_norm": 1.4472851753234863, "learning_rate": 8.077193445517614e-05, "loss": 1.529, "step": 7364 }, { "epoch": 0.56, "grad_norm": 1.14599609375, "learning_rate": 8.074839189716836e-05, "loss": 1.2707, "step": 7365 }, { "epoch": 0.56, "grad_norm": 1.6238163709640503, "learning_rate": 8.072485044720133e-05, "loss": 1.5165, "step": 7366 }, { 
"epoch": 0.56, "grad_norm": 1.0655970573425293, "learning_rate": 8.070131010663003e-05, "loss": 1.2841, "step": 7367 }, { "epoch": 0.56, "grad_norm": 1.1992404460906982, "learning_rate": 8.067777087680941e-05, "loss": 0.8908, "step": 7368 }, { "epoch": 0.56, "grad_norm": 1.778273344039917, "learning_rate": 8.06542327590942e-05, "loss": 1.6256, "step": 7369 }, { "epoch": 0.56, "grad_norm": 1.4969959259033203, "learning_rate": 8.063069575483916e-05, "loss": 0.6229, "step": 7370 }, { "epoch": 0.56, "grad_norm": 1.2035505771636963, "learning_rate": 8.060715986539906e-05, "loss": 1.6343, "step": 7371 }, { "epoch": 0.56, "grad_norm": 1.2055730819702148, "learning_rate": 8.058362509212843e-05, "loss": 1.4888, "step": 7372 }, { "epoch": 0.56, "grad_norm": 1.0341569185256958, "learning_rate": 8.056009143638189e-05, "loss": 1.1035, "step": 7373 }, { "epoch": 0.56, "grad_norm": 2.536447048187256, "learning_rate": 8.053655889951391e-05, "loss": 1.3849, "step": 7374 }, { "epoch": 0.56, "grad_norm": 1.1364407539367676, "learning_rate": 8.051302748287895e-05, "loss": 1.4921, "step": 7375 }, { "epoch": 0.56, "grad_norm": 1.512199878692627, "learning_rate": 8.048949718783132e-05, "loss": 1.1732, "step": 7376 }, { "epoch": 0.56, "grad_norm": 1.0739641189575195, "learning_rate": 8.046596801572542e-05, "loss": 1.4116, "step": 7377 }, { "epoch": 0.56, "grad_norm": 2.4007534980773926, "learning_rate": 8.044243996791538e-05, "loss": 1.9308, "step": 7378 }, { "epoch": 0.56, "grad_norm": 1.4963696002960205, "learning_rate": 8.041891304575546e-05, "loss": 2.0045, "step": 7379 }, { "epoch": 0.56, "grad_norm": 0.9663907885551453, "learning_rate": 8.039538725059976e-05, "loss": 0.9426, "step": 7380 }, { "epoch": 0.56, "grad_norm": 2.447706937789917, "learning_rate": 8.037186258380226e-05, "loss": 1.3615, "step": 7381 }, { "epoch": 0.56, "grad_norm": 0.9179477691650391, "learning_rate": 8.034833904671698e-05, "loss": 1.1715, "step": 7382 }, { "epoch": 0.56, "grad_norm": 1.6219944953918457, 
"learning_rate": 8.032481664069788e-05, "loss": 1.8076, "step": 7383 }, { "epoch": 0.56, "grad_norm": 1.7107566595077515, "learning_rate": 8.030129536709874e-05, "loss": 1.6953, "step": 7384 }, { "epoch": 0.56, "grad_norm": 0.9737257957458496, "learning_rate": 8.027777522727336e-05, "loss": 1.1538, "step": 7385 }, { "epoch": 0.56, "grad_norm": 1.7120158672332764, "learning_rate": 8.025425622257551e-05, "loss": 1.499, "step": 7386 }, { "epoch": 0.56, "grad_norm": 1.2612841129302979, "learning_rate": 8.023073835435877e-05, "loss": 0.464, "step": 7387 }, { "epoch": 0.56, "grad_norm": 2.2034554481506348, "learning_rate": 8.020722162397678e-05, "loss": 1.3124, "step": 7388 }, { "epoch": 0.56, "grad_norm": 1.8872274160385132, "learning_rate": 8.018370603278307e-05, "loss": 1.346, "step": 7389 }, { "epoch": 0.56, "grad_norm": 2.3316025733947754, "learning_rate": 8.016019158213101e-05, "loss": 1.3813, "step": 7390 }, { "epoch": 0.56, "grad_norm": 1.8694746494293213, "learning_rate": 8.013667827337409e-05, "loss": 1.756, "step": 7391 }, { "epoch": 0.56, "grad_norm": 1.3060587644577026, "learning_rate": 8.011316610786563e-05, "loss": 1.3279, "step": 7392 }, { "epoch": 0.56, "grad_norm": 4.310205459594727, "learning_rate": 8.008965508695885e-05, "loss": 1.3807, "step": 7393 }, { "epoch": 0.56, "grad_norm": 1.200953722000122, "learning_rate": 8.006614521200693e-05, "loss": 1.2426, "step": 7394 }, { "epoch": 0.56, "grad_norm": 1.3148024082183838, "learning_rate": 8.004263648436306e-05, "loss": 1.356, "step": 7395 }, { "epoch": 0.56, "grad_norm": 2.317044496536255, "learning_rate": 8.001912890538023e-05, "loss": 1.295, "step": 7396 }, { "epoch": 0.56, "grad_norm": 2.28338623046875, "learning_rate": 7.999562247641152e-05, "loss": 1.1706, "step": 7397 }, { "epoch": 0.56, "grad_norm": 1.1486027240753174, "learning_rate": 7.997211719880982e-05, "loss": 1.8704, "step": 7398 }, { "epoch": 0.56, "grad_norm": 1.465156078338623, "learning_rate": 7.994861307392794e-05, "loss": 1.3617, 
"step": 7399 }, { "epoch": 0.56, "grad_norm": 1.3005783557891846, "learning_rate": 7.992511010311872e-05, "loss": 1.5268, "step": 7400 }, { "epoch": 0.56, "grad_norm": 0.9682207703590393, "learning_rate": 7.990160828773497e-05, "loss": 1.4121, "step": 7401 }, { "epoch": 0.56, "grad_norm": 2.0423781871795654, "learning_rate": 7.987810762912924e-05, "loss": 1.3191, "step": 7402 }, { "epoch": 0.56, "grad_norm": 2.369154214859009, "learning_rate": 7.985460812865415e-05, "loss": 1.6211, "step": 7403 }, { "epoch": 0.56, "grad_norm": 5.4598917961120605, "learning_rate": 7.98311097876623e-05, "loss": 1.4686, "step": 7404 }, { "epoch": 0.57, "grad_norm": 1.3725532293319702, "learning_rate": 7.980761260750607e-05, "loss": 1.0332, "step": 7405 }, { "epoch": 0.57, "grad_norm": 3.587584972381592, "learning_rate": 7.97841165895379e-05, "loss": 1.7353, "step": 7406 }, { "epoch": 0.57, "grad_norm": 1.3678524494171143, "learning_rate": 7.976062173511011e-05, "loss": 1.1234, "step": 7407 }, { "epoch": 0.57, "grad_norm": 3.0588700771331787, "learning_rate": 7.973712804557501e-05, "loss": 1.5397, "step": 7408 }, { "epoch": 0.57, "grad_norm": 0.9473855495452881, "learning_rate": 7.97136355222847e-05, "loss": 1.2816, "step": 7409 }, { "epoch": 0.57, "grad_norm": 1.5917508602142334, "learning_rate": 7.969014416659142e-05, "loss": 1.4781, "step": 7410 }, { "epoch": 0.57, "grad_norm": 3.734219789505005, "learning_rate": 7.966665397984714e-05, "loss": 1.1313, "step": 7411 }, { "epoch": 0.57, "grad_norm": 1.2941735982894897, "learning_rate": 7.964316496340392e-05, "loss": 1.3428, "step": 7412 }, { "epoch": 0.57, "grad_norm": 1.43342125415802, "learning_rate": 7.961967711861367e-05, "loss": 1.4544, "step": 7413 }, { "epoch": 0.57, "grad_norm": 1.4294029474258423, "learning_rate": 7.95961904468282e-05, "loss": 1.3005, "step": 7414 }, { "epoch": 0.57, "grad_norm": 1.4088728427886963, "learning_rate": 7.957270494939935e-05, "loss": 1.3268, "step": 7415 }, { "epoch": 0.57, "grad_norm": 
1.279532790184021, "learning_rate": 7.95492206276789e-05, "loss": 1.724, "step": 7416 }, { "epoch": 0.57, "grad_norm": 1.1409553289413452, "learning_rate": 7.952573748301841e-05, "loss": 1.2097, "step": 7417 }, { "epoch": 0.57, "grad_norm": 1.7599046230316162, "learning_rate": 7.950225551676951e-05, "loss": 0.8872, "step": 7418 }, { "epoch": 0.57, "grad_norm": 1.2600433826446533, "learning_rate": 7.947877473028375e-05, "loss": 1.3571, "step": 7419 }, { "epoch": 0.57, "grad_norm": 0.9518380165100098, "learning_rate": 7.945529512491251e-05, "loss": 1.0568, "step": 7420 }, { "epoch": 0.57, "grad_norm": 1.7874349355697632, "learning_rate": 7.943181670200724e-05, "loss": 1.5511, "step": 7421 }, { "epoch": 0.57, "grad_norm": 2.3056414127349854, "learning_rate": 7.940833946291928e-05, "loss": 1.7678, "step": 7422 }, { "epoch": 0.57, "grad_norm": 1.2123911380767822, "learning_rate": 7.938486340899978e-05, "loss": 1.5693, "step": 7423 }, { "epoch": 0.57, "grad_norm": 2.518221378326416, "learning_rate": 7.93613885416e-05, "loss": 1.5877, "step": 7424 }, { "epoch": 0.57, "grad_norm": 1.3321681022644043, "learning_rate": 7.933791486207107e-05, "loss": 1.8339, "step": 7425 }, { "epoch": 0.57, "grad_norm": 1.4803622961044312, "learning_rate": 7.931444237176398e-05, "loss": 1.7414, "step": 7426 }, { "epoch": 0.57, "grad_norm": 1.0253924131393433, "learning_rate": 7.929097107202972e-05, "loss": 1.4117, "step": 7427 }, { "epoch": 0.57, "grad_norm": 1.281224012374878, "learning_rate": 7.926750096421924e-05, "loss": 1.5071, "step": 7428 }, { "epoch": 0.57, "grad_norm": 1.639245867729187, "learning_rate": 7.92440320496833e-05, "loss": 1.1978, "step": 7429 }, { "epoch": 0.57, "grad_norm": 1.5344077348709106, "learning_rate": 7.922056432977275e-05, "loss": 1.6614, "step": 7430 }, { "epoch": 0.57, "grad_norm": 1.4976874589920044, "learning_rate": 7.919709780583828e-05, "loss": 1.7306, "step": 7431 }, { "epoch": 0.57, "grad_norm": 1.0838017463684082, "learning_rate": 
7.917363247923046e-05, "loss": 1.2087, "step": 7432 }, { "epoch": 0.57, "grad_norm": 1.817797303199768, "learning_rate": 7.915016835129989e-05, "loss": 0.9012, "step": 7433 }, { "epoch": 0.57, "grad_norm": 1.7258025407791138, "learning_rate": 7.912670542339713e-05, "loss": 1.2678, "step": 7434 }, { "epoch": 0.57, "grad_norm": 1.3094297647476196, "learning_rate": 7.91032436968725e-05, "loss": 1.4119, "step": 7435 }, { "epoch": 0.57, "grad_norm": 3.722092866897583, "learning_rate": 7.907978317307643e-05, "loss": 1.4695, "step": 7436 }, { "epoch": 0.57, "grad_norm": 2.2849128246307373, "learning_rate": 7.90563238533592e-05, "loss": 1.5341, "step": 7437 }, { "epoch": 0.57, "grad_norm": 1.7349947690963745, "learning_rate": 7.903286573907098e-05, "loss": 1.5177, "step": 7438 }, { "epoch": 0.57, "grad_norm": 1.421630859375, "learning_rate": 7.900940883156196e-05, "loss": 1.953, "step": 7439 }, { "epoch": 0.57, "grad_norm": 1.611541748046875, "learning_rate": 7.898595313218225e-05, "loss": 1.6893, "step": 7440 }, { "epoch": 0.57, "grad_norm": 2.059082269668579, "learning_rate": 7.89624986422818e-05, "loss": 1.3728, "step": 7441 }, { "epoch": 0.57, "grad_norm": 1.3240361213684082, "learning_rate": 7.893904536321055e-05, "loss": 1.4125, "step": 7442 }, { "epoch": 0.57, "grad_norm": 1.1691069602966309, "learning_rate": 7.891559329631845e-05, "loss": 1.0901, "step": 7443 }, { "epoch": 0.57, "grad_norm": 1.849977731704712, "learning_rate": 7.88921424429552e-05, "loss": 1.9574, "step": 7444 }, { "epoch": 0.57, "grad_norm": 2.5554144382476807, "learning_rate": 7.88686928044706e-05, "loss": 1.075, "step": 7445 }, { "epoch": 0.57, "grad_norm": 1.9737269878387451, "learning_rate": 7.884524438221428e-05, "loss": 1.4875, "step": 7446 }, { "epoch": 0.57, "grad_norm": 0.9529958367347717, "learning_rate": 7.882179717753587e-05, "loss": 1.0539, "step": 7447 }, { "epoch": 0.57, "grad_norm": 1.1851654052734375, "learning_rate": 7.879835119178483e-05, "loss": 1.3704, "step": 7448 }, { 
"epoch": 0.57, "grad_norm": 1.2230650186538696, "learning_rate": 7.877490642631065e-05, "loss": 1.5994, "step": 7449 }, { "epoch": 0.57, "grad_norm": 0.968529462814331, "learning_rate": 7.875146288246275e-05, "loss": 1.2489, "step": 7450 }, { "epoch": 0.57, "grad_norm": 1.0682017803192139, "learning_rate": 7.872802056159035e-05, "loss": 1.0632, "step": 7451 }, { "epoch": 0.57, "grad_norm": 0.8759689331054688, "learning_rate": 7.870457946504273e-05, "loss": 1.3361, "step": 7452 }, { "epoch": 0.57, "grad_norm": 1.9853242635726929, "learning_rate": 7.868113959416912e-05, "loss": 1.3069, "step": 7453 }, { "epoch": 0.57, "grad_norm": 1.5179005861282349, "learning_rate": 7.865770095031853e-05, "loss": 1.7741, "step": 7454 }, { "epoch": 0.57, "grad_norm": 1.8153414726257324, "learning_rate": 7.863426353484002e-05, "loss": 1.1902, "step": 7455 }, { "epoch": 0.57, "grad_norm": 3.9684383869171143, "learning_rate": 7.861082734908259e-05, "loss": 1.9504, "step": 7456 }, { "epoch": 0.57, "grad_norm": 1.0599358081817627, "learning_rate": 7.858739239439505e-05, "loss": 1.3995, "step": 7457 }, { "epoch": 0.57, "grad_norm": 1.131349802017212, "learning_rate": 7.856395867212628e-05, "loss": 1.0242, "step": 7458 }, { "epoch": 0.57, "grad_norm": 1.3600021600723267, "learning_rate": 7.854052618362504e-05, "loss": 1.373, "step": 7459 }, { "epoch": 0.57, "grad_norm": 1.4457863569259644, "learning_rate": 7.85170949302399e-05, "loss": 1.0079, "step": 7460 }, { "epoch": 0.57, "grad_norm": 1.9177309274673462, "learning_rate": 7.849366491331954e-05, "loss": 2.0309, "step": 7461 }, { "epoch": 0.57, "grad_norm": 1.5183030366897583, "learning_rate": 7.847023613421251e-05, "loss": 0.9122, "step": 7462 }, { "epoch": 0.57, "grad_norm": 1.5606656074523926, "learning_rate": 7.844680859426725e-05, "loss": 1.3702, "step": 7463 }, { "epoch": 0.57, "grad_norm": 1.4024267196655273, "learning_rate": 7.842338229483212e-05, "loss": 1.6107, "step": 7464 }, { "epoch": 0.57, "grad_norm": 1.1269304752349854, 
"learning_rate": 7.839995723725548e-05, "loss": 1.6339, "step": 7465 }, { "epoch": 0.57, "grad_norm": 1.257769227027893, "learning_rate": 7.837653342288554e-05, "loss": 1.9019, "step": 7466 }, { "epoch": 0.57, "grad_norm": 2.225571632385254, "learning_rate": 7.835311085307051e-05, "loss": 1.4706, "step": 7467 }, { "epoch": 0.57, "grad_norm": 0.8325750231742859, "learning_rate": 7.832968952915848e-05, "loss": 1.0176, "step": 7468 }, { "epoch": 0.57, "grad_norm": 2.205247640609741, "learning_rate": 7.830626945249748e-05, "loss": 1.6148, "step": 7469 }, { "epoch": 0.57, "grad_norm": 1.4370397329330444, "learning_rate": 7.828285062443544e-05, "loss": 1.5527, "step": 7470 }, { "epoch": 0.57, "grad_norm": 1.623945713043213, "learning_rate": 7.825943304632033e-05, "loss": 1.423, "step": 7471 }, { "epoch": 0.57, "grad_norm": 1.9805347919464111, "learning_rate": 7.823601671949988e-05, "loss": 1.7021, "step": 7472 }, { "epoch": 0.57, "grad_norm": 1.3292157649993896, "learning_rate": 7.821260164532188e-05, "loss": 1.1369, "step": 7473 }, { "epoch": 0.57, "grad_norm": 1.3807597160339355, "learning_rate": 7.8189187825134e-05, "loss": 1.071, "step": 7474 }, { "epoch": 0.57, "grad_norm": 1.1602445840835571, "learning_rate": 7.816577526028381e-05, "loss": 1.5683, "step": 7475 }, { "epoch": 0.57, "grad_norm": 1.1193437576293945, "learning_rate": 7.814236395211885e-05, "loss": 0.9101, "step": 7476 }, { "epoch": 0.57, "grad_norm": 1.0405937433242798, "learning_rate": 7.811895390198663e-05, "loss": 1.1214, "step": 7477 }, { "epoch": 0.57, "grad_norm": 0.9506744146347046, "learning_rate": 7.809554511123446e-05, "loss": 1.5642, "step": 7478 }, { "epoch": 0.57, "grad_norm": 1.7294280529022217, "learning_rate": 7.807213758120966e-05, "loss": 1.3254, "step": 7479 }, { "epoch": 0.57, "grad_norm": 1.2117894887924194, "learning_rate": 7.804873131325954e-05, "loss": 1.4179, "step": 7480 }, { "epoch": 0.57, "grad_norm": 1.4278576374053955, "learning_rate": 7.802532630873116e-05, "loss": 1.2263, 
"step": 7481 }, { "epoch": 0.57, "grad_norm": 2.411315679550171, "learning_rate": 7.800192256897167e-05, "loss": 1.1717, "step": 7482 }, { "epoch": 0.57, "grad_norm": 1.1288906335830688, "learning_rate": 7.797852009532814e-05, "loss": 1.5956, "step": 7483 }, { "epoch": 0.57, "grad_norm": 1.4030460119247437, "learning_rate": 7.79551188891474e-05, "loss": 1.649, "step": 7484 }, { "epoch": 0.57, "grad_norm": 6.077075004577637, "learning_rate": 7.793171895177638e-05, "loss": 2.1639, "step": 7485 }, { "epoch": 0.57, "grad_norm": 1.1607540845870972, "learning_rate": 7.790832028456193e-05, "loss": 1.4608, "step": 7486 }, { "epoch": 0.57, "grad_norm": 1.143710970878601, "learning_rate": 7.788492288885072e-05, "loss": 0.9893, "step": 7487 }, { "epoch": 0.57, "grad_norm": 1.2580881118774414, "learning_rate": 7.78615267659894e-05, "loss": 1.4175, "step": 7488 }, { "epoch": 0.57, "grad_norm": 1.8916289806365967, "learning_rate": 7.78381319173246e-05, "loss": 1.4937, "step": 7489 }, { "epoch": 0.57, "grad_norm": 1.4768625497817993, "learning_rate": 7.781473834420276e-05, "loss": 1.3102, "step": 7490 }, { "epoch": 0.57, "grad_norm": 3.4992809295654297, "learning_rate": 7.779134604797038e-05, "loss": 1.7104, "step": 7491 }, { "epoch": 0.57, "grad_norm": 2.5437638759613037, "learning_rate": 7.776795502997381e-05, "loss": 1.5146, "step": 7492 }, { "epoch": 0.57, "grad_norm": 0.9539449214935303, "learning_rate": 7.774456529155928e-05, "loss": 1.5027, "step": 7493 }, { "epoch": 0.57, "grad_norm": 2.3338983058929443, "learning_rate": 7.772117683407306e-05, "loss": 2.0541, "step": 7494 }, { "epoch": 0.57, "grad_norm": 1.7032883167266846, "learning_rate": 7.76977896588613e-05, "loss": 1.9302, "step": 7495 }, { "epoch": 0.57, "grad_norm": 1.4703915119171143, "learning_rate": 7.767440376727003e-05, "loss": 1.7273, "step": 7496 }, { "epoch": 0.57, "grad_norm": 1.4984644651412964, "learning_rate": 7.765101916064523e-05, "loss": 1.0189, "step": 7497 }, { "epoch": 0.57, "grad_norm": 
1.0878373384475708, "learning_rate": 7.76276358403329e-05, "loss": 0.9028, "step": 7498 }, { "epoch": 0.57, "grad_norm": 2.331475257873535, "learning_rate": 7.760425380767876e-05, "loss": 1.4113, "step": 7499 }, { "epoch": 0.57, "grad_norm": 2.0536723136901855, "learning_rate": 7.758087306402868e-05, "loss": 0.9123, "step": 7500 }, { "epoch": 0.57, "grad_norm": 1.2220169305801392, "learning_rate": 7.755749361072834e-05, "loss": 1.7929, "step": 7501 }, { "epoch": 0.57, "grad_norm": 1.3431580066680908, "learning_rate": 7.753411544912333e-05, "loss": 1.505, "step": 7502 }, { "epoch": 0.57, "grad_norm": 1.3320282697677612, "learning_rate": 7.75107385805592e-05, "loss": 1.3946, "step": 7503 }, { "epoch": 0.57, "grad_norm": 1.24109947681427, "learning_rate": 7.748736300638149e-05, "loss": 1.295, "step": 7504 }, { "epoch": 0.57, "grad_norm": 1.7566875219345093, "learning_rate": 7.74639887279355e-05, "loss": 1.8821, "step": 7505 }, { "epoch": 0.57, "grad_norm": 2.1073107719421387, "learning_rate": 7.744061574656663e-05, "loss": 1.0669, "step": 7506 }, { "epoch": 0.57, "grad_norm": 1.429857611656189, "learning_rate": 7.74172440636201e-05, "loss": 1.5397, "step": 7507 }, { "epoch": 0.57, "grad_norm": 1.2052161693572998, "learning_rate": 7.739387368044106e-05, "loss": 1.3527, "step": 7508 }, { "epoch": 0.57, "grad_norm": 1.2744179964065552, "learning_rate": 7.737050459837463e-05, "loss": 1.537, "step": 7509 }, { "epoch": 0.57, "grad_norm": 1.324874758720398, "learning_rate": 7.734713681876589e-05, "loss": 1.7209, "step": 7510 }, { "epoch": 0.57, "grad_norm": 1.605366587638855, "learning_rate": 7.73237703429597e-05, "loss": 1.3468, "step": 7511 }, { "epoch": 0.57, "grad_norm": 1.9009052515029907, "learning_rate": 7.730040517230097e-05, "loss": 1.3062, "step": 7512 }, { "epoch": 0.57, "grad_norm": 1.7030553817749023, "learning_rate": 7.727704130813455e-05, "loss": 1.6419, "step": 7513 }, { "epoch": 0.57, "grad_norm": 1.1529539823532104, "learning_rate": 7.725367875180507e-05, 
"loss": 1.0924, "step": 7514 }, { "epoch": 0.57, "grad_norm": 1.2180215120315552, "learning_rate": 7.723031750465725e-05, "loss": 1.1936, "step": 7515 }, { "epoch": 0.57, "grad_norm": 1.4162838459014893, "learning_rate": 7.72069575680357e-05, "loss": 1.7813, "step": 7516 }, { "epoch": 0.57, "grad_norm": 1.5092219114303589, "learning_rate": 7.718359894328478e-05, "loss": 1.4616, "step": 7517 }, { "epoch": 0.57, "grad_norm": 1.2543904781341553, "learning_rate": 7.7160241631749e-05, "loss": 0.9689, "step": 7518 }, { "epoch": 0.57, "grad_norm": 3.1625804901123047, "learning_rate": 7.713688563477277e-05, "loss": 1.7519, "step": 7519 }, { "epoch": 0.57, "grad_norm": 1.4270076751708984, "learning_rate": 7.711353095370026e-05, "loss": 1.2727, "step": 7520 }, { "epoch": 0.57, "grad_norm": 2.173142910003662, "learning_rate": 7.709017758987568e-05, "loss": 1.5084, "step": 7521 }, { "epoch": 0.57, "grad_norm": 1.6452991962432861, "learning_rate": 7.706682554464322e-05, "loss": 1.6988, "step": 7522 }, { "epoch": 0.57, "grad_norm": 1.072364091873169, "learning_rate": 7.704347481934683e-05, "loss": 1.2433, "step": 7523 }, { "epoch": 0.57, "grad_norm": 0.9688166975975037, "learning_rate": 7.702012541533055e-05, "loss": 0.9052, "step": 7524 }, { "epoch": 0.57, "grad_norm": 3.2383015155792236, "learning_rate": 7.699677733393826e-05, "loss": 1.1084, "step": 7525 }, { "epoch": 0.57, "grad_norm": 2.4628183841705322, "learning_rate": 7.697343057651374e-05, "loss": 1.2607, "step": 7526 }, { "epoch": 0.57, "grad_norm": 1.2915235757827759, "learning_rate": 7.695008514440074e-05, "loss": 1.2465, "step": 7527 }, { "epoch": 0.57, "grad_norm": 4.040245056152344, "learning_rate": 7.692674103894299e-05, "loss": 1.6758, "step": 7528 }, { "epoch": 0.57, "grad_norm": 1.8262932300567627, "learning_rate": 7.6903398261484e-05, "loss": 1.6026, "step": 7529 }, { "epoch": 0.57, "grad_norm": 1.839434266090393, "learning_rate": 7.688005681336729e-05, "loss": 1.5664, "step": 7530 }, { "epoch": 0.57, 
"grad_norm": 1.8722292184829712, "learning_rate": 7.685671669593635e-05, "loss": 1.177, "step": 7531 }, { "epoch": 0.57, "grad_norm": 3.848158597946167, "learning_rate": 7.683337791053446e-05, "loss": 1.2417, "step": 7532 }, { "epoch": 0.57, "grad_norm": 1.0453176498413086, "learning_rate": 7.681004045850497e-05, "loss": 1.4303, "step": 7533 }, { "epoch": 0.57, "grad_norm": 2.4753355979919434, "learning_rate": 7.678670434119105e-05, "loss": 1.5976, "step": 7534 }, { "epoch": 0.57, "grad_norm": 1.433391809463501, "learning_rate": 7.676336955993583e-05, "loss": 1.609, "step": 7535 }, { "epoch": 0.58, "grad_norm": 1.3607368469238281, "learning_rate": 7.674003611608235e-05, "loss": 1.286, "step": 7536 }, { "epoch": 0.58, "grad_norm": 0.7450120449066162, "learning_rate": 7.671670401097364e-05, "loss": 0.7678, "step": 7537 }, { "epoch": 0.58, "grad_norm": 1.744378685951233, "learning_rate": 7.669337324595252e-05, "loss": 1.4244, "step": 7538 }, { "epoch": 0.58, "grad_norm": 1.0298552513122559, "learning_rate": 7.667004382236185e-05, "loss": 1.0651, "step": 7539 }, { "epoch": 0.58, "grad_norm": 1.316858172416687, "learning_rate": 7.66467157415444e-05, "loss": 1.5601, "step": 7540 }, { "epoch": 0.58, "grad_norm": 1.4398428201675415, "learning_rate": 7.662338900484276e-05, "loss": 1.5205, "step": 7541 }, { "epoch": 0.58, "grad_norm": 1.281572937965393, "learning_rate": 7.660006361359955e-05, "loss": 1.1335, "step": 7542 }, { "epoch": 0.58, "grad_norm": 1.6702044010162354, "learning_rate": 7.657673956915735e-05, "loss": 1.6779, "step": 7543 }, { "epoch": 0.58, "grad_norm": 1.3533473014831543, "learning_rate": 7.655341687285851e-05, "loss": 1.3143, "step": 7544 }, { "epoch": 0.58, "grad_norm": 1.1080918312072754, "learning_rate": 7.653009552604538e-05, "loss": 1.5582, "step": 7545 }, { "epoch": 0.58, "grad_norm": 2.2190089225769043, "learning_rate": 7.650677553006031e-05, "loss": 1.8819, "step": 7546 }, { "epoch": 0.58, "grad_norm": 1.304314374923706, "learning_rate": 
7.648345688624544e-05, "loss": 1.1523, "step": 7547 }, { "epoch": 0.58, "grad_norm": 1.217533826828003, "learning_rate": 7.646013959594289e-05, "loss": 1.8946, "step": 7548 }, { "epoch": 0.58, "grad_norm": 2.258516788482666, "learning_rate": 7.643682366049477e-05, "loss": 1.7352, "step": 7549 }, { "epoch": 0.58, "grad_norm": 3.2179324626922607, "learning_rate": 7.641350908124296e-05, "loss": 1.3343, "step": 7550 }, { "epoch": 0.58, "grad_norm": 1.3864082098007202, "learning_rate": 7.639019585952938e-05, "loss": 1.3937, "step": 7551 }, { "epoch": 0.58, "grad_norm": 2.074232816696167, "learning_rate": 7.636688399669589e-05, "loss": 1.5825, "step": 7552 }, { "epoch": 0.58, "grad_norm": 1.3834419250488281, "learning_rate": 7.634357349408417e-05, "loss": 0.971, "step": 7553 }, { "epoch": 0.58, "grad_norm": 1.1372984647750854, "learning_rate": 7.632026435303584e-05, "loss": 1.046, "step": 7554 }, { "epoch": 0.58, "grad_norm": 1.5120623111724854, "learning_rate": 7.629695657489257e-05, "loss": 1.4493, "step": 7555 }, { "epoch": 0.58, "grad_norm": 1.2708507776260376, "learning_rate": 7.627365016099578e-05, "loss": 1.2185, "step": 7556 }, { "epoch": 0.58, "grad_norm": 1.7216577529907227, "learning_rate": 7.625034511268692e-05, "loss": 1.1582, "step": 7557 }, { "epoch": 0.58, "grad_norm": 1.5287891626358032, "learning_rate": 7.622704143130735e-05, "loss": 1.5197, "step": 7558 }, { "epoch": 0.58, "grad_norm": 1.411158800125122, "learning_rate": 7.620373911819825e-05, "loss": 1.4406, "step": 7559 }, { "epoch": 0.58, "grad_norm": 1.6206190586090088, "learning_rate": 7.618043817470086e-05, "loss": 1.9281, "step": 7560 }, { "epoch": 0.58, "grad_norm": 1.1627047061920166, "learning_rate": 7.615713860215633e-05, "loss": 0.9766, "step": 7561 }, { "epoch": 0.58, "grad_norm": 1.1556893587112427, "learning_rate": 7.613384040190562e-05, "loss": 1.8061, "step": 7562 }, { "epoch": 0.58, "grad_norm": 1.9432460069656372, "learning_rate": 7.611054357528964e-05, "loss": 1.5909, "step": 7563 
}, { "epoch": 0.58, "grad_norm": 1.073289394378662, "learning_rate": 7.608724812364937e-05, "loss": 0.9197, "step": 7564 }, { "epoch": 0.58, "grad_norm": 1.965144157409668, "learning_rate": 7.606395404832548e-05, "loss": 0.8845, "step": 7565 }, { "epoch": 0.58, "grad_norm": 1.884291648864746, "learning_rate": 7.604066135065874e-05, "loss": 1.2373, "step": 7566 }, { "epoch": 0.58, "grad_norm": 1.4814200401306152, "learning_rate": 7.60173700319898e-05, "loss": 1.7136, "step": 7567 }, { "epoch": 0.58, "grad_norm": 1.5002570152282715, "learning_rate": 7.599408009365917e-05, "loss": 0.8683, "step": 7568 }, { "epoch": 0.58, "grad_norm": 1.0834596157073975, "learning_rate": 7.59707915370073e-05, "loss": 1.5526, "step": 7569 }, { "epoch": 0.58, "grad_norm": 1.9291895627975464, "learning_rate": 7.594750436337467e-05, "loss": 1.4277, "step": 7570 }, { "epoch": 0.58, "grad_norm": 1.9927492141723633, "learning_rate": 7.592421857410147e-05, "loss": 1.5909, "step": 7571 }, { "epoch": 0.58, "grad_norm": 1.1349693536758423, "learning_rate": 7.590093417052802e-05, "loss": 1.3156, "step": 7572 }, { "epoch": 0.58, "grad_norm": 2.3279366493225098, "learning_rate": 7.587765115399446e-05, "loss": 1.4103, "step": 7573 }, { "epoch": 0.58, "grad_norm": 2.0252609252929688, "learning_rate": 7.585436952584079e-05, "loss": 1.9956, "step": 7574 }, { "epoch": 0.58, "grad_norm": 1.3988124132156372, "learning_rate": 7.583108928740707e-05, "loss": 1.1204, "step": 7575 }, { "epoch": 0.58, "grad_norm": 1.930490493774414, "learning_rate": 7.580781044003324e-05, "loss": 1.6649, "step": 7576 }, { "epoch": 0.58, "grad_norm": 2.187188148498535, "learning_rate": 7.578453298505907e-05, "loss": 2.1237, "step": 7577 }, { "epoch": 0.58, "grad_norm": 1.1655495166778564, "learning_rate": 7.57612569238243e-05, "loss": 1.3801, "step": 7578 }, { "epoch": 0.58, "grad_norm": 1.2356761693954468, "learning_rate": 7.57379822576687e-05, "loss": 0.9934, "step": 7579 }, { "epoch": 0.58, "grad_norm": 1.1596567630767822, 
"learning_rate": 7.571470898793173e-05, "loss": 0.7983, "step": 7580 }, { "epoch": 0.58, "grad_norm": 1.23235023021698, "learning_rate": 7.569143711595298e-05, "loss": 1.2842, "step": 7581 }, { "epoch": 0.58, "grad_norm": 3.6275618076324463, "learning_rate": 7.566816664307184e-05, "loss": 2.0135, "step": 7582 }, { "epoch": 0.58, "grad_norm": 1.1289429664611816, "learning_rate": 7.564489757062774e-05, "loss": 1.6857, "step": 7583 }, { "epoch": 0.58, "grad_norm": 1.2878663539886475, "learning_rate": 7.562162989995984e-05, "loss": 1.0063, "step": 7584 }, { "epoch": 0.58, "grad_norm": 1.3051555156707764, "learning_rate": 7.55983636324074e-05, "loss": 1.3708, "step": 7585 }, { "epoch": 0.58, "grad_norm": 1.0927084684371948, "learning_rate": 7.557509876930955e-05, "loss": 1.464, "step": 7586 }, { "epoch": 0.58, "grad_norm": 1.541398048400879, "learning_rate": 7.55518353120052e-05, "loss": 2.0063, "step": 7587 }, { "epoch": 0.58, "grad_norm": 2.4718573093414307, "learning_rate": 7.55285732618334e-05, "loss": 1.7482, "step": 7588 }, { "epoch": 0.58, "grad_norm": 1.804288625717163, "learning_rate": 7.5505312620133e-05, "loss": 1.859, "step": 7589 }, { "epoch": 0.58, "grad_norm": 1.8776838779449463, "learning_rate": 7.548205338824277e-05, "loss": 1.5647, "step": 7590 }, { "epoch": 0.58, "grad_norm": 1.5469251871109009, "learning_rate": 7.545879556750138e-05, "loss": 1.705, "step": 7591 }, { "epoch": 0.58, "grad_norm": 1.0810331106185913, "learning_rate": 7.543553915924753e-05, "loss": 0.8823, "step": 7592 }, { "epoch": 0.58, "grad_norm": 1.2049039602279663, "learning_rate": 7.541228416481967e-05, "loss": 1.3267, "step": 7593 }, { "epoch": 0.58, "grad_norm": 1.216792345046997, "learning_rate": 7.538903058555632e-05, "loss": 1.7007, "step": 7594 }, { "epoch": 0.58, "grad_norm": 1.8094793558120728, "learning_rate": 7.536577842279583e-05, "loss": 1.2962, "step": 7595 }, { "epoch": 0.58, "grad_norm": 0.988280713558197, "learning_rate": 7.534252767787653e-05, "loss": 1.2812, 
"step": 7596 }, { "epoch": 0.58, "grad_norm": 2.0368363857269287, "learning_rate": 7.531927835213656e-05, "loss": 2.0948, "step": 7597 }, { "epoch": 0.58, "grad_norm": 1.1589990854263306, "learning_rate": 7.529603044691417e-05, "loss": 0.875, "step": 7598 }, { "epoch": 0.58, "grad_norm": 1.7135297060012817, "learning_rate": 7.527278396354728e-05, "loss": 1.3395, "step": 7599 }, { "epoch": 0.58, "grad_norm": 1.6921477317810059, "learning_rate": 7.524953890337395e-05, "loss": 1.4845, "step": 7600 }, { "epoch": 0.58, "grad_norm": 0.9345218539237976, "learning_rate": 7.522629526773206e-05, "loss": 1.158, "step": 7601 }, { "epoch": 0.58, "grad_norm": 1.0223323106765747, "learning_rate": 7.520305305795934e-05, "loss": 1.2732, "step": 7602 }, { "epoch": 0.58, "grad_norm": 1.520176887512207, "learning_rate": 7.517981227539357e-05, "loss": 1.419, "step": 7603 }, { "epoch": 0.58, "grad_norm": 1.3945820331573486, "learning_rate": 7.515657292137244e-05, "loss": 1.1824, "step": 7604 }, { "epoch": 0.58, "grad_norm": 1.9434056282043457, "learning_rate": 7.513333499723343e-05, "loss": 1.5588, "step": 7605 }, { "epoch": 0.58, "grad_norm": 1.4415059089660645, "learning_rate": 7.5110098504314e-05, "loss": 1.3107, "step": 7606 }, { "epoch": 0.58, "grad_norm": 1.4944794178009033, "learning_rate": 7.508686344395165e-05, "loss": 2.1, "step": 7607 }, { "epoch": 0.58, "grad_norm": 0.9251688718795776, "learning_rate": 7.506362981748358e-05, "loss": 1.0007, "step": 7608 }, { "epoch": 0.58, "grad_norm": 1.1945420503616333, "learning_rate": 7.504039762624709e-05, "loss": 1.0514, "step": 7609 }, { "epoch": 0.58, "grad_norm": 1.1648370027542114, "learning_rate": 7.50171668715793e-05, "loss": 1.5661, "step": 7610 }, { "epoch": 0.58, "grad_norm": 1.6361018419265747, "learning_rate": 7.499393755481725e-05, "loss": 0.894, "step": 7611 }, { "epoch": 0.58, "grad_norm": 1.2180695533752441, "learning_rate": 7.497070967729795e-05, "loss": 1.4471, "step": 7612 }, { "epoch": 0.58, "grad_norm": 
1.727463722229004, "learning_rate": 7.494748324035832e-05, "loss": 1.6128, "step": 7613 }, { "epoch": 0.58, "grad_norm": 1.740398645401001, "learning_rate": 7.492425824533513e-05, "loss": 0.8788, "step": 7614 }, { "epoch": 0.58, "grad_norm": 3.306556463241577, "learning_rate": 7.490103469356513e-05, "loss": 1.8277, "step": 7615 }, { "epoch": 0.58, "grad_norm": 1.6770856380462646, "learning_rate": 7.4877812586385e-05, "loss": 1.9435, "step": 7616 }, { "epoch": 0.58, "grad_norm": 1.392086386680603, "learning_rate": 7.485459192513122e-05, "loss": 1.1944, "step": 7617 }, { "epoch": 0.58, "grad_norm": 2.3561863899230957, "learning_rate": 7.483137271114037e-05, "loss": 1.492, "step": 7618 }, { "epoch": 0.58, "grad_norm": 0.9892858266830444, "learning_rate": 7.480815494574882e-05, "loss": 1.2064, "step": 7619 }, { "epoch": 0.58, "grad_norm": 1.4513063430786133, "learning_rate": 7.478493863029283e-05, "loss": 1.3558, "step": 7620 }, { "epoch": 0.58, "grad_norm": 1.2008097171783447, "learning_rate": 7.476172376610868e-05, "loss": 0.7796, "step": 7621 }, { "epoch": 0.58, "grad_norm": 1.378661036491394, "learning_rate": 7.473851035453255e-05, "loss": 1.612, "step": 7622 }, { "epoch": 0.58, "grad_norm": 1.3867090940475464, "learning_rate": 7.471529839690046e-05, "loss": 0.8003, "step": 7623 }, { "epoch": 0.58, "grad_norm": 1.1488709449768066, "learning_rate": 7.469208789454838e-05, "loss": 1.6374, "step": 7624 }, { "epoch": 0.58, "grad_norm": 1.6369998455047607, "learning_rate": 7.466887884881226e-05, "loss": 1.4232, "step": 7625 }, { "epoch": 0.58, "grad_norm": 1.5986552238464355, "learning_rate": 7.464567126102786e-05, "loss": 0.7489, "step": 7626 }, { "epoch": 0.58, "grad_norm": 1.0846450328826904, "learning_rate": 7.462246513253094e-05, "loss": 1.0308, "step": 7627 }, { "epoch": 0.58, "grad_norm": 2.3142051696777344, "learning_rate": 7.459926046465715e-05, "loss": 1.9753, "step": 7628 }, { "epoch": 0.58, "grad_norm": 1.2882070541381836, "learning_rate": 
7.457605725874204e-05, "loss": 1.4957, "step": 7629 }, { "epoch": 0.58, "grad_norm": 1.3579933643341064, "learning_rate": 7.455285551612105e-05, "loss": 1.6751, "step": 7630 }, { "epoch": 0.58, "grad_norm": 2.0050241947174072, "learning_rate": 7.45296552381297e-05, "loss": 1.9451, "step": 7631 }, { "epoch": 0.58, "grad_norm": 3.9861226081848145, "learning_rate": 7.450645642610314e-05, "loss": 1.6546, "step": 7632 }, { "epoch": 0.58, "grad_norm": 2.001033067703247, "learning_rate": 7.44832590813767e-05, "loss": 1.0349, "step": 7633 }, { "epoch": 0.58, "grad_norm": 2.6489312648773193, "learning_rate": 7.44600632052855e-05, "loss": 1.5313, "step": 7634 }, { "epoch": 0.58, "grad_norm": 2.215303897857666, "learning_rate": 7.443686879916455e-05, "loss": 1.3676, "step": 7635 }, { "epoch": 0.58, "grad_norm": 1.6452692747116089, "learning_rate": 7.441367586434887e-05, "loss": 1.4904, "step": 7636 }, { "epoch": 0.58, "grad_norm": 3.8149898052215576, "learning_rate": 7.439048440217336e-05, "loss": 1.7795, "step": 7637 }, { "epoch": 0.58, "grad_norm": 1.5872790813446045, "learning_rate": 7.436729441397278e-05, "loss": 1.1286, "step": 7638 }, { "epoch": 0.58, "grad_norm": 1.319331407546997, "learning_rate": 7.434410590108184e-05, "loss": 1.4377, "step": 7639 }, { "epoch": 0.58, "grad_norm": 2.288931369781494, "learning_rate": 7.432091886483523e-05, "loss": 1.4992, "step": 7640 }, { "epoch": 0.58, "grad_norm": 1.321974277496338, "learning_rate": 7.429773330656744e-05, "loss": 1.4228, "step": 7641 }, { "epoch": 0.58, "grad_norm": 1.6187937259674072, "learning_rate": 7.427454922761296e-05, "loss": 1.3893, "step": 7642 }, { "epoch": 0.58, "grad_norm": 0.9363378286361694, "learning_rate": 7.425136662930621e-05, "loss": 0.7332, "step": 7643 }, { "epoch": 0.58, "grad_norm": 1.1079972982406616, "learning_rate": 7.422818551298137e-05, "loss": 1.2292, "step": 7644 }, { "epoch": 0.58, "grad_norm": 1.5686097145080566, "learning_rate": 7.42050058799727e-05, "loss": 1.5605, "step": 7645 }, { 
"epoch": 0.58, "grad_norm": 1.52576744556427, "learning_rate": 7.41818277316144e-05, "loss": 1.2143, "step": 7646 }, { "epoch": 0.58, "grad_norm": 1.3113995790481567, "learning_rate": 7.415865106924041e-05, "loss": 1.3119, "step": 7647 }, { "epoch": 0.58, "grad_norm": 1.216094732284546, "learning_rate": 7.413547589418468e-05, "loss": 1.0523, "step": 7648 }, { "epoch": 0.58, "grad_norm": 1.2207080125808716, "learning_rate": 7.411230220778114e-05, "loss": 1.4771, "step": 7649 }, { "epoch": 0.58, "grad_norm": 1.7724084854125977, "learning_rate": 7.408913001136349e-05, "loss": 0.9259, "step": 7650 }, { "epoch": 0.58, "grad_norm": 0.9788831472396851, "learning_rate": 7.40659593062655e-05, "loss": 0.8337, "step": 7651 }, { "epoch": 0.58, "grad_norm": 1.2790422439575195, "learning_rate": 7.404279009382075e-05, "loss": 1.5544, "step": 7652 }, { "epoch": 0.58, "grad_norm": 1.501643419265747, "learning_rate": 7.401962237536272e-05, "loss": 1.2303, "step": 7653 }, { "epoch": 0.58, "grad_norm": 1.2189393043518066, "learning_rate": 7.399645615222488e-05, "loss": 0.8917, "step": 7654 }, { "epoch": 0.58, "grad_norm": 2.3589212894439697, "learning_rate": 7.397329142574063e-05, "loss": 1.3308, "step": 7655 }, { "epoch": 0.58, "grad_norm": 1.136163353919983, "learning_rate": 7.395012819724314e-05, "loss": 1.2042, "step": 7656 }, { "epoch": 0.58, "grad_norm": 1.4842102527618408, "learning_rate": 7.392696646806561e-05, "loss": 1.8458, "step": 7657 }, { "epoch": 0.58, "grad_norm": 1.368496060371399, "learning_rate": 7.390380623954121e-05, "loss": 1.4493, "step": 7658 }, { "epoch": 0.58, "grad_norm": 1.3886809349060059, "learning_rate": 7.388064751300281e-05, "loss": 1.1996, "step": 7659 }, { "epoch": 0.58, "grad_norm": 1.0168694257736206, "learning_rate": 7.385749028978346e-05, "loss": 1.12, "step": 7660 }, { "epoch": 0.58, "grad_norm": 2.6715195178985596, "learning_rate": 7.383433457121592e-05, "loss": 1.7495, "step": 7661 }, { "epoch": 0.58, "grad_norm": 1.0439929962158203, 
"learning_rate": 7.381118035863294e-05, "loss": 1.1876, "step": 7662 }, { "epoch": 0.58, "grad_norm": 1.3198829889297485, "learning_rate": 7.378802765336716e-05, "loss": 0.8444, "step": 7663 }, { "epoch": 0.58, "grad_norm": 2.0443918704986572, "learning_rate": 7.376487645675124e-05, "loss": 1.0139, "step": 7664 }, { "epoch": 0.58, "grad_norm": 1.3542970418930054, "learning_rate": 7.374172677011754e-05, "loss": 1.1766, "step": 7665 }, { "epoch": 0.58, "grad_norm": 4.483761787414551, "learning_rate": 7.371857859479857e-05, "loss": 2.6123, "step": 7666 }, { "epoch": 0.59, "grad_norm": 1.038487434387207, "learning_rate": 7.369543193212659e-05, "loss": 1.3342, "step": 7667 }, { "epoch": 0.59, "grad_norm": 3.4232423305511475, "learning_rate": 7.367228678343379e-05, "loss": 1.4431, "step": 7668 }, { "epoch": 0.59, "grad_norm": 1.3711806535720825, "learning_rate": 7.364914315005233e-05, "loss": 1.0147, "step": 7669 }, { "epoch": 0.59, "grad_norm": 3.504289388656616, "learning_rate": 7.362600103331432e-05, "loss": 1.5592, "step": 7670 }, { "epoch": 0.59, "grad_norm": 1.591120719909668, "learning_rate": 7.360286043455164e-05, "loss": 1.3131, "step": 7671 }, { "epoch": 0.59, "grad_norm": 1.26569402217865, "learning_rate": 7.357972135509619e-05, "loss": 0.918, "step": 7672 }, { "epoch": 0.59, "grad_norm": 1.371954321861267, "learning_rate": 7.35565837962798e-05, "loss": 1.6388, "step": 7673 }, { "epoch": 0.59, "grad_norm": 1.08150315284729, "learning_rate": 7.353344775943409e-05, "loss": 1.2976, "step": 7674 }, { "epoch": 0.59, "grad_norm": 1.8727288246154785, "learning_rate": 7.351031324589074e-05, "loss": 1.5144, "step": 7675 }, { "epoch": 0.59, "grad_norm": 2.2725257873535156, "learning_rate": 7.348718025698127e-05, "loss": 1.2975, "step": 7676 }, { "epoch": 0.59, "grad_norm": 3.249195098876953, "learning_rate": 7.346404879403705e-05, "loss": 1.5971, "step": 7677 }, { "epoch": 0.59, "grad_norm": 2.1386773586273193, "learning_rate": 7.344091885838948e-05, "loss": 1.352, 
"step": 7678 }, { "epoch": 0.59, "grad_norm": 1.4380428791046143, "learning_rate": 7.341779045136985e-05, "loss": 1.1187, "step": 7679 }, { "epoch": 0.59, "grad_norm": 1.2582672834396362, "learning_rate": 7.339466357430928e-05, "loss": 1.5792, "step": 7680 }, { "epoch": 0.59, "grad_norm": 1.854781985282898, "learning_rate": 7.337153822853886e-05, "loss": 1.3941, "step": 7681 }, { "epoch": 0.59, "grad_norm": 3.351078987121582, "learning_rate": 7.334841441538965e-05, "loss": 1.6601, "step": 7682 }, { "epoch": 0.59, "grad_norm": 2.1755542755126953, "learning_rate": 7.332529213619244e-05, "loss": 1.6085, "step": 7683 }, { "epoch": 0.59, "grad_norm": 1.0353810787200928, "learning_rate": 7.330217139227815e-05, "loss": 1.1973, "step": 7684 }, { "epoch": 0.59, "grad_norm": 1.2001972198486328, "learning_rate": 7.327905218497752e-05, "loss": 0.6127, "step": 7685 }, { "epoch": 0.59, "grad_norm": 2.0438802242279053, "learning_rate": 7.32559345156211e-05, "loss": 1.5814, "step": 7686 }, { "epoch": 0.59, "grad_norm": 1.5763648748397827, "learning_rate": 7.323281838553948e-05, "loss": 1.1294, "step": 7687 }, { "epoch": 0.59, "grad_norm": 1.500571608543396, "learning_rate": 7.32097037960632e-05, "loss": 1.4575, "step": 7688 }, { "epoch": 0.59, "grad_norm": 1.4455640316009521, "learning_rate": 7.318659074852257e-05, "loss": 1.6487, "step": 7689 }, { "epoch": 0.59, "grad_norm": 1.2794893980026245, "learning_rate": 7.316347924424787e-05, "loss": 1.2263, "step": 7690 }, { "epoch": 0.59, "grad_norm": 1.3446804285049438, "learning_rate": 7.314036928456935e-05, "loss": 1.5613, "step": 7691 }, { "epoch": 0.59, "grad_norm": 1.4777129888534546, "learning_rate": 7.311726087081707e-05, "loss": 1.6475, "step": 7692 }, { "epoch": 0.59, "grad_norm": 2.7804272174835205, "learning_rate": 7.309415400432108e-05, "loss": 1.5616, "step": 7693 }, { "epoch": 0.59, "grad_norm": 1.7421363592147827, "learning_rate": 7.307104868641131e-05, "loss": 1.1213, "step": 7694 }, { "epoch": 0.59, "grad_norm": 
1.8912956714630127, "learning_rate": 7.304794491841761e-05, "loss": 1.2846, "step": 7695 }, { "epoch": 0.59, "grad_norm": 1.4579317569732666, "learning_rate": 7.30248427016697e-05, "loss": 1.5001, "step": 7696 }, { "epoch": 0.59, "grad_norm": 1.4357208013534546, "learning_rate": 7.300174203749733e-05, "loss": 0.9213, "step": 7697 }, { "epoch": 0.59, "grad_norm": 1.5365265607833862, "learning_rate": 7.297864292722996e-05, "loss": 1.6071, "step": 7698 }, { "epoch": 0.59, "grad_norm": 1.8017232418060303, "learning_rate": 7.295554537219716e-05, "loss": 1.6394, "step": 7699 }, { "epoch": 0.59, "grad_norm": 1.3657094240188599, "learning_rate": 7.29324493737283e-05, "loss": 1.4223, "step": 7700 }, { "epoch": 0.59, "grad_norm": 2.7529776096343994, "learning_rate": 7.290935493315267e-05, "loss": 2.0458, "step": 7701 }, { "epoch": 0.59, "grad_norm": 1.386102557182312, "learning_rate": 7.288626205179951e-05, "loss": 1.7565, "step": 7702 }, { "epoch": 0.59, "grad_norm": 1.3124669790267944, "learning_rate": 7.2863170730998e-05, "loss": 1.5149, "step": 7703 }, { "epoch": 0.59, "grad_norm": 4.77600622177124, "learning_rate": 7.284008097207708e-05, "loss": 2.4646, "step": 7704 }, { "epoch": 0.59, "grad_norm": 1.5851978063583374, "learning_rate": 7.281699277636572e-05, "loss": 1.3379, "step": 7705 }, { "epoch": 0.59, "grad_norm": 2.176927089691162, "learning_rate": 7.279390614519286e-05, "loss": 1.7813, "step": 7706 }, { "epoch": 0.59, "grad_norm": 1.135871171951294, "learning_rate": 7.277082107988716e-05, "loss": 1.2805, "step": 7707 }, { "epoch": 0.59, "grad_norm": 2.9739527702331543, "learning_rate": 7.274773758177735e-05, "loss": 0.7809, "step": 7708 }, { "epoch": 0.59, "grad_norm": 1.2416200637817383, "learning_rate": 7.272465565219206e-05, "loss": 1.4682, "step": 7709 }, { "epoch": 0.59, "grad_norm": 1.9707180261611938, "learning_rate": 7.270157529245969e-05, "loss": 2.1181, "step": 7710 }, { "epoch": 0.59, "grad_norm": 1.5099353790283203, "learning_rate": 
7.26784965039087e-05, "loss": 1.032, "step": 7711 }, { "epoch": 0.59, "grad_norm": 0.9998569488525391, "learning_rate": 7.265541928786748e-05, "loss": 0.831, "step": 7712 }, { "epoch": 0.59, "grad_norm": 1.3197977542877197, "learning_rate": 7.263234364566414e-05, "loss": 1.542, "step": 7713 }, { "epoch": 0.59, "grad_norm": 1.3271667957305908, "learning_rate": 7.260926957862683e-05, "loss": 1.2626, "step": 7714 }, { "epoch": 0.59, "grad_norm": 1.5335785150527954, "learning_rate": 7.258619708808367e-05, "loss": 1.2077, "step": 7715 }, { "epoch": 0.59, "grad_norm": 1.4802052974700928, "learning_rate": 7.256312617536255e-05, "loss": 1.498, "step": 7716 }, { "epoch": 0.59, "grad_norm": 0.8914031386375427, "learning_rate": 7.254005684179136e-05, "loss": 1.0199, "step": 7717 }, { "epoch": 0.59, "grad_norm": 1.7271265983581543, "learning_rate": 7.251698908869785e-05, "loss": 1.3314, "step": 7718 }, { "epoch": 0.59, "grad_norm": 1.0786404609680176, "learning_rate": 7.249392291740976e-05, "loss": 0.9042, "step": 7719 }, { "epoch": 0.59, "grad_norm": 1.0937652587890625, "learning_rate": 7.24708583292546e-05, "loss": 1.1047, "step": 7720 }, { "epoch": 0.59, "grad_norm": 2.8577492237091064, "learning_rate": 7.244779532555992e-05, "loss": 1.3702, "step": 7721 }, { "epoch": 0.59, "grad_norm": 1.2562074661254883, "learning_rate": 7.242473390765314e-05, "loss": 1.0326, "step": 7722 }, { "epoch": 0.59, "grad_norm": 2.159125804901123, "learning_rate": 7.240167407686154e-05, "loss": 0.8578, "step": 7723 }, { "epoch": 0.59, "grad_norm": 1.3778159618377686, "learning_rate": 7.237861583451234e-05, "loss": 0.7619, "step": 7724 }, { "epoch": 0.59, "grad_norm": 2.091017007827759, "learning_rate": 7.235555918193274e-05, "loss": 1.9438, "step": 7725 }, { "epoch": 0.59, "grad_norm": 1.2718825340270996, "learning_rate": 7.23325041204497e-05, "loss": 1.1709, "step": 7726 }, { "epoch": 0.59, "grad_norm": 2.0226693153381348, "learning_rate": 7.230945065139023e-05, "loss": 1.1376, "step": 7727 }, { 
"epoch": 0.59, "grad_norm": 1.9702112674713135, "learning_rate": 7.228639877608119e-05, "loss": 1.0139, "step": 7728 }, { "epoch": 0.59, "grad_norm": 3.615966796875, "learning_rate": 7.226334849584928e-05, "loss": 1.8287, "step": 7729 }, { "epoch": 0.59, "grad_norm": 1.387453317642212, "learning_rate": 7.224029981202122e-05, "loss": 1.3884, "step": 7730 }, { "epoch": 0.59, "grad_norm": 1.6378583908081055, "learning_rate": 7.221725272592366e-05, "loss": 0.8308, "step": 7731 }, { "epoch": 0.59, "grad_norm": 1.5468121767044067, "learning_rate": 7.2194207238883e-05, "loss": 1.0496, "step": 7732 }, { "epoch": 0.59, "grad_norm": 1.608251929283142, "learning_rate": 7.217116335222565e-05, "loss": 1.6716, "step": 7733 }, { "epoch": 0.59, "grad_norm": 1.3047631978988647, "learning_rate": 7.214812106727799e-05, "loss": 0.5205, "step": 7734 }, { "epoch": 0.59, "grad_norm": 1.700675129890442, "learning_rate": 7.212508038536613e-05, "loss": 1.359, "step": 7735 }, { "epoch": 0.59, "grad_norm": 3.7961418628692627, "learning_rate": 7.210204130781628e-05, "loss": 1.1521, "step": 7736 }, { "epoch": 0.59, "grad_norm": 1.4375592470169067, "learning_rate": 7.207900383595446e-05, "loss": 1.2601, "step": 7737 }, { "epoch": 0.59, "grad_norm": 0.7975000739097595, "learning_rate": 7.205596797110654e-05, "loss": 0.8609, "step": 7738 }, { "epoch": 0.59, "grad_norm": 1.6262719631195068, "learning_rate": 7.203293371459843e-05, "loss": 2.1668, "step": 7739 }, { "epoch": 0.59, "grad_norm": 1.258882999420166, "learning_rate": 7.200990106775591e-05, "loss": 1.4699, "step": 7740 }, { "epoch": 0.59, "grad_norm": 2.0828866958618164, "learning_rate": 7.198687003190458e-05, "loss": 1.3935, "step": 7741 }, { "epoch": 0.59, "grad_norm": 1.064297080039978, "learning_rate": 7.196384060837001e-05, "loss": 0.6833, "step": 7742 }, { "epoch": 0.59, "grad_norm": 2.2501893043518066, "learning_rate": 7.194081279847773e-05, "loss": 1.0957, "step": 7743 }, { "epoch": 0.59, "grad_norm": 1.0361448526382446, 
"learning_rate": 7.191778660355304e-05, "loss": 1.8917, "step": 7744 }, { "epoch": 0.59, "grad_norm": 1.8413469791412354, "learning_rate": 7.189476202492131e-05, "loss": 1.4717, "step": 7745 }, { "epoch": 0.59, "grad_norm": 1.3883888721466064, "learning_rate": 7.187173906390773e-05, "loss": 1.5401, "step": 7746 }, { "epoch": 0.59, "grad_norm": 1.4045367240905762, "learning_rate": 7.184871772183735e-05, "loss": 1.5458, "step": 7747 }, { "epoch": 0.59, "grad_norm": 2.056051254272461, "learning_rate": 7.18256980000352e-05, "loss": 1.2311, "step": 7748 }, { "epoch": 0.59, "grad_norm": 1.2896422147750854, "learning_rate": 7.180267989982623e-05, "loss": 0.8946, "step": 7749 }, { "epoch": 0.59, "grad_norm": 1.5615156888961792, "learning_rate": 7.177966342253524e-05, "loss": 2.2634, "step": 7750 }, { "epoch": 0.59, "grad_norm": 1.2385380268096924, "learning_rate": 7.175664856948694e-05, "loss": 1.4172, "step": 7751 }, { "epoch": 0.59, "grad_norm": 1.9457262754440308, "learning_rate": 7.173363534200603e-05, "loss": 1.238, "step": 7752 }, { "epoch": 0.59, "grad_norm": 1.0483204126358032, "learning_rate": 7.171062374141699e-05, "loss": 1.4621, "step": 7753 }, { "epoch": 0.59, "grad_norm": 1.1996734142303467, "learning_rate": 7.168761376904429e-05, "loss": 1.578, "step": 7754 }, { "epoch": 0.59, "grad_norm": 1.11644446849823, "learning_rate": 7.16646054262123e-05, "loss": 1.1945, "step": 7755 }, { "epoch": 0.59, "grad_norm": 1.6713857650756836, "learning_rate": 7.164159871424527e-05, "loss": 1.4387, "step": 7756 }, { "epoch": 0.59, "grad_norm": 1.6554267406463623, "learning_rate": 7.161859363446735e-05, "loss": 1.5262, "step": 7757 }, { "epoch": 0.59, "grad_norm": 1.7045780420303345, "learning_rate": 7.159559018820268e-05, "loss": 1.3237, "step": 7758 }, { "epoch": 0.59, "grad_norm": 2.028473138809204, "learning_rate": 7.157258837677514e-05, "loss": 1.0885, "step": 7759 }, { "epoch": 0.59, "grad_norm": 1.310072422027588, "learning_rate": 7.154958820150872e-05, "loss": 1.5177, 
"step": 7760 }, { "epoch": 0.59, "grad_norm": 1.7756820917129517, "learning_rate": 7.152658966372717e-05, "loss": 0.851, "step": 7761 }, { "epoch": 0.59, "grad_norm": 0.8076938390731812, "learning_rate": 7.150359276475413e-05, "loss": 0.9002, "step": 7762 }, { "epoch": 0.59, "grad_norm": 2.1449902057647705, "learning_rate": 7.148059750591328e-05, "loss": 1.1467, "step": 7763 }, { "epoch": 0.59, "grad_norm": 1.4538847208023071, "learning_rate": 7.145760388852814e-05, "loss": 1.3118, "step": 7764 }, { "epoch": 0.59, "grad_norm": 1.3999104499816895, "learning_rate": 7.143461191392206e-05, "loss": 1.5598, "step": 7765 }, { "epoch": 0.59, "grad_norm": 1.2609692811965942, "learning_rate": 7.141162158341839e-05, "loss": 1.4854, "step": 7766 }, { "epoch": 0.59, "grad_norm": 1.1641812324523926, "learning_rate": 7.138863289834039e-05, "loss": 1.5012, "step": 7767 }, { "epoch": 0.59, "grad_norm": 1.3037340641021729, "learning_rate": 7.136564586001113e-05, "loss": 1.0684, "step": 7768 }, { "epoch": 0.59, "grad_norm": 2.0641369819641113, "learning_rate": 7.134266046975369e-05, "loss": 1.4765, "step": 7769 }, { "epoch": 0.59, "grad_norm": 1.8564423322677612, "learning_rate": 7.131967672889101e-05, "loss": 1.7167, "step": 7770 }, { "epoch": 0.59, "grad_norm": 1.211591124534607, "learning_rate": 7.129669463874587e-05, "loss": 1.4843, "step": 7771 }, { "epoch": 0.59, "grad_norm": 1.299658179283142, "learning_rate": 7.12737142006411e-05, "loss": 1.1269, "step": 7772 }, { "epoch": 0.59, "grad_norm": 2.987215042114258, "learning_rate": 7.125073541589934e-05, "loss": 1.6416, "step": 7773 }, { "epoch": 0.59, "grad_norm": 2.8150603771209717, "learning_rate": 7.122775828584316e-05, "loss": 1.3582, "step": 7774 }, { "epoch": 0.59, "grad_norm": 3.127718210220337, "learning_rate": 7.120478281179496e-05, "loss": 1.6672, "step": 7775 }, { "epoch": 0.59, "grad_norm": 1.1460604667663574, "learning_rate": 7.118180899507722e-05, "loss": 1.0805, "step": 7776 }, { "epoch": 0.59, "grad_norm": 
2.049388885498047, "learning_rate": 7.11588368370121e-05, "loss": 1.0997, "step": 7777 }, { "epoch": 0.59, "grad_norm": 1.555964708328247, "learning_rate": 7.113586633892187e-05, "loss": 0.8212, "step": 7778 }, { "epoch": 0.59, "grad_norm": 1.5720497369766235, "learning_rate": 7.111289750212857e-05, "loss": 1.3636, "step": 7779 }, { "epoch": 0.59, "grad_norm": 1.254572868347168, "learning_rate": 7.108993032795418e-05, "loss": 1.2082, "step": 7780 }, { "epoch": 0.59, "grad_norm": 2.0347602367401123, "learning_rate": 7.10669648177206e-05, "loss": 1.5644, "step": 7781 }, { "epoch": 0.59, "grad_norm": 1.3200057744979858, "learning_rate": 7.104400097274968e-05, "loss": 1.1015, "step": 7782 }, { "epoch": 0.59, "grad_norm": 1.8993510007858276, "learning_rate": 7.102103879436306e-05, "loss": 1.6356, "step": 7783 }, { "epoch": 0.59, "grad_norm": 0.8709589242935181, "learning_rate": 7.099807828388235e-05, "loss": 0.9258, "step": 7784 }, { "epoch": 0.59, "grad_norm": 1.1806271076202393, "learning_rate": 7.097511944262912e-05, "loss": 1.1214, "step": 7785 }, { "epoch": 0.59, "grad_norm": 1.5607010126113892, "learning_rate": 7.095216227192467e-05, "loss": 2.0846, "step": 7786 }, { "epoch": 0.59, "grad_norm": 1.2166712284088135, "learning_rate": 7.092920677309043e-05, "loss": 1.4459, "step": 7787 }, { "epoch": 0.59, "grad_norm": 0.9909302592277527, "learning_rate": 7.090625294744756e-05, "loss": 0.6436, "step": 7788 }, { "epoch": 0.59, "grad_norm": 1.7124767303466797, "learning_rate": 7.08833007963172e-05, "loss": 1.3871, "step": 7789 }, { "epoch": 0.59, "grad_norm": 5.149282932281494, "learning_rate": 7.086035032102036e-05, "loss": 3.3514, "step": 7790 }, { "epoch": 0.59, "grad_norm": 1.91464102268219, "learning_rate": 7.083740152287804e-05, "loss": 1.0341, "step": 7791 }, { "epoch": 0.59, "grad_norm": 1.0976113080978394, "learning_rate": 7.081445440321098e-05, "loss": 1.0971, "step": 7792 }, { "epoch": 0.59, "grad_norm": 1.7755796909332275, "learning_rate": 
7.079150896333999e-05, "loss": 1.2032, "step": 7793 }, { "epoch": 0.59, "grad_norm": 2.1131632328033447, "learning_rate": 7.07685652045857e-05, "loss": 1.794, "step": 7794 }, { "epoch": 0.59, "grad_norm": 1.3356409072875977, "learning_rate": 7.07456231282686e-05, "loss": 1.3582, "step": 7795 }, { "epoch": 0.59, "grad_norm": 1.6808315515518188, "learning_rate": 7.072268273570917e-05, "loss": 1.2812, "step": 7796 }, { "epoch": 0.59, "grad_norm": 2.702056884765625, "learning_rate": 7.069974402822783e-05, "loss": 1.4615, "step": 7797 }, { "epoch": 0.6, "grad_norm": 1.2289040088653564, "learning_rate": 7.067680700714476e-05, "loss": 1.3236, "step": 7798 }, { "epoch": 0.6, "grad_norm": 5.865152835845947, "learning_rate": 7.06538716737801e-05, "loss": 2.1171, "step": 7799 }, { "epoch": 0.6, "grad_norm": 1.2037270069122314, "learning_rate": 7.063093802945399e-05, "loss": 1.355, "step": 7800 }, { "epoch": 0.6, "grad_norm": 1.37446928024292, "learning_rate": 7.06080060754863e-05, "loss": 1.7348, "step": 7801 }, { "epoch": 0.6, "grad_norm": 1.1625423431396484, "learning_rate": 7.058507581319698e-05, "loss": 1.5461, "step": 7802 }, { "epoch": 0.6, "grad_norm": 1.0302704572677612, "learning_rate": 7.056214724390577e-05, "loss": 1.6242, "step": 7803 }, { "epoch": 0.6, "grad_norm": 2.8588223457336426, "learning_rate": 7.053922036893229e-05, "loss": 1.0071, "step": 7804 }, { "epoch": 0.6, "grad_norm": 1.7610901594161987, "learning_rate": 7.051629518959614e-05, "loss": 1.0714, "step": 7805 }, { "epoch": 0.6, "grad_norm": 1.2914446592330933, "learning_rate": 7.049337170721689e-05, "loss": 1.4811, "step": 7806 }, { "epoch": 0.6, "grad_norm": 1.3993587493896484, "learning_rate": 7.04704499231138e-05, "loss": 1.4162, "step": 7807 }, { "epoch": 0.6, "grad_norm": 1.4877244234085083, "learning_rate": 7.044752983860619e-05, "loss": 1.5841, "step": 7808 }, { "epoch": 0.6, "grad_norm": 1.2445628643035889, "learning_rate": 7.042461145501325e-05, "loss": 1.3747, "step": 7809 }, { "epoch": 0.6, 
"grad_norm": 2.1308140754699707, "learning_rate": 7.040169477365403e-05, "loss": 1.9184, "step": 7810 }, { "epoch": 0.6, "grad_norm": 1.022858738899231, "learning_rate": 7.037877979584759e-05, "loss": 1.3063, "step": 7811 }, { "epoch": 0.6, "grad_norm": 1.0997408628463745, "learning_rate": 7.035586652291278e-05, "loss": 0.8946, "step": 7812 }, { "epoch": 0.6, "grad_norm": 2.5304253101348877, "learning_rate": 7.033295495616834e-05, "loss": 1.2861, "step": 7813 }, { "epoch": 0.6, "grad_norm": 1.0390182733535767, "learning_rate": 7.031004509693299e-05, "loss": 1.3442, "step": 7814 }, { "epoch": 0.6, "grad_norm": 4.1874775886535645, "learning_rate": 7.028713694652541e-05, "loss": 0.9569, "step": 7815 }, { "epoch": 0.6, "grad_norm": 1.1301209926605225, "learning_rate": 7.026423050626401e-05, "loss": 0.7601, "step": 7816 }, { "epoch": 0.6, "grad_norm": 4.288933753967285, "learning_rate": 7.024132577746716e-05, "loss": 1.9613, "step": 7817 }, { "epoch": 0.6, "grad_norm": 1.0890107154846191, "learning_rate": 7.021842276145326e-05, "loss": 1.9044, "step": 7818 }, { "epoch": 0.6, "grad_norm": 2.4601447582244873, "learning_rate": 7.019552145954041e-05, "loss": 1.4866, "step": 7819 }, { "epoch": 0.6, "grad_norm": 4.656588554382324, "learning_rate": 7.017262187304678e-05, "loss": 1.8406, "step": 7820 }, { "epoch": 0.6, "grad_norm": 1.7522492408752441, "learning_rate": 7.014972400329034e-05, "loss": 1.2099, "step": 7821 }, { "epoch": 0.6, "grad_norm": 1.6209315061569214, "learning_rate": 7.012682785158901e-05, "loss": 1.3352, "step": 7822 }, { "epoch": 0.6, "grad_norm": 0.9919680953025818, "learning_rate": 7.010393341926058e-05, "loss": 0.8968, "step": 7823 }, { "epoch": 0.6, "grad_norm": 1.2453628778457642, "learning_rate": 7.008104070762277e-05, "loss": 1.5196, "step": 7824 }, { "epoch": 0.6, "grad_norm": 1.5468580722808838, "learning_rate": 7.005814971799318e-05, "loss": 1.3466, "step": 7825 }, { "epoch": 0.6, "grad_norm": 1.6464471817016602, "learning_rate": 
7.003526045168931e-05, "loss": 1.9246, "step": 7826 }, { "epoch": 0.6, "grad_norm": 1.2005953788757324, "learning_rate": 7.001237291002863e-05, "loss": 0.9911, "step": 7827 }, { "epoch": 0.6, "grad_norm": 1.3051038980484009, "learning_rate": 6.998948709432833e-05, "loss": 1.6788, "step": 7828 }, { "epoch": 0.6, "grad_norm": 1.7089604139328003, "learning_rate": 6.99666030059057e-05, "loss": 1.8195, "step": 7829 }, { "epoch": 0.6, "grad_norm": 1.4964733123779297, "learning_rate": 6.994372064607788e-05, "loss": 1.9427, "step": 7830 }, { "epoch": 0.6, "grad_norm": 2.3413033485412598, "learning_rate": 6.992084001616182e-05, "loss": 1.5977, "step": 7831 }, { "epoch": 0.6, "grad_norm": 1.2928017377853394, "learning_rate": 6.989796111747444e-05, "loss": 1.4307, "step": 7832 }, { "epoch": 0.6, "grad_norm": 1.0980381965637207, "learning_rate": 6.987508395133258e-05, "loss": 1.1643, "step": 7833 }, { "epoch": 0.6, "grad_norm": 2.1439995765686035, "learning_rate": 6.985220851905292e-05, "loss": 1.295, "step": 7834 }, { "epoch": 0.6, "grad_norm": 1.6732028722763062, "learning_rate": 6.98293348219521e-05, "loss": 1.4565, "step": 7835 }, { "epoch": 0.6, "grad_norm": 1.2942087650299072, "learning_rate": 6.980646286134666e-05, "loss": 1.3895, "step": 7836 }, { "epoch": 0.6, "grad_norm": 3.158151388168335, "learning_rate": 6.978359263855292e-05, "loss": 1.3854, "step": 7837 }, { "epoch": 0.6, "grad_norm": 1.3250517845153809, "learning_rate": 6.976072415488726e-05, "loss": 1.5307, "step": 7838 }, { "epoch": 0.6, "grad_norm": 2.6336803436279297, "learning_rate": 6.973785741166592e-05, "loss": 1.649, "step": 7839 }, { "epoch": 0.6, "grad_norm": 0.9611749053001404, "learning_rate": 6.971499241020495e-05, "loss": 1.0298, "step": 7840 }, { "epoch": 0.6, "grad_norm": 1.7026482820510864, "learning_rate": 6.969212915182037e-05, "loss": 0.9017, "step": 7841 }, { "epoch": 0.6, "grad_norm": 1.2960031032562256, "learning_rate": 6.966926763782815e-05, "loss": 1.6462, "step": 7842 }, { "epoch": 
0.6, "grad_norm": 1.5086660385131836, "learning_rate": 6.964640786954401e-05, "loss": 1.8258, "step": 7843 }, { "epoch": 0.6, "grad_norm": 2.4340529441833496, "learning_rate": 6.962354984828375e-05, "loss": 1.7017, "step": 7844 }, { "epoch": 0.6, "grad_norm": 1.5356446504592896, "learning_rate": 6.960069357536294e-05, "loss": 1.3808, "step": 7845 }, { "epoch": 0.6, "grad_norm": 1.619913935661316, "learning_rate": 6.957783905209708e-05, "loss": 0.9934, "step": 7846 }, { "epoch": 0.6, "grad_norm": 2.462115526199341, "learning_rate": 6.955498627980155e-05, "loss": 1.7975, "step": 7847 }, { "epoch": 0.6, "grad_norm": 1.497828483581543, "learning_rate": 6.953213525979178e-05, "loss": 1.3252, "step": 7848 }, { "epoch": 0.6, "grad_norm": 1.797279953956604, "learning_rate": 6.950928599338287e-05, "loss": 1.175, "step": 7849 }, { "epoch": 0.6, "grad_norm": 1.5625778436660767, "learning_rate": 6.948643848188991e-05, "loss": 1.9282, "step": 7850 }, { "epoch": 0.6, "grad_norm": 1.4872395992279053, "learning_rate": 6.946359272662803e-05, "loss": 1.2944, "step": 7851 }, { "epoch": 0.6, "grad_norm": 1.8886653184890747, "learning_rate": 6.944074872891199e-05, "loss": 1.0502, "step": 7852 }, { "epoch": 0.6, "grad_norm": 1.234653115272522, "learning_rate": 6.941790649005666e-05, "loss": 1.6533, "step": 7853 }, { "epoch": 0.6, "grad_norm": 2.5582997798919678, "learning_rate": 6.939506601137675e-05, "loss": 1.4828, "step": 7854 }, { "epoch": 0.6, "grad_norm": 2.063943862915039, "learning_rate": 6.93722272941869e-05, "loss": 1.6758, "step": 7855 }, { "epoch": 0.6, "grad_norm": 1.282440423965454, "learning_rate": 6.93493903398015e-05, "loss": 1.7844, "step": 7856 }, { "epoch": 0.6, "grad_norm": 1.512012243270874, "learning_rate": 6.9326555149535e-05, "loss": 1.2828, "step": 7857 }, { "epoch": 0.6, "grad_norm": 1.500949740409851, "learning_rate": 6.930372172470178e-05, "loss": 1.1655, "step": 7858 }, { "epoch": 0.6, "grad_norm": 2.0133697986602783, "learning_rate": 6.928089006661592e-05, 
"loss": 1.5784, "step": 7859 }, { "epoch": 0.6, "grad_norm": 3.4043326377868652, "learning_rate": 6.925806017659153e-05, "loss": 1.5551, "step": 7860 }, { "epoch": 0.6, "grad_norm": 1.8130542039871216, "learning_rate": 6.923523205594268e-05, "loss": 0.9419, "step": 7861 }, { "epoch": 0.6, "grad_norm": 1.308702826499939, "learning_rate": 6.921240570598315e-05, "loss": 2.0112, "step": 7862 }, { "epoch": 0.6, "grad_norm": 8.590018272399902, "learning_rate": 6.918958112802682e-05, "loss": 1.6622, "step": 7863 }, { "epoch": 0.6, "grad_norm": 1.5575987100601196, "learning_rate": 6.916675832338734e-05, "loss": 2.0846, "step": 7864 }, { "epoch": 0.6, "grad_norm": 2.576415538787842, "learning_rate": 6.914393729337827e-05, "loss": 0.6443, "step": 7865 }, { "epoch": 0.6, "grad_norm": 5.640219688415527, "learning_rate": 6.91211180393131e-05, "loss": 2.0532, "step": 7866 }, { "epoch": 0.6, "grad_norm": 1.104617714881897, "learning_rate": 6.909830056250527e-05, "loss": 1.6932, "step": 7867 }, { "epoch": 0.6, "grad_norm": 1.177950143814087, "learning_rate": 6.907548486426798e-05, "loss": 1.4804, "step": 7868 }, { "epoch": 0.6, "grad_norm": 2.2769179344177246, "learning_rate": 6.905267094591443e-05, "loss": 0.9425, "step": 7869 }, { "epoch": 0.6, "grad_norm": 0.9836145043373108, "learning_rate": 6.902985880875773e-05, "loss": 0.8853, "step": 7870 }, { "epoch": 0.6, "grad_norm": 1.0963119268417358, "learning_rate": 6.900704845411079e-05, "loss": 1.0431, "step": 7871 }, { "epoch": 0.6, "grad_norm": 2.6097021102905273, "learning_rate": 6.898423988328651e-05, "loss": 1.5332, "step": 7872 }, { "epoch": 0.6, "grad_norm": 1.567844271659851, "learning_rate": 6.896143309759767e-05, "loss": 1.248, "step": 7873 }, { "epoch": 0.6, "grad_norm": 1.7857766151428223, "learning_rate": 6.893862809835688e-05, "loss": 1.6816, "step": 7874 }, { "epoch": 0.6, "grad_norm": 1.1681057214736938, "learning_rate": 6.891582488687671e-05, "loss": 1.1992, "step": 7875 }, { "epoch": 0.6, "grad_norm": 
2.2816526889801025, "learning_rate": 6.889302346446969e-05, "loss": 0.9556, "step": 7876 }, { "epoch": 0.6, "grad_norm": 1.6677119731903076, "learning_rate": 6.887022383244808e-05, "loss": 1.436, "step": 7877 }, { "epoch": 0.6, "grad_norm": 1.7585535049438477, "learning_rate": 6.884742599212417e-05, "loss": 1.5664, "step": 7878 }, { "epoch": 0.6, "grad_norm": 1.5155973434448242, "learning_rate": 6.882462994481013e-05, "loss": 1.3845, "step": 7879 }, { "epoch": 0.6, "grad_norm": 2.195429563522339, "learning_rate": 6.880183569181795e-05, "loss": 2.0404, "step": 7880 }, { "epoch": 0.6, "grad_norm": 2.449272394180298, "learning_rate": 6.877904323445962e-05, "loss": 1.8294, "step": 7881 }, { "epoch": 0.6, "grad_norm": 1.0955852270126343, "learning_rate": 6.875625257404694e-05, "loss": 0.6587, "step": 7882 }, { "epoch": 0.6, "grad_norm": 1.814449429512024, "learning_rate": 6.873346371189169e-05, "loss": 1.3048, "step": 7883 }, { "epoch": 0.6, "grad_norm": 1.38767409324646, "learning_rate": 6.871067664930542e-05, "loss": 1.1584, "step": 7884 }, { "epoch": 0.6, "grad_norm": 2.2931833267211914, "learning_rate": 6.868789138759976e-05, "loss": 1.2401, "step": 7885 }, { "epoch": 0.6, "grad_norm": 1.5133131742477417, "learning_rate": 6.866510792808603e-05, "loss": 1.0469, "step": 7886 }, { "epoch": 0.6, "grad_norm": 1.2648158073425293, "learning_rate": 6.864232627207563e-05, "loss": 1.608, "step": 7887 }, { "epoch": 0.6, "grad_norm": 1.4592015743255615, "learning_rate": 6.861954642087976e-05, "loss": 2.0221, "step": 7888 }, { "epoch": 0.6, "grad_norm": 1.2520424127578735, "learning_rate": 6.859676837580949e-05, "loss": 1.3503, "step": 7889 }, { "epoch": 0.6, "grad_norm": 1.5444871187210083, "learning_rate": 6.857399213817584e-05, "loss": 1.2629, "step": 7890 }, { "epoch": 0.6, "grad_norm": 1.1450692415237427, "learning_rate": 6.855121770928977e-05, "loss": 0.6793, "step": 7891 }, { "epoch": 0.6, "grad_norm": 3.030301809310913, "learning_rate": 6.8528445090462e-05, "loss": 
2.0327, "step": 7892 }, { "epoch": 0.6, "grad_norm": 2.143601179122925, "learning_rate": 6.850567428300325e-05, "loss": 1.4214, "step": 7893 }, { "epoch": 0.6, "grad_norm": 1.4399179220199585, "learning_rate": 6.848290528822416e-05, "loss": 1.3101, "step": 7894 }, { "epoch": 0.6, "grad_norm": 2.8169519901275635, "learning_rate": 6.846013810743515e-05, "loss": 1.9964, "step": 7895 }, { "epoch": 0.6, "grad_norm": 1.528741478919983, "learning_rate": 6.843737274194664e-05, "loss": 1.6569, "step": 7896 }, { "epoch": 0.6, "grad_norm": 1.1330980062484741, "learning_rate": 6.841460919306893e-05, "loss": 0.8424, "step": 7897 }, { "epoch": 0.6, "grad_norm": 3.254775047302246, "learning_rate": 6.839184746211209e-05, "loss": 1.8999, "step": 7898 }, { "epoch": 0.6, "grad_norm": 1.1891402006149292, "learning_rate": 6.836908755038629e-05, "loss": 1.2315, "step": 7899 }, { "epoch": 0.6, "grad_norm": 2.16093111038208, "learning_rate": 6.83463294592015e-05, "loss": 1.8969, "step": 7900 }, { "epoch": 0.6, "grad_norm": 2.4272637367248535, "learning_rate": 6.832357318986753e-05, "loss": 2.0653, "step": 7901 }, { "epoch": 0.6, "grad_norm": 1.417136311531067, "learning_rate": 6.830081874369412e-05, "loss": 1.3733, "step": 7902 }, { "epoch": 0.6, "grad_norm": 1.3603657484054565, "learning_rate": 6.827806612199097e-05, "loss": 1.4931, "step": 7903 }, { "epoch": 0.6, "grad_norm": 1.3144290447235107, "learning_rate": 6.825531532606759e-05, "loss": 1.7596, "step": 7904 }, { "epoch": 0.6, "grad_norm": 1.3957713842391968, "learning_rate": 6.823256635723343e-05, "loss": 1.979, "step": 7905 }, { "epoch": 0.6, "grad_norm": 1.9936352968215942, "learning_rate": 6.820981921679786e-05, "loss": 1.0692, "step": 7906 }, { "epoch": 0.6, "grad_norm": 2.592134714126587, "learning_rate": 6.818707390607001e-05, "loss": 1.3224, "step": 7907 }, { "epoch": 0.6, "grad_norm": 2.3677217960357666, "learning_rate": 6.816433042635908e-05, "loss": 1.6884, "step": 7908 }, { "epoch": 0.6, "grad_norm": 2.5210630893707275, 
"learning_rate": 6.814158877897411e-05, "loss": 1.0862, "step": 7909 }, { "epoch": 0.6, "grad_norm": 1.9697082042694092, "learning_rate": 6.811884896522397e-05, "loss": 1.6155, "step": 7910 }, { "epoch": 0.6, "grad_norm": 2.3255631923675537, "learning_rate": 6.809611098641744e-05, "loss": 1.5484, "step": 7911 }, { "epoch": 0.6, "grad_norm": 8.577953338623047, "learning_rate": 6.807337484386332e-05, "loss": 2.4978, "step": 7912 }, { "epoch": 0.6, "grad_norm": 1.7098983526229858, "learning_rate": 6.805064053887007e-05, "loss": 1.8398, "step": 7913 }, { "epoch": 0.6, "grad_norm": 1.5974336862564087, "learning_rate": 6.802790807274629e-05, "loss": 1.6688, "step": 7914 }, { "epoch": 0.6, "grad_norm": 1.9095330238342285, "learning_rate": 6.800517744680032e-05, "loss": 1.1512, "step": 7915 }, { "epoch": 0.6, "grad_norm": 1.0929208993911743, "learning_rate": 6.798244866234045e-05, "loss": 1.5487, "step": 7916 }, { "epoch": 0.6, "grad_norm": 1.2007997035980225, "learning_rate": 6.795972172067482e-05, "loss": 1.2215, "step": 7917 }, { "epoch": 0.6, "grad_norm": 3.093463182449341, "learning_rate": 6.793699662311156e-05, "loss": 1.6593, "step": 7918 }, { "epoch": 0.6, "grad_norm": 1.3005567789077759, "learning_rate": 6.791427337095855e-05, "loss": 1.591, "step": 7919 }, { "epoch": 0.6, "grad_norm": 1.537630558013916, "learning_rate": 6.789155196552372e-05, "loss": 1.5888, "step": 7920 }, { "epoch": 0.6, "grad_norm": 0.9909519553184509, "learning_rate": 6.786883240811479e-05, "loss": 0.8091, "step": 7921 }, { "epoch": 0.6, "grad_norm": 1.0033283233642578, "learning_rate": 6.784611470003937e-05, "loss": 1.6122, "step": 7922 }, { "epoch": 0.6, "grad_norm": 1.3981540203094482, "learning_rate": 6.782339884260501e-05, "loss": 1.3813, "step": 7923 }, { "epoch": 0.6, "grad_norm": 1.2435864210128784, "learning_rate": 6.780068483711919e-05, "loss": 1.0807, "step": 7924 }, { "epoch": 0.6, "grad_norm": 1.1183444261550903, "learning_rate": 6.777797268488917e-05, "loss": 1.4752, "step": 
7925 }, { "epoch": 0.6, "grad_norm": 1.4506471157073975, "learning_rate": 6.775526238722217e-05, "loss": 1.0024, "step": 7926 }, { "epoch": 0.6, "grad_norm": 1.9515725374221802, "learning_rate": 6.773255394542535e-05, "loss": 1.6569, "step": 7927 }, { "epoch": 0.6, "grad_norm": 2.2320644855499268, "learning_rate": 6.770984736080564e-05, "loss": 1.6684, "step": 7928 }, { "epoch": 0.61, "grad_norm": 2.911728858947754, "learning_rate": 6.768714263466999e-05, "loss": 1.5747, "step": 7929 }, { "epoch": 0.61, "grad_norm": 1.4244165420532227, "learning_rate": 6.766443976832517e-05, "loss": 1.5075, "step": 7930 }, { "epoch": 0.61, "grad_norm": 1.445168375968933, "learning_rate": 6.764173876307785e-05, "loss": 1.541, "step": 7931 }, { "epoch": 0.61, "grad_norm": 1.2348788976669312, "learning_rate": 6.761903962023459e-05, "loss": 1.6431, "step": 7932 }, { "epoch": 0.61, "grad_norm": 1.4568371772766113, "learning_rate": 6.75963423411019e-05, "loss": 1.738, "step": 7933 }, { "epoch": 0.61, "grad_norm": 1.5683481693267822, "learning_rate": 6.757364692698613e-05, "loss": 1.1127, "step": 7934 }, { "epoch": 0.61, "grad_norm": 1.941610336303711, "learning_rate": 6.755095337919348e-05, "loss": 1.1623, "step": 7935 }, { "epoch": 0.61, "grad_norm": 1.204438328742981, "learning_rate": 6.752826169903017e-05, "loss": 1.6458, "step": 7936 }, { "epoch": 0.61, "grad_norm": 1.282340407371521, "learning_rate": 6.750557188780217e-05, "loss": 2.0015, "step": 7937 }, { "epoch": 0.61, "grad_norm": 1.1104153394699097, "learning_rate": 6.748288394681545e-05, "loss": 1.5783, "step": 7938 }, { "epoch": 0.61, "grad_norm": 1.8656132221221924, "learning_rate": 6.746019787737583e-05, "loss": 1.8558, "step": 7939 }, { "epoch": 0.61, "grad_norm": 1.5838533639907837, "learning_rate": 6.743751368078898e-05, "loss": 1.3642, "step": 7940 }, { "epoch": 0.61, "grad_norm": 1.120683193206787, "learning_rate": 6.741483135836055e-05, "loss": 1.304, "step": 7941 }, { "epoch": 0.61, "grad_norm": 2.3153419494628906, 
"learning_rate": 6.739215091139607e-05, "loss": 1.4908, "step": 7942 }, { "epoch": 0.61, "grad_norm": 1.9019243717193604, "learning_rate": 6.736947234120086e-05, "loss": 1.5605, "step": 7943 }, { "epoch": 0.61, "grad_norm": 3.4503273963928223, "learning_rate": 6.73467956490802e-05, "loss": 1.9226, "step": 7944 }, { "epoch": 0.61, "grad_norm": 1.0907912254333496, "learning_rate": 6.732412083633936e-05, "loss": 0.8725, "step": 7945 }, { "epoch": 0.61, "grad_norm": 1.3621774911880493, "learning_rate": 6.730144790428328e-05, "loss": 1.4863, "step": 7946 }, { "epoch": 0.61, "grad_norm": 1.3425263166427612, "learning_rate": 6.7278776854217e-05, "loss": 1.0423, "step": 7947 }, { "epoch": 0.61, "grad_norm": 3.01790714263916, "learning_rate": 6.725610768744534e-05, "loss": 1.6462, "step": 7948 }, { "epoch": 0.61, "grad_norm": 1.5160033702850342, "learning_rate": 6.723344040527307e-05, "loss": 1.7835, "step": 7949 }, { "epoch": 0.61, "grad_norm": 3.4448068141937256, "learning_rate": 6.721077500900476e-05, "loss": 1.7467, "step": 7950 }, { "epoch": 0.61, "grad_norm": 1.475302815437317, "learning_rate": 6.718811149994501e-05, "loss": 1.6602, "step": 7951 }, { "epoch": 0.61, "grad_norm": 2.264209508895874, "learning_rate": 6.716544987939818e-05, "loss": 1.9956, "step": 7952 }, { "epoch": 0.61, "grad_norm": 1.0776519775390625, "learning_rate": 6.71427901486686e-05, "loss": 1.4976, "step": 7953 }, { "epoch": 0.61, "grad_norm": 1.1956647634506226, "learning_rate": 6.71201323090605e-05, "loss": 1.5324, "step": 7954 }, { "epoch": 0.61, "grad_norm": 1.2511720657348633, "learning_rate": 6.709747636187789e-05, "loss": 1.5948, "step": 7955 }, { "epoch": 0.61, "grad_norm": 1.3830212354660034, "learning_rate": 6.707482230842478e-05, "loss": 1.3876, "step": 7956 }, { "epoch": 0.61, "grad_norm": 1.635213851928711, "learning_rate": 6.70521701500051e-05, "loss": 1.9539, "step": 7957 }, { "epoch": 0.61, "grad_norm": 1.658369541168213, "learning_rate": 6.702951988792255e-05, "loss": 1.6193, 
"step": 7958 }, { "epoch": 0.61, "grad_norm": 2.446674108505249, "learning_rate": 6.700687152348077e-05, "loss": 1.5245, "step": 7959 }, { "epoch": 0.61, "grad_norm": 1.3212083578109741, "learning_rate": 6.698422505798338e-05, "loss": 1.4266, "step": 7960 }, { "epoch": 0.61, "grad_norm": 1.0488935708999634, "learning_rate": 6.696158049273373e-05, "loss": 0.9907, "step": 7961 }, { "epoch": 0.61, "grad_norm": 1.2376632690429688, "learning_rate": 6.69389378290352e-05, "loss": 1.2187, "step": 7962 }, { "epoch": 0.61, "grad_norm": 1.3976200819015503, "learning_rate": 6.691629706819102e-05, "loss": 1.8661, "step": 7963 }, { "epoch": 0.61, "grad_norm": 1.302679419517517, "learning_rate": 6.68936582115042e-05, "loss": 0.9012, "step": 7964 }, { "epoch": 0.61, "grad_norm": 2.9284827709198, "learning_rate": 6.687102126027782e-05, "loss": 1.7204, "step": 7965 }, { "epoch": 0.61, "grad_norm": 1.7075115442276, "learning_rate": 6.684838621581478e-05, "loss": 1.7303, "step": 7966 }, { "epoch": 0.61, "grad_norm": 4.279380798339844, "learning_rate": 6.682575307941781e-05, "loss": 1.1174, "step": 7967 }, { "epoch": 0.61, "grad_norm": 1.7311828136444092, "learning_rate": 6.680312185238955e-05, "loss": 1.8518, "step": 7968 }, { "epoch": 0.61, "grad_norm": 1.6951466798782349, "learning_rate": 6.678049253603265e-05, "loss": 1.3422, "step": 7969 }, { "epoch": 0.61, "grad_norm": 1.421324372291565, "learning_rate": 6.675786513164948e-05, "loss": 1.3062, "step": 7970 }, { "epoch": 0.61, "grad_norm": 1.3971247673034668, "learning_rate": 6.673523964054241e-05, "loss": 1.4068, "step": 7971 }, { "epoch": 0.61, "grad_norm": 1.896862506866455, "learning_rate": 6.67126160640137e-05, "loss": 1.9411, "step": 7972 }, { "epoch": 0.61, "grad_norm": 2.7334442138671875, "learning_rate": 6.668999440336535e-05, "loss": 1.8355, "step": 7973 }, { "epoch": 0.61, "grad_norm": 1.5717202425003052, "learning_rate": 6.666737465989948e-05, "loss": 1.4722, "step": 7974 }, { "epoch": 0.61, "grad_norm": 
1.0391228199005127, "learning_rate": 6.664475683491796e-05, "loss": 1.4522, "step": 7975 }, { "epoch": 0.61, "grad_norm": 3.6825084686279297, "learning_rate": 6.662214092972256e-05, "loss": 1.5987, "step": 7976 }, { "epoch": 0.61, "grad_norm": 3.807013750076294, "learning_rate": 6.659952694561495e-05, "loss": 2.253, "step": 7977 }, { "epoch": 0.61, "grad_norm": 1.1119500398635864, "learning_rate": 6.657691488389673e-05, "loss": 1.1886, "step": 7978 }, { "epoch": 0.61, "grad_norm": 1.3654752969741821, "learning_rate": 6.65543047458693e-05, "loss": 1.5051, "step": 7979 }, { "epoch": 0.61, "grad_norm": 0.9061058759689331, "learning_rate": 6.653169653283406e-05, "loss": 1.1248, "step": 7980 }, { "epoch": 0.61, "grad_norm": 2.3479647636413574, "learning_rate": 6.650909024609223e-05, "loss": 2.3563, "step": 7981 }, { "epoch": 0.61, "grad_norm": 1.7256848812103271, "learning_rate": 6.648648588694488e-05, "loss": 0.8948, "step": 7982 }, { "epoch": 0.61, "grad_norm": 0.9679999351501465, "learning_rate": 6.646388345669306e-05, "loss": 1.3108, "step": 7983 }, { "epoch": 0.61, "grad_norm": 1.3182789087295532, "learning_rate": 6.644128295663772e-05, "loss": 2.1119, "step": 7984 }, { "epoch": 0.61, "grad_norm": 1.5494651794433594, "learning_rate": 6.641868438807955e-05, "loss": 1.3297, "step": 7985 }, { "epoch": 0.61, "grad_norm": 1.2410285472869873, "learning_rate": 6.639608775231929e-05, "loss": 1.6692, "step": 7986 }, { "epoch": 0.61, "grad_norm": 0.9440972805023193, "learning_rate": 6.637349305065752e-05, "loss": 1.0633, "step": 7987 }, { "epoch": 0.61, "grad_norm": 1.781466007232666, "learning_rate": 6.635090028439464e-05, "loss": 1.7092, "step": 7988 }, { "epoch": 0.61, "grad_norm": 1.9292125701904297, "learning_rate": 6.632830945483099e-05, "loss": 1.3787, "step": 7989 }, { "epoch": 0.61, "grad_norm": 1.2274314165115356, "learning_rate": 6.630572056326687e-05, "loss": 1.1107, "step": 7990 }, { "epoch": 0.61, "grad_norm": 0.9351422190666199, "learning_rate": 
6.62831336110024e-05, "loss": 1.1744, "step": 7991 }, { "epoch": 0.61, "grad_norm": 2.2759976387023926, "learning_rate": 6.626054859933749e-05, "loss": 1.0272, "step": 7992 }, { "epoch": 0.61, "grad_norm": 2.225205421447754, "learning_rate": 6.623796552957212e-05, "loss": 1.4642, "step": 7993 }, { "epoch": 0.61, "grad_norm": 1.2296572923660278, "learning_rate": 6.621538440300608e-05, "loss": 0.5843, "step": 7994 }, { "epoch": 0.61, "grad_norm": 1.0123347043991089, "learning_rate": 6.6192805220939e-05, "loss": 0.6081, "step": 7995 }, { "epoch": 0.61, "grad_norm": 2.5468242168426514, "learning_rate": 6.617022798467045e-05, "loss": 1.2192, "step": 7996 }, { "epoch": 0.61, "grad_norm": 1.8584903478622437, "learning_rate": 6.614765269549993e-05, "loss": 1.9599, "step": 7997 }, { "epoch": 0.61, "grad_norm": 1.358498454093933, "learning_rate": 6.61250793547267e-05, "loss": 1.2253, "step": 7998 }, { "epoch": 0.61, "grad_norm": 1.6894670724868774, "learning_rate": 6.610250796365002e-05, "loss": 1.477, "step": 7999 }, { "epoch": 0.61, "grad_norm": 1.8861290216445923, "learning_rate": 6.607993852356906e-05, "loss": 1.5958, "step": 8000 }, { "epoch": 0.61, "grad_norm": 1.4760135412216187, "learning_rate": 6.605737103578274e-05, "loss": 1.3402, "step": 8001 }, { "epoch": 0.61, "grad_norm": 0.7873412370681763, "learning_rate": 6.603480550158995e-05, "loss": 0.6964, "step": 8002 }, { "epoch": 0.61, "grad_norm": 1.224607229232788, "learning_rate": 6.601224192228954e-05, "loss": 1.4346, "step": 8003 }, { "epoch": 0.61, "grad_norm": 2.879957437515259, "learning_rate": 6.598968029918012e-05, "loss": 2.3232, "step": 8004 }, { "epoch": 0.61, "grad_norm": 1.8099925518035889, "learning_rate": 6.59671206335602e-05, "loss": 1.6129, "step": 8005 }, { "epoch": 0.61, "grad_norm": 1.6867693662643433, "learning_rate": 6.594456292672835e-05, "loss": 1.5637, "step": 8006 }, { "epoch": 0.61, "grad_norm": 1.0335813760757446, "learning_rate": 6.592200717998273e-05, "loss": 1.0477, "step": 8007 }, { 
"epoch": 0.61, "grad_norm": 1.3600491285324097, "learning_rate": 6.589945339462169e-05, "loss": 1.6629, "step": 8008 }, { "epoch": 0.61, "grad_norm": 1.7559958696365356, "learning_rate": 6.587690157194326e-05, "loss": 1.9362, "step": 8009 }, { "epoch": 0.61, "grad_norm": 0.9021630883216858, "learning_rate": 6.585435171324545e-05, "loss": 1.0307, "step": 8010 }, { "epoch": 0.61, "grad_norm": 2.3591747283935547, "learning_rate": 6.583180381982612e-05, "loss": 1.1176, "step": 8011 }, { "epoch": 0.61, "grad_norm": 1.5991554260253906, "learning_rate": 6.580925789298307e-05, "loss": 1.0926, "step": 8012 }, { "epoch": 0.61, "grad_norm": 1.2465070486068726, "learning_rate": 6.578671393401387e-05, "loss": 1.2812, "step": 8013 }, { "epoch": 0.61, "grad_norm": 1.1649646759033203, "learning_rate": 6.576417194421612e-05, "loss": 1.7869, "step": 8014 }, { "epoch": 0.61, "grad_norm": 1.1764594316482544, "learning_rate": 6.574163192488725e-05, "loss": 1.465, "step": 8015 }, { "epoch": 0.61, "grad_norm": 0.8379302024841309, "learning_rate": 6.57190938773245e-05, "loss": 0.9662, "step": 8016 }, { "epoch": 0.61, "grad_norm": 2.304100513458252, "learning_rate": 6.56965578028251e-05, "loss": 1.0046, "step": 8017 }, { "epoch": 0.61, "grad_norm": 1.6298329830169678, "learning_rate": 6.567402370268618e-05, "loss": 1.5849, "step": 8018 }, { "epoch": 0.61, "grad_norm": 2.2249152660369873, "learning_rate": 6.565149157820465e-05, "loss": 0.8029, "step": 8019 }, { "epoch": 0.61, "grad_norm": 1.350790023803711, "learning_rate": 6.562896143067734e-05, "loss": 1.5856, "step": 8020 }, { "epoch": 0.61, "grad_norm": 1.5753676891326904, "learning_rate": 6.560643326140108e-05, "loss": 1.7082, "step": 8021 }, { "epoch": 0.61, "grad_norm": 1.956241250038147, "learning_rate": 6.558390707167238e-05, "loss": 1.31, "step": 8022 }, { "epoch": 0.61, "grad_norm": 1.5240751504898071, "learning_rate": 6.556138286278785e-05, "loss": 0.5615, "step": 8023 }, { "epoch": 0.61, "grad_norm": 1.6844629049301147, 
"learning_rate": 6.553886063604387e-05, "loss": 1.2624, "step": 8024 }, { "epoch": 0.61, "grad_norm": 2.4323790073394775, "learning_rate": 6.551634039273666e-05, "loss": 1.4058, "step": 8025 }, { "epoch": 0.61, "grad_norm": 1.012590765953064, "learning_rate": 6.549382213416244e-05, "loss": 0.8704, "step": 8026 }, { "epoch": 0.61, "grad_norm": 1.9551643133163452, "learning_rate": 6.54713058616173e-05, "loss": 1.4094, "step": 8027 }, { "epoch": 0.61, "grad_norm": 1.2812256813049316, "learning_rate": 6.544879157639712e-05, "loss": 1.7077, "step": 8028 }, { "epoch": 0.61, "grad_norm": 1.1665968894958496, "learning_rate": 6.542627927979771e-05, "loss": 1.5171, "step": 8029 }, { "epoch": 0.61, "grad_norm": 1.3922826051712036, "learning_rate": 6.540376897311489e-05, "loss": 1.7315, "step": 8030 }, { "epoch": 0.61, "grad_norm": 1.5558395385742188, "learning_rate": 6.538126065764411e-05, "loss": 1.1282, "step": 8031 }, { "epoch": 0.61, "grad_norm": 1.6910138130187988, "learning_rate": 6.535875433468098e-05, "loss": 0.8885, "step": 8032 }, { "epoch": 0.61, "grad_norm": 1.2145622968673706, "learning_rate": 6.533625000552083e-05, "loss": 1.3391, "step": 8033 }, { "epoch": 0.61, "grad_norm": 2.1096713542938232, "learning_rate": 6.531374767145886e-05, "loss": 1.7594, "step": 8034 }, { "epoch": 0.61, "grad_norm": 1.1784486770629883, "learning_rate": 6.529124733379024e-05, "loss": 1.5881, "step": 8035 }, { "epoch": 0.61, "grad_norm": 1.6239252090454102, "learning_rate": 6.526874899381006e-05, "loss": 1.1849, "step": 8036 }, { "epoch": 0.61, "grad_norm": 1.7109942436218262, "learning_rate": 6.524625265281315e-05, "loss": 1.6408, "step": 8037 }, { "epoch": 0.61, "grad_norm": 1.7262200117111206, "learning_rate": 6.522375831209429e-05, "loss": 1.2983, "step": 8038 }, { "epoch": 0.61, "grad_norm": 1.6597477197647095, "learning_rate": 6.520126597294827e-05, "loss": 1.5559, "step": 8039 }, { "epoch": 0.61, "grad_norm": 1.3174105882644653, "learning_rate": 6.517877563666951e-05, "loss": 
1.238, "step": 8040 }, { "epoch": 0.61, "grad_norm": 3.2558705806732178, "learning_rate": 6.515628730455257e-05, "loss": 1.1922, "step": 8041 }, { "epoch": 0.61, "grad_norm": 1.0836657285690308, "learning_rate": 6.513380097789172e-05, "loss": 1.1772, "step": 8042 }, { "epoch": 0.61, "grad_norm": 1.2781164646148682, "learning_rate": 6.51113166579812e-05, "loss": 1.3914, "step": 8043 }, { "epoch": 0.61, "grad_norm": 1.8752607107162476, "learning_rate": 6.50888343461151e-05, "loss": 1.2422, "step": 8044 }, { "epoch": 0.61, "grad_norm": 1.139162302017212, "learning_rate": 6.506635404358745e-05, "loss": 1.1741, "step": 8045 }, { "epoch": 0.61, "grad_norm": 1.5995171070098877, "learning_rate": 6.504387575169205e-05, "loss": 2.1285, "step": 8046 }, { "epoch": 0.61, "grad_norm": 1.6047024726867676, "learning_rate": 6.502139947172272e-05, "loss": 1.2989, "step": 8047 }, { "epoch": 0.61, "grad_norm": 1.1177616119384766, "learning_rate": 6.499892520497308e-05, "loss": 1.1861, "step": 8048 }, { "epoch": 0.61, "grad_norm": 2.071009874343872, "learning_rate": 6.49764529527366e-05, "loss": 1.6446, "step": 8049 }, { "epoch": 0.61, "grad_norm": 1.6814525127410889, "learning_rate": 6.495398271630675e-05, "loss": 0.9707, "step": 8050 }, { "epoch": 0.61, "grad_norm": 1.480954885482788, "learning_rate": 6.493151449697683e-05, "loss": 1.7889, "step": 8051 }, { "epoch": 0.61, "grad_norm": 1.37234365940094, "learning_rate": 6.490904829603996e-05, "loss": 1.5568, "step": 8052 }, { "epoch": 0.61, "grad_norm": 1.2458137273788452, "learning_rate": 6.488658411478923e-05, "loss": 1.7163, "step": 8053 }, { "epoch": 0.61, "grad_norm": 1.6157424449920654, "learning_rate": 6.48641219545176e-05, "loss": 1.4561, "step": 8054 }, { "epoch": 0.61, "grad_norm": 1.696424961090088, "learning_rate": 6.484166181651785e-05, "loss": 0.8526, "step": 8055 }, { "epoch": 0.61, "grad_norm": 1.608171820640564, "learning_rate": 6.481920370208274e-05, "loss": 1.468, "step": 8056 }, { "epoch": 0.61, "grad_norm": 
2.030200481414795, "learning_rate": 6.479674761250486e-05, "loss": 1.0184, "step": 8057 }, { "epoch": 0.61, "grad_norm": 1.3971633911132812, "learning_rate": 6.477429354907662e-05, "loss": 2.0704, "step": 8058 }, { "epoch": 0.61, "grad_norm": 1.1731194257736206, "learning_rate": 6.475184151309044e-05, "loss": 1.6716, "step": 8059 }, { "epoch": 0.61, "grad_norm": 1.1196708679199219, "learning_rate": 6.472939150583858e-05, "loss": 1.8476, "step": 8060 }, { "epoch": 0.62, "grad_norm": 1.4569075107574463, "learning_rate": 6.470694352861312e-05, "loss": 1.5531, "step": 8061 }, { "epoch": 0.62, "grad_norm": 1.9552465677261353, "learning_rate": 6.468449758270608e-05, "loss": 1.5702, "step": 8062 }, { "epoch": 0.62, "grad_norm": 1.8988884687423706, "learning_rate": 6.466205366940941e-05, "loss": 1.5394, "step": 8063 }, { "epoch": 0.62, "grad_norm": 1.1607636213302612, "learning_rate": 6.463961179001478e-05, "loss": 0.9528, "step": 8064 }, { "epoch": 0.62, "grad_norm": 0.9614865779876709, "learning_rate": 6.461717194581393e-05, "loss": 1.008, "step": 8065 }, { "epoch": 0.62, "grad_norm": 1.7940385341644287, "learning_rate": 6.459473413809842e-05, "loss": 1.2037, "step": 8066 }, { "epoch": 0.62, "grad_norm": 2.7266504764556885, "learning_rate": 6.457229836815957e-05, "loss": 1.3005, "step": 8067 }, { "epoch": 0.62, "grad_norm": 1.3658004999160767, "learning_rate": 6.454986463728876e-05, "loss": 1.0471, "step": 8068 }, { "epoch": 0.62, "grad_norm": 1.4974958896636963, "learning_rate": 6.452743294677724e-05, "loss": 0.994, "step": 8069 }, { "epoch": 0.62, "grad_norm": 1.4288709163665771, "learning_rate": 6.450500329791597e-05, "loss": 1.4969, "step": 8070 }, { "epoch": 0.62, "grad_norm": 2.2644829750061035, "learning_rate": 6.448257569199594e-05, "loss": 1.4568, "step": 8071 }, { "epoch": 0.62, "grad_norm": 1.2752494812011719, "learning_rate": 6.446015013030802e-05, "loss": 1.0319, "step": 8072 }, { "epoch": 0.62, "grad_norm": 1.6718945503234863, "learning_rate": 
6.443772661414288e-05, "loss": 1.4884, "step": 8073 }, { "epoch": 0.62, "grad_norm": 1.7604933977127075, "learning_rate": 6.441530514479119e-05, "loss": 1.9513, "step": 8074 }, { "epoch": 0.62, "grad_norm": 2.0249927043914795, "learning_rate": 6.439288572354338e-05, "loss": 1.7327, "step": 8075 }, { "epoch": 0.62, "grad_norm": 1.3893609046936035, "learning_rate": 6.437046835168985e-05, "loss": 1.4068, "step": 8076 }, { "epoch": 0.62, "grad_norm": 1.1462527513504028, "learning_rate": 6.43480530305208e-05, "loss": 1.895, "step": 8077 }, { "epoch": 0.62, "grad_norm": 1.1247478723526, "learning_rate": 6.432563976132644e-05, "loss": 1.0129, "step": 8078 }, { "epoch": 0.62, "grad_norm": 1.398314118385315, "learning_rate": 6.430322854539668e-05, "loss": 1.1844, "step": 8079 }, { "epoch": 0.62, "grad_norm": 1.665247917175293, "learning_rate": 6.428081938402149e-05, "loss": 1.0975, "step": 8080 }, { "epoch": 0.62, "grad_norm": 4.890291690826416, "learning_rate": 6.425841227849065e-05, "loss": 2.2205, "step": 8081 }, { "epoch": 0.62, "grad_norm": 1.5723767280578613, "learning_rate": 6.423600723009377e-05, "loss": 1.288, "step": 8082 }, { "epoch": 0.62, "grad_norm": 2.036797523498535, "learning_rate": 6.42136042401204e-05, "loss": 1.3346, "step": 8083 }, { "epoch": 0.62, "grad_norm": 2.049332618713379, "learning_rate": 6.419120330986002e-05, "loss": 1.0155, "step": 8084 }, { "epoch": 0.62, "grad_norm": 2.1365928649902344, "learning_rate": 6.416880444060185e-05, "loss": 1.5954, "step": 8085 }, { "epoch": 0.62, "grad_norm": 1.8292579650878906, "learning_rate": 6.41464076336351e-05, "loss": 1.5179, "step": 8086 }, { "epoch": 0.62, "grad_norm": 1.213891863822937, "learning_rate": 6.412401289024888e-05, "loss": 1.1489, "step": 8087 }, { "epoch": 0.62, "grad_norm": 1.7847118377685547, "learning_rate": 6.410162021173207e-05, "loss": 1.8467, "step": 8088 }, { "epoch": 0.62, "grad_norm": 1.2038264274597168, "learning_rate": 6.407922959937355e-05, "loss": 1.3422, "step": 8089 }, { 
"epoch": 0.62, "grad_norm": 1.2617857456207275, "learning_rate": 6.405684105446202e-05, "loss": 1.1736, "step": 8090 }, { "epoch": 0.62, "grad_norm": 0.9222767949104309, "learning_rate": 6.403445457828603e-05, "loss": 1.7081, "step": 8091 }, { "epoch": 0.62, "grad_norm": 1.7031636238098145, "learning_rate": 6.401207017213406e-05, "loss": 1.1735, "step": 8092 }, { "epoch": 0.62, "grad_norm": 1.8785691261291504, "learning_rate": 6.398968783729453e-05, "loss": 1.6482, "step": 8093 }, { "epoch": 0.62, "grad_norm": 1.688772201538086, "learning_rate": 6.396730757505559e-05, "loss": 1.1433, "step": 8094 }, { "epoch": 0.62, "grad_norm": 1.229322075843811, "learning_rate": 6.394492938670538e-05, "loss": 1.5976, "step": 8095 }, { "epoch": 0.62, "grad_norm": 1.1461679935455322, "learning_rate": 6.392255327353192e-05, "loss": 1.137, "step": 8096 }, { "epoch": 0.62, "grad_norm": 2.0210461616516113, "learning_rate": 6.390017923682304e-05, "loss": 1.4874, "step": 8097 }, { "epoch": 0.62, "grad_norm": 1.4269053936004639, "learning_rate": 6.387780727786651e-05, "loss": 1.8317, "step": 8098 }, { "epoch": 0.62, "grad_norm": 2.187490701675415, "learning_rate": 6.385543739795002e-05, "loss": 1.8434, "step": 8099 }, { "epoch": 0.62, "grad_norm": 1.7277430295944214, "learning_rate": 6.383306959836097e-05, "loss": 1.255, "step": 8100 }, { "epoch": 0.62, "grad_norm": 1.182020664215088, "learning_rate": 6.381070388038684e-05, "loss": 1.3086, "step": 8101 }, { "epoch": 0.62, "grad_norm": 1.1505895853042603, "learning_rate": 6.378834024531492e-05, "loss": 0.7549, "step": 8102 }, { "epoch": 0.62, "grad_norm": 1.1719751358032227, "learning_rate": 6.376597869443231e-05, "loss": 0.9963, "step": 8103 }, { "epoch": 0.62, "grad_norm": 1.9028276205062866, "learning_rate": 6.374361922902606e-05, "loss": 1.602, "step": 8104 }, { "epoch": 0.62, "grad_norm": 1.9236329793930054, "learning_rate": 6.372126185038313e-05, "loss": 1.1616, "step": 8105 }, { "epoch": 0.62, "grad_norm": 1.0549207925796509, 
"learning_rate": 6.369890655979024e-05, "loss": 1.1042, "step": 8106 }, { "epoch": 0.62, "grad_norm": 1.7243800163269043, "learning_rate": 6.367655335853413e-05, "loss": 1.2185, "step": 8107 }, { "epoch": 0.62, "grad_norm": 1.469817042350769, "learning_rate": 6.365420224790133e-05, "loss": 1.3717, "step": 8108 }, { "epoch": 0.62, "grad_norm": 1.3037117719650269, "learning_rate": 6.363185322917826e-05, "loss": 1.6536, "step": 8109 }, { "epoch": 0.62, "grad_norm": 1.3815131187438965, "learning_rate": 6.360950630365126e-05, "loss": 0.9589, "step": 8110 }, { "epoch": 0.62, "grad_norm": 1.939901351928711, "learning_rate": 6.358716147260653e-05, "loss": 1.3204, "step": 8111 }, { "epoch": 0.62, "grad_norm": 1.6057180166244507, "learning_rate": 6.35648187373301e-05, "loss": 1.3828, "step": 8112 }, { "epoch": 0.62, "grad_norm": 8.356213569641113, "learning_rate": 6.354247809910797e-05, "loss": 2.2946, "step": 8113 }, { "epoch": 0.62, "grad_norm": 1.4247326850891113, "learning_rate": 6.352013955922598e-05, "loss": 1.7715, "step": 8114 }, { "epoch": 0.62, "grad_norm": 1.7082692384719849, "learning_rate": 6.349780311896977e-05, "loss": 1.5201, "step": 8115 }, { "epoch": 0.62, "grad_norm": 1.2757970094680786, "learning_rate": 6.347546877962498e-05, "loss": 1.1384, "step": 8116 }, { "epoch": 0.62, "grad_norm": 3.7761390209198, "learning_rate": 6.34531365424771e-05, "loss": 1.6932, "step": 8117 }, { "epoch": 0.62, "grad_norm": 1.7522385120391846, "learning_rate": 6.343080640881145e-05, "loss": 1.4901, "step": 8118 }, { "epoch": 0.62, "grad_norm": 1.01865553855896, "learning_rate": 6.340847837991324e-05, "loss": 1.2423, "step": 8119 }, { "epoch": 0.62, "grad_norm": 2.3472280502319336, "learning_rate": 6.338615245706765e-05, "loss": 1.7461, "step": 8120 }, { "epoch": 0.62, "grad_norm": 2.008671998977661, "learning_rate": 6.336382864155957e-05, "loss": 1.7419, "step": 8121 }, { "epoch": 0.62, "grad_norm": 1.178481101989746, "learning_rate": 6.334150693467391e-05, "loss": 1.3167, 
"step": 8122 }, { "epoch": 0.62, "grad_norm": 1.7996691465377808, "learning_rate": 6.331918733769544e-05, "loss": 1.1058, "step": 8123 }, { "epoch": 0.62, "grad_norm": 2.5420703887939453, "learning_rate": 6.329686985190871e-05, "loss": 1.7987, "step": 8124 }, { "epoch": 0.62, "grad_norm": 1.0698193311691284, "learning_rate": 6.327455447859827e-05, "loss": 1.4705, "step": 8125 }, { "epoch": 0.62, "grad_norm": 0.985623300075531, "learning_rate": 6.325224121904852e-05, "loss": 1.6837, "step": 8126 }, { "epoch": 0.62, "grad_norm": 2.0221571922302246, "learning_rate": 6.322993007454368e-05, "loss": 0.964, "step": 8127 }, { "epoch": 0.62, "grad_norm": 1.019688367843628, "learning_rate": 6.320762104636786e-05, "loss": 1.5279, "step": 8128 }, { "epoch": 0.62, "grad_norm": 1.4665459394454956, "learning_rate": 6.31853141358051e-05, "loss": 1.6653, "step": 8129 }, { "epoch": 0.62, "grad_norm": 0.9563636183738708, "learning_rate": 6.316300934413935e-05, "loss": 0.9787, "step": 8130 }, { "epoch": 0.62, "grad_norm": 5.367278099060059, "learning_rate": 6.314070667265428e-05, "loss": 1.7215, "step": 8131 }, { "epoch": 0.62, "grad_norm": 1.6361926794052124, "learning_rate": 6.311840612263358e-05, "loss": 1.6448, "step": 8132 }, { "epoch": 0.62, "grad_norm": 1.5926399230957031, "learning_rate": 6.30961076953608e-05, "loss": 1.2743, "step": 8133 }, { "epoch": 0.62, "grad_norm": 1.9925259351730347, "learning_rate": 6.30738113921193e-05, "loss": 1.1635, "step": 8134 }, { "epoch": 0.62, "grad_norm": 1.5482372045516968, "learning_rate": 6.305151721419238e-05, "loss": 1.8441, "step": 8135 }, { "epoch": 0.62, "grad_norm": 1.492125391960144, "learning_rate": 6.302922516286323e-05, "loss": 1.3397, "step": 8136 }, { "epoch": 0.62, "grad_norm": 1.1940394639968872, "learning_rate": 6.300693523941482e-05, "loss": 1.2416, "step": 8137 }, { "epoch": 0.62, "grad_norm": 1.0343759059906006, "learning_rate": 6.298464744513008e-05, "loss": 0.7281, "step": 8138 }, { "epoch": 0.62, "grad_norm": 
1.1536895036697388, "learning_rate": 6.296236178129187e-05, "loss": 1.1937, "step": 8139 }, { "epoch": 0.62, "grad_norm": 1.8312710523605347, "learning_rate": 6.294007824918276e-05, "loss": 1.3394, "step": 8140 }, { "epoch": 0.62, "grad_norm": 1.371976375579834, "learning_rate": 6.291779685008534e-05, "loss": 1.4315, "step": 8141 }, { "epoch": 0.62, "grad_norm": 1.9098718166351318, "learning_rate": 6.289551758528209e-05, "loss": 1.3327, "step": 8142 }, { "epoch": 0.62, "grad_norm": 1.1634974479675293, "learning_rate": 6.287324045605518e-05, "loss": 1.297, "step": 8143 }, { "epoch": 0.62, "grad_norm": 0.962910532951355, "learning_rate": 6.285096546368689e-05, "loss": 0.8473, "step": 8144 }, { "epoch": 0.62, "grad_norm": 1.1732271909713745, "learning_rate": 6.282869260945928e-05, "loss": 1.2479, "step": 8145 }, { "epoch": 0.62, "grad_norm": 1.180788278579712, "learning_rate": 6.28064218946542e-05, "loss": 1.1208, "step": 8146 }, { "epoch": 0.62, "grad_norm": 1.1537988185882568, "learning_rate": 6.278415332055349e-05, "loss": 1.1083, "step": 8147 }, { "epoch": 0.62, "grad_norm": 1.31859290599823, "learning_rate": 6.27618868884389e-05, "loss": 1.2723, "step": 8148 }, { "epoch": 0.62, "grad_norm": 1.3743102550506592, "learning_rate": 6.27396225995919e-05, "loss": 1.3099, "step": 8149 }, { "epoch": 0.62, "grad_norm": 1.216549038887024, "learning_rate": 6.271736045529398e-05, "loss": 1.6209, "step": 8150 }, { "epoch": 0.62, "grad_norm": 1.53413987159729, "learning_rate": 6.269510045682647e-05, "loss": 1.2491, "step": 8151 }, { "epoch": 0.62, "grad_norm": 1.7761566638946533, "learning_rate": 6.267284260547049e-05, "loss": 1.5013, "step": 8152 }, { "epoch": 0.62, "grad_norm": 1.2252713441848755, "learning_rate": 6.265058690250714e-05, "loss": 1.2705, "step": 8153 }, { "epoch": 0.62, "grad_norm": 1.9030816555023193, "learning_rate": 6.262833334921741e-05, "loss": 1.8436, "step": 8154 }, { "epoch": 0.62, "grad_norm": 1.3477387428283691, "learning_rate": 6.260608194688206e-05, 
"loss": 0.9102, "step": 8155 }, { "epoch": 0.62, "grad_norm": 2.2168595790863037, "learning_rate": 6.258383269678181e-05, "loss": 1.7163, "step": 8156 }, { "epoch": 0.62, "grad_norm": 1.9160816669464111, "learning_rate": 6.256158560019727e-05, "loss": 1.0519, "step": 8157 }, { "epoch": 0.62, "grad_norm": 1.8631222248077393, "learning_rate": 6.25393406584088e-05, "loss": 1.3466, "step": 8158 }, { "epoch": 0.62, "grad_norm": 3.8908939361572266, "learning_rate": 6.251709787269679e-05, "loss": 1.0967, "step": 8159 }, { "epoch": 0.62, "grad_norm": 1.6291255950927734, "learning_rate": 6.249485724434146e-05, "loss": 1.2087, "step": 8160 }, { "epoch": 0.62, "grad_norm": 1.1580959558486938, "learning_rate": 6.24726187746228e-05, "loss": 1.6648, "step": 8161 }, { "epoch": 0.62, "grad_norm": 1.4001305103302002, "learning_rate": 6.245038246482081e-05, "loss": 1.217, "step": 8162 }, { "epoch": 0.62, "grad_norm": 1.4543722867965698, "learning_rate": 6.242814831621537e-05, "loss": 1.2812, "step": 8163 }, { "epoch": 0.62, "grad_norm": 1.4030431509017944, "learning_rate": 6.24059163300861e-05, "loss": 1.8298, "step": 8164 }, { "epoch": 0.62, "grad_norm": 1.625826358795166, "learning_rate": 6.23836865077126e-05, "loss": 1.3288, "step": 8165 }, { "epoch": 0.62, "grad_norm": 1.6624823808670044, "learning_rate": 6.236145885037438e-05, "loss": 1.5487, "step": 8166 }, { "epoch": 0.62, "grad_norm": 1.1044272184371948, "learning_rate": 6.23392333593507e-05, "loss": 1.038, "step": 8167 }, { "epoch": 0.62, "grad_norm": 1.4375783205032349, "learning_rate": 6.231701003592079e-05, "loss": 0.7889, "step": 8168 }, { "epoch": 0.62, "grad_norm": 1.4823983907699585, "learning_rate": 6.229478888136373e-05, "loss": 1.2813, "step": 8169 }, { "epoch": 0.62, "grad_norm": 2.1852359771728516, "learning_rate": 6.227256989695848e-05, "loss": 1.055, "step": 8170 }, { "epoch": 0.62, "grad_norm": 3.6070754528045654, "learning_rate": 6.225035308398385e-05, "loss": 1.6359, "step": 8171 }, { "epoch": 0.62, 
"grad_norm": 1.822174072265625, "learning_rate": 6.22281384437186e-05, "loss": 1.573, "step": 8172 }, { "epoch": 0.62, "grad_norm": 2.018939256668091, "learning_rate": 6.220592597744123e-05, "loss": 1.893, "step": 8173 }, { "epoch": 0.62, "grad_norm": 3.647915840148926, "learning_rate": 6.218371568643026e-05, "loss": 1.6559, "step": 8174 }, { "epoch": 0.62, "grad_norm": 1.044881820678711, "learning_rate": 6.216150757196402e-05, "loss": 0.7791, "step": 8175 }, { "epoch": 0.62, "grad_norm": 1.28187894821167, "learning_rate": 6.213930163532064e-05, "loss": 1.8637, "step": 8176 }, { "epoch": 0.62, "grad_norm": 1.301390528678894, "learning_rate": 6.211709787777826e-05, "loss": 1.4262, "step": 8177 }, { "epoch": 0.62, "grad_norm": 1.6897083520889282, "learning_rate": 6.209489630061487e-05, "loss": 1.2806, "step": 8178 }, { "epoch": 0.62, "grad_norm": 1.5503214597702026, "learning_rate": 6.207269690510823e-05, "loss": 1.528, "step": 8179 }, { "epoch": 0.62, "grad_norm": 1.7477740049362183, "learning_rate": 6.205049969253605e-05, "loss": 1.1338, "step": 8180 }, { "epoch": 0.62, "grad_norm": 1.6478760242462158, "learning_rate": 6.202830466417595e-05, "loss": 1.8458, "step": 8181 }, { "epoch": 0.62, "grad_norm": 1.0572484731674194, "learning_rate": 6.200611182130534e-05, "loss": 1.2382, "step": 8182 }, { "epoch": 0.62, "grad_norm": 1.1380153894424438, "learning_rate": 6.198392116520156e-05, "loss": 1.2889, "step": 8183 }, { "epoch": 0.62, "grad_norm": 1.743864893913269, "learning_rate": 6.196173269714185e-05, "loss": 1.2823, "step": 8184 }, { "epoch": 0.62, "grad_norm": 1.1601529121398926, "learning_rate": 6.193954641840318e-05, "loss": 1.3456, "step": 8185 }, { "epoch": 0.62, "grad_norm": 1.7149136066436768, "learning_rate": 6.191736233026259e-05, "loss": 1.4839, "step": 8186 }, { "epoch": 0.62, "grad_norm": 1.5631625652313232, "learning_rate": 6.18951804339969e-05, "loss": 1.2313, "step": 8187 }, { "epoch": 0.62, "grad_norm": 2.535841226577759, "learning_rate": 
6.187300073088277e-05, "loss": 2.2584, "step": 8188 }, { "epoch": 0.62, "grad_norm": 1.4066760540008545, "learning_rate": 6.185082322219675e-05, "loss": 1.6395, "step": 8189 }, { "epoch": 0.62, "grad_norm": 3.987647771835327, "learning_rate": 6.182864790921537e-05, "loss": 2.022, "step": 8190 }, { "epoch": 0.62, "grad_norm": 1.5836933851242065, "learning_rate": 6.180647479321485e-05, "loss": 1.1643, "step": 8191 }, { "epoch": 0.63, "grad_norm": 3.9058847427368164, "learning_rate": 6.178430387547142e-05, "loss": 1.7143, "step": 8192 }, { "epoch": 0.63, "grad_norm": 2.1882290840148926, "learning_rate": 6.176213515726117e-05, "loss": 1.3898, "step": 8193 }, { "epoch": 0.63, "grad_norm": 0.991718053817749, "learning_rate": 6.173996863985999e-05, "loss": 1.1666, "step": 8194 }, { "epoch": 0.63, "grad_norm": 3.001884937286377, "learning_rate": 6.171780432454369e-05, "loss": 1.9085, "step": 8195 }, { "epoch": 0.63, "grad_norm": 1.2479368448257446, "learning_rate": 6.169564221258804e-05, "loss": 1.0696, "step": 8196 }, { "epoch": 0.63, "grad_norm": 1.2541656494140625, "learning_rate": 6.16734823052685e-05, "loss": 1.4338, "step": 8197 }, { "epoch": 0.63, "grad_norm": 1.743911862373352, "learning_rate": 6.165132460386051e-05, "loss": 1.246, "step": 8198 }, { "epoch": 0.63, "grad_norm": 2.1161556243896484, "learning_rate": 6.162916910963943e-05, "loss": 1.9704, "step": 8199 }, { "epoch": 0.63, "grad_norm": 1.1830675601959229, "learning_rate": 6.160701582388038e-05, "loss": 1.358, "step": 8200 }, { "epoch": 0.63, "grad_norm": 1.2579232454299927, "learning_rate": 6.158486474785846e-05, "loss": 1.6294, "step": 8201 }, { "epoch": 0.63, "grad_norm": 1.520875334739685, "learning_rate": 6.156271588284854e-05, "loss": 1.459, "step": 8202 }, { "epoch": 0.63, "grad_norm": 1.310960054397583, "learning_rate": 6.154056923012546e-05, "loss": 1.4287, "step": 8203 }, { "epoch": 0.63, "grad_norm": 1.3973838090896606, "learning_rate": 6.151842479096384e-05, "loss": 1.2322, "step": 8204 }, { 
"epoch": 0.63, "grad_norm": 1.1032592058181763, "learning_rate": 6.149628256663827e-05, "loss": 1.1745, "step": 8205 }, { "epoch": 0.63, "grad_norm": 2.5223076343536377, "learning_rate": 6.14741425584231e-05, "loss": 1.6145, "step": 8206 }, { "epoch": 0.63, "grad_norm": 1.6474391222000122, "learning_rate": 6.14520047675927e-05, "loss": 0.3471, "step": 8207 }, { "epoch": 0.63, "grad_norm": 1.446033239364624, "learning_rate": 6.142986919542117e-05, "loss": 1.6532, "step": 8208 }, { "epoch": 0.63, "grad_norm": 2.975517749786377, "learning_rate": 6.140773584318253e-05, "loss": 1.8307, "step": 8209 }, { "epoch": 0.63, "grad_norm": 1.9077820777893066, "learning_rate": 6.13856047121507e-05, "loss": 2.0536, "step": 8210 }, { "epoch": 0.63, "grad_norm": 1.181662917137146, "learning_rate": 6.136347580359948e-05, "loss": 1.3475, "step": 8211 }, { "epoch": 0.63, "grad_norm": 1.2086541652679443, "learning_rate": 6.134134911880248e-05, "loss": 1.3933, "step": 8212 }, { "epoch": 0.63, "grad_norm": 1.7291693687438965, "learning_rate": 6.131922465903321e-05, "loss": 1.5085, "step": 8213 }, { "epoch": 0.63, "grad_norm": 1.4700133800506592, "learning_rate": 6.129710242556512e-05, "loss": 1.5118, "step": 8214 }, { "epoch": 0.63, "grad_norm": 1.7419787645339966, "learning_rate": 6.12749824196714e-05, "loss": 1.476, "step": 8215 }, { "epoch": 0.63, "grad_norm": 1.1542534828186035, "learning_rate": 6.12528646426252e-05, "loss": 0.9244, "step": 8216 }, { "epoch": 0.63, "grad_norm": 2.7285144329071045, "learning_rate": 6.12307490956996e-05, "loss": 1.5361, "step": 8217 }, { "epoch": 0.63, "grad_norm": 1.1432796716690063, "learning_rate": 6.120863578016735e-05, "loss": 1.6586, "step": 8218 }, { "epoch": 0.63, "grad_norm": 2.0444955825805664, "learning_rate": 6.118652469730128e-05, "loss": 1.1518, "step": 8219 }, { "epoch": 0.63, "grad_norm": 1.5063079595565796, "learning_rate": 6.116441584837405e-05, "loss": 1.3642, "step": 8220 }, { "epoch": 0.63, "grad_norm": 1.0666289329528809, 
"learning_rate": 6.114230923465804e-05, "loss": 1.8373, "step": 8221 }, { "epoch": 0.63, "grad_norm": 2.1684744358062744, "learning_rate": 6.112020485742568e-05, "loss": 1.559, "step": 8222 }, { "epoch": 0.63, "grad_norm": 1.2474340200424194, "learning_rate": 6.109810271794922e-05, "loss": 1.9243, "step": 8223 }, { "epoch": 0.63, "grad_norm": 2.1178319454193115, "learning_rate": 6.10760028175007e-05, "loss": 1.079, "step": 8224 }, { "epoch": 0.63, "grad_norm": 1.3396739959716797, "learning_rate": 6.105390515735216e-05, "loss": 0.6945, "step": 8225 }, { "epoch": 0.63, "grad_norm": 2.03021502494812, "learning_rate": 6.1031809738775433e-05, "loss": 1.9455, "step": 8226 }, { "epoch": 0.63, "grad_norm": 1.2248963117599487, "learning_rate": 6.10097165630422e-05, "loss": 1.4501, "step": 8227 }, { "epoch": 0.63, "grad_norm": 1.2181726694107056, "learning_rate": 6.098762563142406e-05, "loss": 1.1269, "step": 8228 }, { "epoch": 0.63, "grad_norm": 5.742468357086182, "learning_rate": 6.0965536945192556e-05, "loss": 2.2985, "step": 8229 }, { "epoch": 0.63, "grad_norm": 1.620535135269165, "learning_rate": 6.0943450505618917e-05, "loss": 1.273, "step": 8230 }, { "epoch": 0.63, "grad_norm": 1.9547765254974365, "learning_rate": 6.092136631397436e-05, "loss": 1.2768, "step": 8231 }, { "epoch": 0.63, "grad_norm": 1.0604045391082764, "learning_rate": 6.089928437153003e-05, "loss": 1.2949, "step": 8232 }, { "epoch": 0.63, "grad_norm": 1.228322148323059, "learning_rate": 6.087720467955678e-05, "loss": 0.999, "step": 8233 }, { "epoch": 0.63, "grad_norm": 1.5111024379730225, "learning_rate": 6.085512723932546e-05, "loss": 0.9085, "step": 8234 }, { "epoch": 0.63, "grad_norm": 1.9181188344955444, "learning_rate": 6.0833052052106764e-05, "loss": 1.5257, "step": 8235 }, { "epoch": 0.63, "grad_norm": 1.702182650566101, "learning_rate": 6.0810979119171254e-05, "loss": 1.3586, "step": 8236 }, { "epoch": 0.63, "grad_norm": 1.4774842262268066, "learning_rate": 6.0788908441789304e-05, "loss": 
1.5268, "step": 8237 }, { "epoch": 0.63, "grad_norm": 1.4236259460449219, "learning_rate": 6.0766840021231286e-05, "loss": 1.4484, "step": 8238 }, { "epoch": 0.63, "grad_norm": 0.6998544931411743, "learning_rate": 6.074477385876728e-05, "loss": 0.7784, "step": 8239 }, { "epoch": 0.63, "grad_norm": 2.105983018875122, "learning_rate": 6.072270995566738e-05, "loss": 1.6426, "step": 8240 }, { "epoch": 0.63, "grad_norm": 1.0005104541778564, "learning_rate": 6.070064831320149e-05, "loss": 1.0221, "step": 8241 }, { "epoch": 0.63, "grad_norm": 2.4352529048919678, "learning_rate": 6.0678588932639334e-05, "loss": 1.618, "step": 8242 }, { "epoch": 0.63, "grad_norm": 1.2734835147857666, "learning_rate": 6.0656531815250586e-05, "loss": 1.1984, "step": 8243 }, { "epoch": 0.63, "grad_norm": 1.1057831048965454, "learning_rate": 6.06344769623048e-05, "loss": 1.1251, "step": 8244 }, { "epoch": 0.63, "grad_norm": 1.5768890380859375, "learning_rate": 6.061242437507131e-05, "loss": 1.5799, "step": 8245 }, { "epoch": 0.63, "grad_norm": 1.1582670211791992, "learning_rate": 6.0590374054819346e-05, "loss": 1.3098, "step": 8246 }, { "epoch": 0.63, "grad_norm": 0.898869514465332, "learning_rate": 6.056832600281812e-05, "loss": 0.9454, "step": 8247 }, { "epoch": 0.63, "grad_norm": 1.352829933166504, "learning_rate": 6.0546280220336524e-05, "loss": 1.3391, "step": 8248 }, { "epoch": 0.63, "grad_norm": 1.4725711345672607, "learning_rate": 6.052423670864349e-05, "loss": 1.4662, "step": 8249 }, { "epoch": 0.63, "grad_norm": 2.7053072452545166, "learning_rate": 6.050219546900774e-05, "loss": 1.7198, "step": 8250 }, { "epoch": 0.63, "grad_norm": 1.0804524421691895, "learning_rate": 6.048015650269783e-05, "loss": 1.4421, "step": 8251 }, { "epoch": 0.63, "grad_norm": 2.604867696762085, "learning_rate": 6.045811981098224e-05, "loss": 1.5019, "step": 8252 }, { "epoch": 0.63, "grad_norm": 1.5657610893249512, "learning_rate": 6.043608539512937e-05, "loss": 1.3615, "step": 8253 }, { "epoch": 0.63, 
"grad_norm": 1.2939258813858032, "learning_rate": 6.041405325640739e-05, "loss": 1.2079, "step": 8254 }, { "epoch": 0.63, "grad_norm": 1.3409178256988525, "learning_rate": 6.039202339608432e-05, "loss": 1.1601, "step": 8255 }, { "epoch": 0.63, "grad_norm": 1.089172601699829, "learning_rate": 6.0369995815428216e-05, "loss": 1.3299, "step": 8256 }, { "epoch": 0.63, "grad_norm": 1.551099181175232, "learning_rate": 6.034797051570678e-05, "loss": 1.5123, "step": 8257 }, { "epoch": 0.63, "grad_norm": 2.174899101257324, "learning_rate": 6.032594749818776e-05, "loss": 1.9416, "step": 8258 }, { "epoch": 0.63, "grad_norm": 2.016254425048828, "learning_rate": 6.030392676413874e-05, "loss": 1.6895, "step": 8259 }, { "epoch": 0.63, "grad_norm": 1.6680175065994263, "learning_rate": 6.028190831482703e-05, "loss": 1.4636, "step": 8260 }, { "epoch": 0.63, "grad_norm": 1.3614892959594727, "learning_rate": 6.025989215151999e-05, "loss": 1.0028, "step": 8261 }, { "epoch": 0.63, "grad_norm": 1.100400447845459, "learning_rate": 6.0237878275484816e-05, "loss": 0.8535, "step": 8262 }, { "epoch": 0.63, "grad_norm": 2.441436529159546, "learning_rate": 6.0215866687988465e-05, "loss": 1.6927, "step": 8263 }, { "epoch": 0.63, "grad_norm": 1.2976024150848389, "learning_rate": 6.0193857390297816e-05, "loss": 1.7942, "step": 8264 }, { "epoch": 0.63, "grad_norm": 2.932466745376587, "learning_rate": 6.0171850383679676e-05, "loss": 1.9068, "step": 8265 }, { "epoch": 0.63, "grad_norm": 1.5770479440689087, "learning_rate": 6.014984566940072e-05, "loss": 1.2126, "step": 8266 }, { "epoch": 0.63, "grad_norm": 1.3010993003845215, "learning_rate": 6.0127843248727344e-05, "loss": 0.9412, "step": 8267 }, { "epoch": 0.63, "grad_norm": 1.7013334035873413, "learning_rate": 6.0105843122925955e-05, "loss": 1.45, "step": 8268 }, { "epoch": 0.63, "grad_norm": 1.7406413555145264, "learning_rate": 6.008384529326283e-05, "loss": 1.4879, "step": 8269 }, { "epoch": 0.63, "grad_norm": 1.4144840240478516, "learning_rate": 
6.0061849761003994e-05, "loss": 0.8967, "step": 8270 }, { "epoch": 0.63, "grad_norm": 1.8583735227584839, "learning_rate": 6.003985652741546e-05, "loss": 1.9137, "step": 8271 }, { "epoch": 0.63, "grad_norm": 2.4240918159484863, "learning_rate": 6.00178655937631e-05, "loss": 1.6742, "step": 8272 }, { "epoch": 0.63, "grad_norm": 1.0423457622528076, "learning_rate": 5.9995876961312566e-05, "loss": 1.53, "step": 8273 }, { "epoch": 0.63, "grad_norm": 1.6127675771713257, "learning_rate": 5.997389063132941e-05, "loss": 1.4257, "step": 8274 }, { "epoch": 0.63, "grad_norm": 1.4154175519943237, "learning_rate": 5.995190660507915e-05, "loss": 1.6938, "step": 8275 }, { "epoch": 0.63, "grad_norm": 1.1666309833526611, "learning_rate": 5.992992488382702e-05, "loss": 1.0819, "step": 8276 }, { "epoch": 0.63, "grad_norm": 1.3241358995437622, "learning_rate": 5.9907945468838224e-05, "loss": 1.5183, "step": 8277 }, { "epoch": 0.63, "grad_norm": 2.1138336658477783, "learning_rate": 5.988596836137784e-05, "loss": 1.6115, "step": 8278 }, { "epoch": 0.63, "grad_norm": 1.646220326423645, "learning_rate": 5.9863993562710694e-05, "loss": 1.7346, "step": 8279 }, { "epoch": 0.63, "grad_norm": 1.9497458934783936, "learning_rate": 5.9842021074101605e-05, "loss": 1.7303, "step": 8280 }, { "epoch": 0.63, "grad_norm": 1.2825995683670044, "learning_rate": 5.982005089681526e-05, "loss": 1.2289, "step": 8281 }, { "epoch": 0.63, "grad_norm": 1.153317928314209, "learning_rate": 5.9798083032116114e-05, "loss": 1.2072, "step": 8282 }, { "epoch": 0.63, "grad_norm": 1.4681892395019531, "learning_rate": 5.977611748126855e-05, "loss": 1.2705, "step": 8283 }, { "epoch": 0.63, "grad_norm": 1.769870400428772, "learning_rate": 5.975415424553683e-05, "loss": 1.4026, "step": 8284 }, { "epoch": 0.63, "grad_norm": 1.607054591178894, "learning_rate": 5.973219332618504e-05, "loss": 2.1015, "step": 8285 }, { "epoch": 0.63, "grad_norm": 1.333919882774353, "learning_rate": 5.971023472447719e-05, "loss": 1.4973, "step": 
8286 }, { "epoch": 0.63, "grad_norm": 1.1697180271148682, "learning_rate": 5.9688278441677106e-05, "loss": 1.5845, "step": 8287 }, { "epoch": 0.63, "grad_norm": 1.8914875984191895, "learning_rate": 5.9666324479048476e-05, "loss": 2.3356, "step": 8288 }, { "epoch": 0.63, "grad_norm": 1.4536173343658447, "learning_rate": 5.964437283785489e-05, "loss": 1.6968, "step": 8289 }, { "epoch": 0.63, "grad_norm": 1.5059058666229248, "learning_rate": 5.962242351935985e-05, "loss": 1.3963, "step": 8290 }, { "epoch": 0.63, "grad_norm": 2.3963303565979004, "learning_rate": 5.960047652482659e-05, "loss": 1.3246, "step": 8291 }, { "epoch": 0.63, "grad_norm": 1.929779052734375, "learning_rate": 5.95785318555183e-05, "loss": 2.0015, "step": 8292 }, { "epoch": 0.63, "grad_norm": 1.2084591388702393, "learning_rate": 5.955658951269807e-05, "loss": 1.6914, "step": 8293 }, { "epoch": 0.63, "grad_norm": 2.0164897441864014, "learning_rate": 5.9534649497628724e-05, "loss": 1.9597, "step": 8294 }, { "epoch": 0.63, "grad_norm": 1.3101706504821777, "learning_rate": 5.9512711811573106e-05, "loss": 1.7227, "step": 8295 }, { "epoch": 0.63, "grad_norm": 1.2241979837417603, "learning_rate": 5.949077645579384e-05, "loss": 1.4918, "step": 8296 }, { "epoch": 0.63, "grad_norm": 1.0393577814102173, "learning_rate": 5.9468843431553413e-05, "loss": 1.5434, "step": 8297 }, { "epoch": 0.63, "grad_norm": 1.1254688501358032, "learning_rate": 5.94469127401142e-05, "loss": 1.4524, "step": 8298 }, { "epoch": 0.63, "grad_norm": 1.133255124092102, "learning_rate": 5.942498438273849e-05, "loss": 1.5059, "step": 8299 }, { "epoch": 0.63, "grad_norm": 2.542062759399414, "learning_rate": 5.9403058360688305e-05, "loss": 1.7534, "step": 8300 }, { "epoch": 0.63, "grad_norm": 1.514195442199707, "learning_rate": 5.9381134675225656e-05, "loss": 0.7858, "step": 8301 }, { "epoch": 0.63, "grad_norm": 1.5177333354949951, "learning_rate": 5.9359213327612416e-05, "loss": 1.3432, "step": 8302 }, { "epoch": 0.63, "grad_norm": 
2.1968753337860107, "learning_rate": 5.933729431911019e-05, "loss": 1.5029, "step": 8303 }, { "epoch": 0.63, "grad_norm": 1.0539089441299438, "learning_rate": 5.931537765098061e-05, "loss": 1.8262, "step": 8304 }, { "epoch": 0.63, "grad_norm": 1.6166797876358032, "learning_rate": 5.929346332448511e-05, "loss": 1.5249, "step": 8305 }, { "epoch": 0.63, "grad_norm": 1.5731691122055054, "learning_rate": 5.9271551340884957e-05, "loss": 1.5432, "step": 8306 }, { "epoch": 0.63, "grad_norm": 1.5150606632232666, "learning_rate": 5.9249641701441314e-05, "loss": 0.9756, "step": 8307 }, { "epoch": 0.63, "grad_norm": 2.236879348754883, "learning_rate": 5.9227734407415246e-05, "loss": 1.2769, "step": 8308 }, { "epoch": 0.63, "grad_norm": 1.3480908870697021, "learning_rate": 5.9205829460067566e-05, "loss": 1.3625, "step": 8309 }, { "epoch": 0.63, "grad_norm": 1.5429728031158447, "learning_rate": 5.91839268606591e-05, "loss": 1.4433, "step": 8310 }, { "epoch": 0.63, "grad_norm": 1.810151219367981, "learning_rate": 5.916202661045047e-05, "loss": 1.8117, "step": 8311 }, { "epoch": 0.63, "grad_norm": 1.5664464235305786, "learning_rate": 5.914012871070209e-05, "loss": 1.5352, "step": 8312 }, { "epoch": 0.63, "grad_norm": 2.837895154953003, "learning_rate": 5.911823316267436e-05, "loss": 1.6882, "step": 8313 }, { "epoch": 0.63, "grad_norm": 1.3468904495239258, "learning_rate": 5.9096339967627534e-05, "loss": 1.3888, "step": 8314 }, { "epoch": 0.63, "grad_norm": 2.2211802005767822, "learning_rate": 5.907444912682163e-05, "loss": 1.9116, "step": 8315 }, { "epoch": 0.63, "grad_norm": 3.3912627696990967, "learning_rate": 5.9052560641516586e-05, "loss": 1.2677, "step": 8316 }, { "epoch": 0.63, "grad_norm": 1.2995741367340088, "learning_rate": 5.9030674512972287e-05, "loss": 1.0641, "step": 8317 }, { "epoch": 0.63, "grad_norm": 3.510152578353882, "learning_rate": 5.9008790742448294e-05, "loss": 1.2659, "step": 8318 }, { "epoch": 0.63, "grad_norm": 1.835303544998169, "learning_rate": 
5.8986909331204235e-05, "loss": 0.9537, "step": 8319 }, { "epoch": 0.63, "grad_norm": 1.133309006690979, "learning_rate": 5.89650302804995e-05, "loss": 0.9167, "step": 8320 }, { "epoch": 0.63, "grad_norm": 2.0675392150878906, "learning_rate": 5.894315359159329e-05, "loss": 2.0622, "step": 8321 }, { "epoch": 0.63, "grad_norm": 0.9952833652496338, "learning_rate": 5.892127926574478e-05, "loss": 1.2002, "step": 8322 }, { "epoch": 0.64, "grad_norm": 1.5431042909622192, "learning_rate": 5.8899407304212994e-05, "loss": 1.3722, "step": 8323 }, { "epoch": 0.64, "grad_norm": 1.856520175933838, "learning_rate": 5.8877537708256744e-05, "loss": 1.7128, "step": 8324 }, { "epoch": 0.64, "grad_norm": 1.548956036567688, "learning_rate": 5.885567047913474e-05, "loss": 1.6121, "step": 8325 }, { "epoch": 0.64, "grad_norm": 1.5751107931137085, "learning_rate": 5.883380561810563e-05, "loss": 0.8809, "step": 8326 }, { "epoch": 0.64, "grad_norm": 1.4684299230575562, "learning_rate": 5.8811943126427795e-05, "loss": 1.1339, "step": 8327 }, { "epoch": 0.64, "grad_norm": 1.0618183612823486, "learning_rate": 5.8790083005359576e-05, "loss": 1.2106, "step": 8328 }, { "epoch": 0.64, "grad_norm": 0.9593181014060974, "learning_rate": 5.876822525615917e-05, "loss": 0.7139, "step": 8329 }, { "epoch": 0.64, "grad_norm": 1.5324831008911133, "learning_rate": 5.874636988008457e-05, "loss": 1.5273, "step": 8330 }, { "epoch": 0.64, "grad_norm": 2.0014560222625732, "learning_rate": 5.87245168783937e-05, "loss": 1.5923, "step": 8331 }, { "epoch": 0.64, "grad_norm": 1.4865260124206543, "learning_rate": 5.870266625234435e-05, "loss": 1.5365, "step": 8332 }, { "epoch": 0.64, "grad_norm": 1.4137271642684937, "learning_rate": 5.868081800319412e-05, "loss": 1.6955, "step": 8333 }, { "epoch": 0.64, "grad_norm": 1.1159858703613281, "learning_rate": 5.8658972132200497e-05, "loss": 1.5999, "step": 8334 }, { "epoch": 0.64, "grad_norm": 1.8979754447937012, "learning_rate": 5.863712864062089e-05, "loss": 1.7927, "step": 
8335 }, { "epoch": 0.64, "grad_norm": 1.9931282997131348, "learning_rate": 5.861528752971243e-05, "loss": 1.3163, "step": 8336 }, { "epoch": 0.64, "grad_norm": 1.2708696126937866, "learning_rate": 5.859344880073224e-05, "loss": 1.1606, "step": 8337 }, { "epoch": 0.64, "grad_norm": 1.342939853668213, "learning_rate": 5.8571612454937316e-05, "loss": 1.5195, "step": 8338 }, { "epoch": 0.64, "grad_norm": 1.1870741844177246, "learning_rate": 5.854977849358441e-05, "loss": 1.3238, "step": 8339 }, { "epoch": 0.64, "grad_norm": 1.3626521825790405, "learning_rate": 5.8527946917930166e-05, "loss": 1.4377, "step": 8340 }, { "epoch": 0.64, "grad_norm": 1.1789835691452026, "learning_rate": 5.85061177292312e-05, "loss": 1.8164, "step": 8341 }, { "epoch": 0.64, "grad_norm": 1.460486888885498, "learning_rate": 5.8484290928743815e-05, "loss": 1.1236, "step": 8342 }, { "epoch": 0.64, "grad_norm": 3.7326390743255615, "learning_rate": 5.846246651772434e-05, "loss": 2.3239, "step": 8343 }, { "epoch": 0.64, "grad_norm": 2.101921319961548, "learning_rate": 5.844064449742888e-05, "loss": 1.4679, "step": 8344 }, { "epoch": 0.64, "grad_norm": 1.2615885734558105, "learning_rate": 5.841882486911338e-05, "loss": 1.3598, "step": 8345 }, { "epoch": 0.64, "grad_norm": 1.3161195516586304, "learning_rate": 5.839700763403371e-05, "loss": 1.0398, "step": 8346 }, { "epoch": 0.64, "grad_norm": 2.0428497791290283, "learning_rate": 5.837519279344561e-05, "loss": 1.2842, "step": 8347 }, { "epoch": 0.64, "grad_norm": 1.498313546180725, "learning_rate": 5.835338034860458e-05, "loss": 2.1319, "step": 8348 }, { "epoch": 0.64, "grad_norm": 1.8172250986099243, "learning_rate": 5.8331570300766106e-05, "loss": 1.3167, "step": 8349 }, { "epoch": 0.64, "grad_norm": 1.5357763767242432, "learning_rate": 5.8309762651185484e-05, "loss": 1.2234, "step": 8350 }, { "epoch": 0.64, "grad_norm": 1.923667550086975, "learning_rate": 5.828795740111788e-05, "loss": 0.9241, "step": 8351 }, { "epoch": 0.64, "grad_norm": 
3.8994522094726562, "learning_rate": 5.8266154551818216e-05, "loss": 1.883, "step": 8352 }, { "epoch": 0.64, "grad_norm": 1.4764360189437866, "learning_rate": 5.82443541045415e-05, "loss": 1.0962, "step": 8353 }, { "epoch": 0.64, "grad_norm": 2.326033592224121, "learning_rate": 5.8222556060542364e-05, "loss": 1.1361, "step": 8354 }, { "epoch": 0.64, "grad_norm": 1.4164631366729736, "learning_rate": 5.820076042107545e-05, "loss": 1.2404, "step": 8355 }, { "epoch": 0.64, "grad_norm": 0.9497103691101074, "learning_rate": 5.817896718739528e-05, "loss": 0.8555, "step": 8356 }, { "epoch": 0.64, "grad_norm": 3.838109254837036, "learning_rate": 5.81571763607561e-05, "loss": 1.8873, "step": 8357 }, { "epoch": 0.64, "grad_norm": 1.2767099142074585, "learning_rate": 5.813538794241211e-05, "loss": 1.0628, "step": 8358 }, { "epoch": 0.64, "grad_norm": 1.4679056406021118, "learning_rate": 5.8113601933617436e-05, "loss": 1.6849, "step": 8359 }, { "epoch": 0.64, "grad_norm": 1.2239820957183838, "learning_rate": 5.8091818335625924e-05, "loss": 1.2284, "step": 8360 }, { "epoch": 0.64, "grad_norm": 1.3102593421936035, "learning_rate": 5.8070037149691314e-05, "loss": 1.5491, "step": 8361 }, { "epoch": 0.64, "grad_norm": 1.0504578351974487, "learning_rate": 5.804825837706731e-05, "loss": 1.2059, "step": 8362 }, { "epoch": 0.64, "grad_norm": 2.234679698944092, "learning_rate": 5.802648201900733e-05, "loss": 1.8746, "step": 8363 }, { "epoch": 0.64, "grad_norm": 1.8296420574188232, "learning_rate": 5.800470807676478e-05, "loss": 1.0475, "step": 8364 }, { "epoch": 0.64, "grad_norm": 1.354616403579712, "learning_rate": 5.7982936551592906e-05, "loss": 1.7103, "step": 8365 }, { "epoch": 0.64, "grad_norm": 1.139151930809021, "learning_rate": 5.796116744474469e-05, "loss": 1.0848, "step": 8366 }, { "epoch": 0.64, "grad_norm": 1.329920768737793, "learning_rate": 5.793940075747314e-05, "loss": 1.1477, "step": 8367 }, { "epoch": 0.64, "grad_norm": 1.8353629112243652, "learning_rate": 
5.7917636491031065e-05, "loss": 1.2776, "step": 8368 }, { "epoch": 0.64, "grad_norm": 3.2495267391204834, "learning_rate": 5.78958746466711e-05, "loss": 1.5624, "step": 8369 }, { "epoch": 0.64, "grad_norm": 1.012447714805603, "learning_rate": 5.787411522564573e-05, "loss": 1.2113, "step": 8370 }, { "epoch": 0.64, "grad_norm": 1.5239111185073853, "learning_rate": 5.785235822920741e-05, "loss": 1.0665, "step": 8371 }, { "epoch": 0.64, "grad_norm": 1.0725277662277222, "learning_rate": 5.7830603658608276e-05, "loss": 1.6272, "step": 8372 }, { "epoch": 0.64, "grad_norm": 1.5354775190353394, "learning_rate": 5.7808851515100496e-05, "loss": 1.6931, "step": 8373 }, { "epoch": 0.64, "grad_norm": 0.8624326586723328, "learning_rate": 5.778710179993607e-05, "loss": 0.9041, "step": 8374 }, { "epoch": 0.64, "grad_norm": 2.1770331859588623, "learning_rate": 5.776535451436672e-05, "loss": 1.4899, "step": 8375 }, { "epoch": 0.64, "grad_norm": 1.080212116241455, "learning_rate": 5.774360965964419e-05, "loss": 1.1368, "step": 8376 }, { "epoch": 0.64, "grad_norm": 1.3721109628677368, "learning_rate": 5.772186723702004e-05, "loss": 1.0845, "step": 8377 }, { "epoch": 0.64, "grad_norm": 1.4319074153900146, "learning_rate": 5.7700127247745625e-05, "loss": 1.1427, "step": 8378 }, { "epoch": 0.64, "grad_norm": 1.4936630725860596, "learning_rate": 5.767838969307226e-05, "loss": 1.2257, "step": 8379 }, { "epoch": 0.64, "grad_norm": 1.230796456336975, "learning_rate": 5.765665457425102e-05, "loss": 0.6979, "step": 8380 }, { "epoch": 0.64, "grad_norm": 1.7049561738967896, "learning_rate": 5.763492189253286e-05, "loss": 1.3655, "step": 8381 }, { "epoch": 0.64, "grad_norm": 1.4865137338638306, "learning_rate": 5.761319164916866e-05, "loss": 1.4057, "step": 8382 }, { "epoch": 0.64, "grad_norm": 1.4677603244781494, "learning_rate": 5.759146384540916e-05, "loss": 1.3781, "step": 8383 }, { "epoch": 0.64, "grad_norm": 1.2763698101043701, "learning_rate": 5.756973848250484e-05, "loss": 1.2215, "step": 
8384 }, { "epoch": 0.64, "grad_norm": 1.6695921421051025, "learning_rate": 5.7548015561706145e-05, "loss": 0.891, "step": 8385 }, { "epoch": 0.64, "grad_norm": 1.1351715326309204, "learning_rate": 5.7526295084263425e-05, "loss": 1.2177, "step": 8386 }, { "epoch": 0.64, "grad_norm": 1.5946134328842163, "learning_rate": 5.750457705142671e-05, "loss": 1.3211, "step": 8387 }, { "epoch": 0.64, "grad_norm": 1.2695144414901733, "learning_rate": 5.74828614644461e-05, "loss": 1.0161, "step": 8388 }, { "epoch": 0.64, "grad_norm": 1.4865672588348389, "learning_rate": 5.746114832457139e-05, "loss": 1.1267, "step": 8389 }, { "epoch": 0.64, "grad_norm": 1.237766981124878, "learning_rate": 5.743943763305227e-05, "loss": 1.3934, "step": 8390 }, { "epoch": 0.64, "grad_norm": 1.4780282974243164, "learning_rate": 5.741772939113835e-05, "loss": 1.2687, "step": 8391 }, { "epoch": 0.64, "grad_norm": 2.13883638381958, "learning_rate": 5.739602360007911e-05, "loss": 2.1087, "step": 8392 }, { "epoch": 0.64, "grad_norm": 3.5730767250061035, "learning_rate": 5.737432026112375e-05, "loss": 1.9651, "step": 8393 }, { "epoch": 0.64, "grad_norm": 1.4473026990890503, "learning_rate": 5.7352619375521475e-05, "loss": 0.7881, "step": 8394 }, { "epoch": 0.64, "grad_norm": 1.4487563371658325, "learning_rate": 5.733092094452135e-05, "loss": 1.5142, "step": 8395 }, { "epoch": 0.64, "grad_norm": 1.8849231004714966, "learning_rate": 5.730922496937212e-05, "loss": 1.3832, "step": 8396 }, { "epoch": 0.64, "grad_norm": 1.426995873451233, "learning_rate": 5.728753145132264e-05, "loss": 1.6003, "step": 8397 }, { "epoch": 0.64, "grad_norm": 3.977069616317749, "learning_rate": 5.726584039162142e-05, "loss": 1.5974, "step": 8398 }, { "epoch": 0.64, "grad_norm": 2.007913827896118, "learning_rate": 5.72441517915169e-05, "loss": 1.6843, "step": 8399 }, { "epoch": 0.64, "grad_norm": 1.213789701461792, "learning_rate": 5.722246565225738e-05, "loss": 1.2936, "step": 8400 }, { "epoch": 0.64, "grad_norm": 
1.5216822624206543, "learning_rate": 5.7200781975091065e-05, "loss": 1.869, "step": 8401 }, { "epoch": 0.64, "grad_norm": 1.8314619064331055, "learning_rate": 5.7179100761266006e-05, "loss": 1.2457, "step": 8402 }, { "epoch": 0.64, "grad_norm": 1.0349607467651367, "learning_rate": 5.715742201202997e-05, "loss": 1.6951, "step": 8403 }, { "epoch": 0.64, "grad_norm": 0.9960149526596069, "learning_rate": 5.7135745728630764e-05, "loss": 0.9691, "step": 8404 }, { "epoch": 0.64, "grad_norm": 1.6359190940856934, "learning_rate": 5.711407191231602e-05, "loss": 1.3205, "step": 8405 }, { "epoch": 0.64, "grad_norm": 1.2081974744796753, "learning_rate": 5.709240056433316e-05, "loss": 1.8782, "step": 8406 }, { "epoch": 0.64, "grad_norm": 1.1291875839233398, "learning_rate": 5.707073168592942e-05, "loss": 1.357, "step": 8407 }, { "epoch": 0.64, "grad_norm": 1.552680253982544, "learning_rate": 5.7049065278352075e-05, "loss": 1.3068, "step": 8408 }, { "epoch": 0.64, "grad_norm": 1.3728117942810059, "learning_rate": 5.7027401342848076e-05, "loss": 0.9831, "step": 8409 }, { "epoch": 0.64, "grad_norm": 1.6797341108322144, "learning_rate": 5.700573988066433e-05, "loss": 1.6511, "step": 8410 }, { "epoch": 0.64, "grad_norm": 2.480870008468628, "learning_rate": 5.6984080893047633e-05, "loss": 1.4734, "step": 8411 }, { "epoch": 0.64, "grad_norm": 1.6463929414749146, "learning_rate": 5.696242438124448e-05, "loss": 1.5451, "step": 8412 }, { "epoch": 0.64, "grad_norm": 1.7669463157653809, "learning_rate": 5.6940770346501406e-05, "loss": 1.5512, "step": 8413 }, { "epoch": 0.64, "grad_norm": 2.0494189262390137, "learning_rate": 5.691911879006474e-05, "loss": 1.0535, "step": 8414 }, { "epoch": 0.64, "grad_norm": 3.070099353790283, "learning_rate": 5.6897469713180576e-05, "loss": 1.1395, "step": 8415 }, { "epoch": 0.64, "grad_norm": 2.6491148471832275, "learning_rate": 5.6875823117095025e-05, "loss": 1.2299, "step": 8416 }, { "epoch": 0.64, "grad_norm": 1.563267469406128, "learning_rate": 
5.685417900305393e-05, "loss": 1.284, "step": 8417 }, { "epoch": 0.64, "grad_norm": 1.6056065559387207, "learning_rate": 5.683253737230301e-05, "loss": 1.7043, "step": 8418 }, { "epoch": 0.64, "grad_norm": 1.0133929252624512, "learning_rate": 5.681089822608787e-05, "loss": 0.9869, "step": 8419 }, { "epoch": 0.64, "grad_norm": 1.3740355968475342, "learning_rate": 5.678926156565405e-05, "loss": 1.6084, "step": 8420 }, { "epoch": 0.64, "grad_norm": 1.0004305839538574, "learning_rate": 5.676762739224677e-05, "loss": 0.9508, "step": 8421 }, { "epoch": 0.64, "grad_norm": 1.5010011196136475, "learning_rate": 5.674599570711121e-05, "loss": 1.2331, "step": 8422 }, { "epoch": 0.64, "grad_norm": 1.2933666706085205, "learning_rate": 5.672436651149249e-05, "loss": 1.3191, "step": 8423 }, { "epoch": 0.64, "grad_norm": 1.7812151908874512, "learning_rate": 5.670273980663537e-05, "loss": 1.6644, "step": 8424 }, { "epoch": 0.64, "grad_norm": 1.369269609451294, "learning_rate": 5.668111559378471e-05, "loss": 1.261, "step": 8425 }, { "epoch": 0.64, "grad_norm": 2.5163326263427734, "learning_rate": 5.665949387418503e-05, "loss": 1.4944, "step": 8426 }, { "epoch": 0.64, "grad_norm": 1.0814038515090942, "learning_rate": 5.663787464908077e-05, "loss": 0.9543, "step": 8427 }, { "epoch": 0.64, "grad_norm": 1.646921992301941, "learning_rate": 5.6616257919716266e-05, "loss": 1.4981, "step": 8428 }, { "epoch": 0.64, "grad_norm": 1.7047971487045288, "learning_rate": 5.659464368733573e-05, "loss": 1.3156, "step": 8429 }, { "epoch": 0.64, "grad_norm": 1.2526756525039673, "learning_rate": 5.657303195318311e-05, "loss": 1.2898, "step": 8430 }, { "epoch": 0.64, "grad_norm": 1.2018938064575195, "learning_rate": 5.6551422718502334e-05, "loss": 1.4145, "step": 8431 }, { "epoch": 0.64, "grad_norm": 2.0810461044311523, "learning_rate": 5.652981598453715e-05, "loss": 1.9497, "step": 8432 }, { "epoch": 0.64, "grad_norm": 1.419446349143982, "learning_rate": 5.650821175253109e-05, "loss": 1.3909, "step": 
8433 }, { "epoch": 0.64, "grad_norm": 2.369093179702759, "learning_rate": 5.648661002372768e-05, "loss": 2.045, "step": 8434 }, { "epoch": 0.64, "grad_norm": 1.4065035581588745, "learning_rate": 5.646501079937019e-05, "loss": 1.3674, "step": 8435 }, { "epoch": 0.64, "grad_norm": 2.005336046218872, "learning_rate": 5.644341408070173e-05, "loss": 1.6571, "step": 8436 }, { "epoch": 0.64, "grad_norm": 1.9575365781784058, "learning_rate": 5.6421819868965355e-05, "loss": 1.9327, "step": 8437 }, { "epoch": 0.64, "grad_norm": 2.4651052951812744, "learning_rate": 5.640022816540398e-05, "loss": 1.8088, "step": 8438 }, { "epoch": 0.64, "grad_norm": 1.8875635862350464, "learning_rate": 5.6378638971260236e-05, "loss": 1.4438, "step": 8439 }, { "epoch": 0.64, "grad_norm": 1.2633992433547974, "learning_rate": 5.6357052287776765e-05, "loss": 1.2673, "step": 8440 }, { "epoch": 0.64, "grad_norm": 1.4272592067718506, "learning_rate": 5.6335468116196056e-05, "loss": 1.2628, "step": 8441 }, { "epoch": 0.64, "grad_norm": 2.417416572570801, "learning_rate": 5.631388645776028e-05, "loss": 1.2715, "step": 8442 }, { "epoch": 0.64, "grad_norm": 3.0829763412475586, "learning_rate": 5.629230731371171e-05, "loss": 1.1349, "step": 8443 }, { "epoch": 0.64, "grad_norm": 1.5356249809265137, "learning_rate": 5.627073068529225e-05, "loss": 1.4226, "step": 8444 }, { "epoch": 0.64, "grad_norm": 1.278196096420288, "learning_rate": 5.6249156573743835e-05, "loss": 1.1151, "step": 8445 }, { "epoch": 0.64, "grad_norm": 0.987568199634552, "learning_rate": 5.622758498030809e-05, "loss": 1.4042, "step": 8446 }, { "epoch": 0.64, "grad_norm": 2.9826412200927734, "learning_rate": 5.62060159062267e-05, "loss": 1.3491, "step": 8447 }, { "epoch": 0.64, "grad_norm": 1.2810496091842651, "learning_rate": 5.618444935274097e-05, "loss": 1.1539, "step": 8448 }, { "epoch": 0.64, "grad_norm": 1.0759278535842896, "learning_rate": 5.616288532109225e-05, "loss": 0.9459, "step": 8449 }, { "epoch": 0.64, "grad_norm": 
4.313253879547119, "learning_rate": 5.6141323812521695e-05, "loss": 1.3369, "step": 8450 }, { "epoch": 0.64, "grad_norm": 1.3960819244384766, "learning_rate": 5.611976482827022e-05, "loss": 1.541, "step": 8451 }, { "epoch": 0.64, "grad_norm": 1.092916488647461, "learning_rate": 5.609820836957871e-05, "loss": 1.1293, "step": 8452 }, { "epoch": 0.64, "grad_norm": 1.9700990915298462, "learning_rate": 5.60766544376879e-05, "loss": 1.9883, "step": 8453 }, { "epoch": 0.65, "grad_norm": 0.9778284430503845, "learning_rate": 5.605510303383831e-05, "loss": 1.2114, "step": 8454 }, { "epoch": 0.65, "grad_norm": 1.6707572937011719, "learning_rate": 5.6033554159270294e-05, "loss": 1.1193, "step": 8455 }, { "epoch": 0.65, "grad_norm": 1.3291871547698975, "learning_rate": 5.601200781522421e-05, "loss": 1.1891, "step": 8456 }, { "epoch": 0.65, "grad_norm": 1.4810320138931274, "learning_rate": 5.599046400294008e-05, "loss": 0.8481, "step": 8457 }, { "epoch": 0.65, "grad_norm": 1.5946623086929321, "learning_rate": 5.596892272365792e-05, "loss": 1.0789, "step": 8458 }, { "epoch": 0.65, "grad_norm": 1.3415488004684448, "learning_rate": 5.5947383978617584e-05, "loss": 1.3801, "step": 8459 }, { "epoch": 0.65, "grad_norm": 1.2845669984817505, "learning_rate": 5.592584776905869e-05, "loss": 1.0652, "step": 8460 }, { "epoch": 0.65, "grad_norm": 1.4049466848373413, "learning_rate": 5.590431409622081e-05, "loss": 1.3861, "step": 8461 }, { "epoch": 0.65, "grad_norm": 1.4084173440933228, "learning_rate": 5.588278296134335e-05, "loss": 1.1484, "step": 8462 }, { "epoch": 0.65, "grad_norm": 1.3665833473205566, "learning_rate": 5.586125436566554e-05, "loss": 1.1759, "step": 8463 }, { "epoch": 0.65, "grad_norm": 1.3769378662109375, "learning_rate": 5.58397283104264e-05, "loss": 0.7259, "step": 8464 }, { "epoch": 0.65, "grad_norm": 3.4939374923706055, "learning_rate": 5.581820479686498e-05, "loss": 1.7394, "step": 8465 }, { "epoch": 0.65, "grad_norm": 1.0252635478973389, "learning_rate": 
5.579668382622e-05, "loss": 1.3394, "step": 8466 }, { "epoch": 0.65, "grad_norm": 1.6675926446914673, "learning_rate": 5.5775165399730154e-05, "loss": 1.026, "step": 8467 }, { "epoch": 0.65, "grad_norm": 1.7492988109588623, "learning_rate": 5.5753649518633986e-05, "loss": 1.9117, "step": 8468 }, { "epoch": 0.65, "grad_norm": 0.8949444890022278, "learning_rate": 5.573213618416979e-05, "loss": 0.9187, "step": 8469 }, { "epoch": 0.65, "grad_norm": 2.4582290649414062, "learning_rate": 5.571062539757581e-05, "loss": 1.9168, "step": 8470 }, { "epoch": 0.65, "grad_norm": 1.3301711082458496, "learning_rate": 5.5689117160090164e-05, "loss": 1.7874, "step": 8471 }, { "epoch": 0.65, "grad_norm": 2.307938575744629, "learning_rate": 5.566761147295073e-05, "loss": 1.6075, "step": 8472 }, { "epoch": 0.65, "grad_norm": 1.2648210525512695, "learning_rate": 5.564610833739525e-05, "loss": 1.9415, "step": 8473 }, { "epoch": 0.65, "grad_norm": 2.0240113735198975, "learning_rate": 5.562460775466143e-05, "loss": 1.3572, "step": 8474 }, { "epoch": 0.65, "grad_norm": 1.7427918910980225, "learning_rate": 5.560310972598667e-05, "loss": 1.6025, "step": 8475 }, { "epoch": 0.65, "grad_norm": 2.017049551010132, "learning_rate": 5.558161425260835e-05, "loss": 1.9799, "step": 8476 }, { "epoch": 0.65, "grad_norm": 1.056488037109375, "learning_rate": 5.5560121335763685e-05, "loss": 1.1207, "step": 8477 }, { "epoch": 0.65, "grad_norm": 2.097356081008911, "learning_rate": 5.5538630976689655e-05, "loss": 1.8561, "step": 8478 }, { "epoch": 0.65, "grad_norm": 1.2282415628433228, "learning_rate": 5.5517143176623176e-05, "loss": 1.2404, "step": 8479 }, { "epoch": 0.65, "grad_norm": 1.199210524559021, "learning_rate": 5.549565793680105e-05, "loss": 1.5551, "step": 8480 }, { "epoch": 0.65, "grad_norm": 3.3878397941589355, "learning_rate": 5.547417525845978e-05, "loss": 2.205, "step": 8481 }, { "epoch": 0.65, "grad_norm": 1.5338125228881836, "learning_rate": 5.545269514283592e-05, "loss": 1.2872, "step": 8482 
}, { "epoch": 0.65, "grad_norm": 2.7258241176605225, "learning_rate": 5.543121759116572e-05, "loss": 1.1486, "step": 8483 }, { "epoch": 0.65, "grad_norm": 1.9165929555892944, "learning_rate": 5.540974260468529e-05, "loss": 1.1935, "step": 8484 }, { "epoch": 0.65, "grad_norm": 1.8926820755004883, "learning_rate": 5.538827018463069e-05, "loss": 1.5276, "step": 8485 }, { "epoch": 0.65, "grad_norm": 2.3000433444976807, "learning_rate": 5.5366800332237825e-05, "loss": 1.3028, "step": 8486 }, { "epoch": 0.65, "grad_norm": 1.1691991090774536, "learning_rate": 5.53453330487423e-05, "loss": 0.9955, "step": 8487 }, { "epoch": 0.65, "grad_norm": 1.7208834886550903, "learning_rate": 5.532386833537977e-05, "loss": 1.1673, "step": 8488 }, { "epoch": 0.65, "grad_norm": 1.9659923315048218, "learning_rate": 5.530240619338565e-05, "loss": 1.1979, "step": 8489 }, { "epoch": 0.65, "grad_norm": 2.216670513153076, "learning_rate": 5.5280946623995146e-05, "loss": 1.7661, "step": 8490 }, { "epoch": 0.65, "grad_norm": 1.7546519041061401, "learning_rate": 5.525948962844345e-05, "loss": 2.0496, "step": 8491 }, { "epoch": 0.65, "grad_norm": 2.0250723361968994, "learning_rate": 5.523803520796551e-05, "loss": 1.548, "step": 8492 }, { "epoch": 0.65, "grad_norm": 1.0327837467193604, "learning_rate": 5.5216583363796113e-05, "loss": 1.1837, "step": 8493 }, { "epoch": 0.65, "grad_norm": 4.532380104064941, "learning_rate": 5.519513409716996e-05, "loss": 1.6985, "step": 8494 }, { "epoch": 0.65, "grad_norm": 2.5344574451446533, "learning_rate": 5.517368740932164e-05, "loss": 1.7253, "step": 8495 }, { "epoch": 0.65, "grad_norm": 1.0333566665649414, "learning_rate": 5.515224330148543e-05, "loss": 1.0717, "step": 8496 }, { "epoch": 0.65, "grad_norm": 1.4038816690444946, "learning_rate": 5.5130801774895624e-05, "loss": 1.4888, "step": 8497 }, { "epoch": 0.65, "grad_norm": 1.6668692827224731, "learning_rate": 5.5109362830786314e-05, "loss": 1.3243, "step": 8498 }, { "epoch": 0.65, "grad_norm": 
1.2507561445236206, "learning_rate": 5.5087926470391395e-05, "loss": 0.9156, "step": 8499 }, { "epoch": 0.65, "grad_norm": 1.5152868032455444, "learning_rate": 5.50664926949447e-05, "loss": 1.6577, "step": 8500 }, { "epoch": 0.65, "grad_norm": 1.4847551584243774, "learning_rate": 5.5045061505679855e-05, "loss": 1.8128, "step": 8501 }, { "epoch": 0.65, "grad_norm": 1.2317785024642944, "learning_rate": 5.502363290383028e-05, "loss": 1.0423, "step": 8502 }, { "epoch": 0.65, "grad_norm": 1.7572108507156372, "learning_rate": 5.500220689062936e-05, "loss": 1.8721, "step": 8503 }, { "epoch": 0.65, "grad_norm": 1.5970736742019653, "learning_rate": 5.498078346731035e-05, "loss": 1.3976, "step": 8504 }, { "epoch": 0.65, "grad_norm": 1.3819754123687744, "learning_rate": 5.495936263510617e-05, "loss": 1.8652, "step": 8505 }, { "epoch": 0.65, "grad_norm": 0.9141349196434021, "learning_rate": 5.493794439524979e-05, "loss": 0.8809, "step": 8506 }, { "epoch": 0.65, "grad_norm": 1.2056360244750977, "learning_rate": 5.4916528748973974e-05, "loss": 1.2531, "step": 8507 }, { "epoch": 0.65, "grad_norm": 2.2086572647094727, "learning_rate": 5.4895115697511246e-05, "loss": 1.4533, "step": 8508 }, { "epoch": 0.65, "grad_norm": 1.0291227102279663, "learning_rate": 5.4873705242094074e-05, "loss": 1.4741, "step": 8509 }, { "epoch": 0.65, "grad_norm": 1.1184982061386108, "learning_rate": 5.485229738395481e-05, "loss": 1.6021, "step": 8510 }, { "epoch": 0.65, "grad_norm": 3.230299949645996, "learning_rate": 5.483089212432555e-05, "loss": 1.4738, "step": 8511 }, { "epoch": 0.65, "grad_norm": 2.03271484375, "learning_rate": 5.480948946443825e-05, "loss": 1.0512, "step": 8512 }, { "epoch": 0.65, "grad_norm": 1.4956718683242798, "learning_rate": 5.478808940552483e-05, "loss": 1.579, "step": 8513 }, { "epoch": 0.65, "grad_norm": 1.2169557809829712, "learning_rate": 5.4766691948816926e-05, "loss": 1.9127, "step": 8514 }, { "epoch": 0.65, "grad_norm": 1.0917404890060425, "learning_rate": 
5.474529709554612e-05, "loss": 1.1021, "step": 8515 }, { "epoch": 0.65, "grad_norm": 1.3576639890670776, "learning_rate": 5.472390484694383e-05, "loss": 1.7828, "step": 8516 }, { "epoch": 0.65, "grad_norm": 1.477758526802063, "learning_rate": 5.4702515204241234e-05, "loss": 0.8071, "step": 8517 }, { "epoch": 0.65, "grad_norm": 2.484205484390259, "learning_rate": 5.4681128168669485e-05, "loss": 1.6954, "step": 8518 }, { "epoch": 0.65, "grad_norm": 1.6570450067520142, "learning_rate": 5.465974374145953e-05, "loss": 1.9328, "step": 8519 }, { "epoch": 0.65, "grad_norm": 1.4251078367233276, "learning_rate": 5.4638361923842175e-05, "loss": 1.6589, "step": 8520 }, { "epoch": 0.65, "grad_norm": 1.6501802206039429, "learning_rate": 5.4616982717047996e-05, "loss": 1.8611, "step": 8521 }, { "epoch": 0.65, "grad_norm": 2.00386118888855, "learning_rate": 5.4595606122307566e-05, "loss": 1.5549, "step": 8522 }, { "epoch": 0.65, "grad_norm": 1.1300692558288574, "learning_rate": 5.4574232140851175e-05, "loss": 1.4317, "step": 8523 }, { "epoch": 0.65, "grad_norm": 1.2874631881713867, "learning_rate": 5.4552860773909035e-05, "loss": 1.4135, "step": 8524 }, { "epoch": 0.65, "grad_norm": 0.9744783043861389, "learning_rate": 5.453149202271124e-05, "loss": 1.2311, "step": 8525 }, { "epoch": 0.65, "grad_norm": 1.2576465606689453, "learning_rate": 5.451012588848761e-05, "loss": 0.9953, "step": 8526 }, { "epoch": 0.65, "grad_norm": 1.690848469734192, "learning_rate": 5.4488762372467914e-05, "loss": 1.3685, "step": 8527 }, { "epoch": 0.65, "grad_norm": 3.0628528594970703, "learning_rate": 5.446740147588181e-05, "loss": 1.2924, "step": 8528 }, { "epoch": 0.65, "grad_norm": 1.8715970516204834, "learning_rate": 5.444604319995867e-05, "loss": 1.2467, "step": 8529 }, { "epoch": 0.65, "grad_norm": 1.7554932832717896, "learning_rate": 5.4424687545927776e-05, "loss": 1.139, "step": 8530 }, { "epoch": 0.65, "grad_norm": 1.7276588678359985, "learning_rate": 5.4403334515018334e-05, "loss": 1.3787, 
"step": 8531 }, { "epoch": 0.65, "grad_norm": 1.4017361402511597, "learning_rate": 5.438198410845925e-05, "loss": 1.2815, "step": 8532 }, { "epoch": 0.65, "grad_norm": 1.3320993185043335, "learning_rate": 5.436063632747941e-05, "loss": 1.2604, "step": 8533 }, { "epoch": 0.65, "grad_norm": 1.2329479455947876, "learning_rate": 5.433929117330754e-05, "loss": 1.0281, "step": 8534 }, { "epoch": 0.65, "grad_norm": 1.6262924671173096, "learning_rate": 5.4317948647172076e-05, "loss": 1.3692, "step": 8535 }, { "epoch": 0.65, "grad_norm": 1.2404147386550903, "learning_rate": 5.429660875030148e-05, "loss": 1.4243, "step": 8536 }, { "epoch": 0.65, "grad_norm": 1.2965664863586426, "learning_rate": 5.4275271483923996e-05, "loss": 1.2557, "step": 8537 }, { "epoch": 0.65, "grad_norm": 1.8088724613189697, "learning_rate": 5.425393684926765e-05, "loss": 1.7522, "step": 8538 }, { "epoch": 0.65, "grad_norm": 1.060921549797058, "learning_rate": 5.423260484756043e-05, "loss": 0.9454, "step": 8539 }, { "epoch": 0.65, "grad_norm": 1.1120671033859253, "learning_rate": 5.421127548003006e-05, "loss": 0.9068, "step": 8540 }, { "epoch": 0.65, "grad_norm": 1.524959683418274, "learning_rate": 5.418994874790422e-05, "loss": 1.2349, "step": 8541 }, { "epoch": 0.65, "grad_norm": 1.019923210144043, "learning_rate": 5.416862465241033e-05, "loss": 1.2472, "step": 8542 }, { "epoch": 0.65, "grad_norm": 1.2079402208328247, "learning_rate": 5.4147303194775745e-05, "loss": 0.9478, "step": 8543 }, { "epoch": 0.65, "grad_norm": 1.4460818767547607, "learning_rate": 5.412598437622767e-05, "loss": 1.6801, "step": 8544 }, { "epoch": 0.65, "grad_norm": 1.4625171422958374, "learning_rate": 5.410466819799306e-05, "loss": 1.3277, "step": 8545 }, { "epoch": 0.65, "grad_norm": 2.1583523750305176, "learning_rate": 5.4083354661298814e-05, "loss": 1.7734, "step": 8546 }, { "epoch": 0.65, "grad_norm": 2.8495047092437744, "learning_rate": 5.40620437673717e-05, "loss": 1.7546, "step": 8547 }, { "epoch": 0.65, "grad_norm": 
1.2763289213180542, "learning_rate": 5.404073551743821e-05, "loss": 1.4334, "step": 8548 }, { "epoch": 0.65, "grad_norm": 1.6624820232391357, "learning_rate": 5.401942991272475e-05, "loss": 1.5157, "step": 8549 }, { "epoch": 0.65, "grad_norm": 2.3963401317596436, "learning_rate": 5.399812695445766e-05, "loss": 1.6045, "step": 8550 }, { "epoch": 0.65, "grad_norm": 1.566410779953003, "learning_rate": 5.397682664386295e-05, "loss": 1.5246, "step": 8551 }, { "epoch": 0.65, "grad_norm": 2.4608731269836426, "learning_rate": 5.3955528982166624e-05, "loss": 2.0943, "step": 8552 }, { "epoch": 0.65, "grad_norm": 1.2643353939056396, "learning_rate": 5.3934233970594535e-05, "loss": 1.661, "step": 8553 }, { "epoch": 0.65, "grad_norm": 1.4602752923965454, "learning_rate": 5.3912941610372235e-05, "loss": 1.2521, "step": 8554 }, { "epoch": 0.65, "grad_norm": 1.335328221321106, "learning_rate": 5.389165190272527e-05, "loss": 1.1944, "step": 8555 }, { "epoch": 0.65, "grad_norm": 1.2979611158370972, "learning_rate": 5.387036484887902e-05, "loss": 1.0472, "step": 8556 }, { "epoch": 0.65, "grad_norm": 1.3649334907531738, "learning_rate": 5.3849080450058655e-05, "loss": 1.272, "step": 8557 }, { "epoch": 0.65, "grad_norm": 1.9795647859573364, "learning_rate": 5.382779870748915e-05, "loss": 1.7082, "step": 8558 }, { "epoch": 0.65, "grad_norm": 1.152193307876587, "learning_rate": 5.380651962239549e-05, "loss": 1.2118, "step": 8559 }, { "epoch": 0.65, "grad_norm": 1.7787055969238281, "learning_rate": 5.378524319600231e-05, "loss": 1.9598, "step": 8560 }, { "epoch": 0.65, "grad_norm": 1.0590720176696777, "learning_rate": 5.3763969429534254e-05, "loss": 1.3885, "step": 8561 }, { "epoch": 0.65, "grad_norm": 1.6047581434249878, "learning_rate": 5.374269832421579e-05, "loss": 0.6844, "step": 8562 }, { "epoch": 0.65, "grad_norm": 1.3229050636291504, "learning_rate": 5.372142988127108e-05, "loss": 1.4392, "step": 8563 }, { "epoch": 0.65, "grad_norm": 1.084157109260559, "learning_rate": 
5.370016410192431e-05, "loss": 1.4155, "step": 8564 }, { "epoch": 0.65, "grad_norm": 1.8102316856384277, "learning_rate": 5.367890098739949e-05, "loss": 1.7689, "step": 8565 }, { "epoch": 0.65, "grad_norm": 1.7441438436508179, "learning_rate": 5.36576405389204e-05, "loss": 1.3371, "step": 8566 }, { "epoch": 0.65, "grad_norm": 2.46156644821167, "learning_rate": 5.363638275771064e-05, "loss": 1.7598, "step": 8567 }, { "epoch": 0.65, "grad_norm": 2.084789991378784, "learning_rate": 5.361512764499379e-05, "loss": 1.2257, "step": 8568 }, { "epoch": 0.65, "grad_norm": 1.344452142715454, "learning_rate": 5.3593875201993174e-05, "loss": 1.0718, "step": 8569 }, { "epoch": 0.65, "grad_norm": 1.0468720197677612, "learning_rate": 5.357262542993198e-05, "loss": 1.3558, "step": 8570 }, { "epoch": 0.65, "grad_norm": 6.858792781829834, "learning_rate": 5.355137833003332e-05, "loss": 1.5314, "step": 8571 }, { "epoch": 0.65, "grad_norm": 1.9546012878417969, "learning_rate": 5.353013390352e-05, "loss": 1.8709, "step": 8572 }, { "epoch": 0.65, "grad_norm": 1.5162408351898193, "learning_rate": 5.350889215161479e-05, "loss": 1.5454, "step": 8573 }, { "epoch": 0.65, "grad_norm": 2.469722032546997, "learning_rate": 5.3487653075540334e-05, "loss": 1.5897, "step": 8574 }, { "epoch": 0.65, "grad_norm": 1.2200549840927124, "learning_rate": 5.346641667651897e-05, "loss": 1.8013, "step": 8575 }, { "epoch": 0.65, "grad_norm": 4.703245639801025, "learning_rate": 5.344518295577308e-05, "loss": 1.6648, "step": 8576 }, { "epoch": 0.65, "grad_norm": 1.5280202627182007, "learning_rate": 5.34239519145247e-05, "loss": 1.3942, "step": 8577 }, { "epoch": 0.65, "grad_norm": 1.822288155555725, "learning_rate": 5.3402723553995806e-05, "loss": 1.1092, "step": 8578 }, { "epoch": 0.65, "grad_norm": 0.9193724393844604, "learning_rate": 5.3381497875408214e-05, "loss": 1.0387, "step": 8579 }, { "epoch": 0.65, "grad_norm": 1.4655100107192993, "learning_rate": 5.3360274879983654e-05, "loss": 1.1134, "step": 8580 }, 
{ "epoch": 0.65, "grad_norm": 1.1128791570663452, "learning_rate": 5.3339054568943526e-05, "loss": 0.8961, "step": 8581 }, { "epoch": 0.65, "grad_norm": 2.1707303524017334, "learning_rate": 5.3317836943509246e-05, "loss": 1.4889, "step": 8582 }, { "epoch": 0.65, "grad_norm": 1.0104730129241943, "learning_rate": 5.329662200490202e-05, "loss": 1.3665, "step": 8583 }, { "epoch": 0.65, "grad_norm": 1.0617306232452393, "learning_rate": 5.327540975434284e-05, "loss": 1.3506, "step": 8584 }, { "epoch": 0.66, "grad_norm": 1.5229259729385376, "learning_rate": 5.3254200193052675e-05, "loss": 1.2704, "step": 8585 }, { "epoch": 0.66, "grad_norm": 4.480954170227051, "learning_rate": 5.323299332225218e-05, "loss": 1.0259, "step": 8586 }, { "epoch": 0.66, "grad_norm": 1.9457980394363403, "learning_rate": 5.321178914316192e-05, "loss": 1.1865, "step": 8587 }, { "epoch": 0.66, "grad_norm": 1.3757219314575195, "learning_rate": 5.3190587657002355e-05, "loss": 0.6897, "step": 8588 }, { "epoch": 0.66, "grad_norm": 1.4884467124938965, "learning_rate": 5.3169388864993786e-05, "loss": 1.2436, "step": 8589 }, { "epoch": 0.66, "grad_norm": 1.6029118299484253, "learning_rate": 5.314819276835625e-05, "loss": 1.2754, "step": 8590 }, { "epoch": 0.66, "grad_norm": 1.0771316289901733, "learning_rate": 5.312699936830976e-05, "loss": 1.3706, "step": 8591 }, { "epoch": 0.66, "grad_norm": 3.6863880157470703, "learning_rate": 5.3105808666074134e-05, "loss": 1.9146, "step": 8592 }, { "epoch": 0.66, "grad_norm": 2.845952033996582, "learning_rate": 5.3084620662868934e-05, "loss": 1.8581, "step": 8593 }, { "epoch": 0.66, "grad_norm": 1.429208517074585, "learning_rate": 5.306343535991376e-05, "loss": 0.8069, "step": 8594 }, { "epoch": 0.66, "grad_norm": 1.4106954336166382, "learning_rate": 5.304225275842788e-05, "loss": 1.2621, "step": 8595 }, { "epoch": 0.66, "grad_norm": 1.6018279790878296, "learning_rate": 5.302107285963045e-05, "loss": 1.9007, "step": 8596 }, { "epoch": 0.66, "grad_norm": 
1.054560899734497, "learning_rate": 5.299989566474053e-05, "loss": 1.4589, "step": 8597 }, { "epoch": 0.66, "grad_norm": 1.9952759742736816, "learning_rate": 5.297872117497704e-05, "loss": 1.7549, "step": 8598 }, { "epoch": 0.66, "grad_norm": 2.30438494682312, "learning_rate": 5.29575493915586e-05, "loss": 1.6259, "step": 8599 }, { "epoch": 0.66, "grad_norm": 3.7184784412384033, "learning_rate": 5.293638031570382e-05, "loss": 1.6382, "step": 8600 }, { "epoch": 0.66, "grad_norm": 1.9037363529205322, "learning_rate": 5.291521394863113e-05, "loss": 0.9634, "step": 8601 }, { "epoch": 0.66, "grad_norm": 1.2696361541748047, "learning_rate": 5.28940502915587e-05, "loss": 1.6657, "step": 8602 }, { "epoch": 0.66, "grad_norm": 1.2149462699890137, "learning_rate": 5.287288934570469e-05, "loss": 1.4083, "step": 8603 }, { "epoch": 0.66, "grad_norm": 1.687279462814331, "learning_rate": 5.285173111228698e-05, "loss": 1.1376, "step": 8604 }, { "epoch": 0.66, "grad_norm": 1.2366636991500854, "learning_rate": 5.283057559252341e-05, "loss": 2.0578, "step": 8605 }, { "epoch": 0.66, "grad_norm": 1.7759146690368652, "learning_rate": 5.2809422787631526e-05, "loss": 1.5009, "step": 8606 }, { "epoch": 0.66, "grad_norm": 1.162660837173462, "learning_rate": 5.278827269882887e-05, "loss": 1.1278, "step": 8607 }, { "epoch": 0.66, "grad_norm": 1.3036555051803589, "learning_rate": 5.276712532733268e-05, "loss": 1.6368, "step": 8608 }, { "epoch": 0.66, "grad_norm": 1.2695472240447998, "learning_rate": 5.274598067436014e-05, "loss": 1.5275, "step": 8609 }, { "epoch": 0.66, "grad_norm": 1.4629244804382324, "learning_rate": 5.27248387411283e-05, "loss": 1.3092, "step": 8610 }, { "epoch": 0.66, "grad_norm": 1.4748691320419312, "learning_rate": 5.27036995288539e-05, "loss": 1.8371, "step": 8611 }, { "epoch": 0.66, "grad_norm": 4.629674911499023, "learning_rate": 5.2682563038753677e-05, "loss": 1.836, "step": 8612 }, { "epoch": 0.66, "grad_norm": 1.1812164783477783, "learning_rate": 
5.2661429272044184e-05, "loss": 1.2286, "step": 8613 }, { "epoch": 0.66, "grad_norm": 1.3554646968841553, "learning_rate": 5.2640298229941763e-05, "loss": 1.3974, "step": 8614 }, { "epoch": 0.66, "grad_norm": 1.592066764831543, "learning_rate": 5.261916991366258e-05, "loss": 1.2893, "step": 8615 }, { "epoch": 0.66, "grad_norm": 1.3302192687988281, "learning_rate": 5.259804432442279e-05, "loss": 1.2992, "step": 8616 }, { "epoch": 0.66, "grad_norm": 1.951983094215393, "learning_rate": 5.257692146343819e-05, "loss": 1.648, "step": 8617 }, { "epoch": 0.66, "grad_norm": 2.386336326599121, "learning_rate": 5.255580133192457e-05, "loss": 1.3877, "step": 8618 }, { "epoch": 0.66, "grad_norm": 1.3525445461273193, "learning_rate": 5.253468393109755e-05, "loss": 1.7686, "step": 8619 }, { "epoch": 0.66, "grad_norm": 1.276740312576294, "learning_rate": 5.25135692621725e-05, "loss": 1.2437, "step": 8620 }, { "epoch": 0.66, "grad_norm": 1.558230996131897, "learning_rate": 5.2492457326364697e-05, "loss": 1.4371, "step": 8621 }, { "epoch": 0.66, "grad_norm": 2.506364345550537, "learning_rate": 5.247134812488932e-05, "loss": 1.7962, "step": 8622 }, { "epoch": 0.66, "grad_norm": 1.5706204175949097, "learning_rate": 5.245024165896126e-05, "loss": 2.0326, "step": 8623 }, { "epoch": 0.66, "grad_norm": 1.989280343055725, "learning_rate": 5.2429137929795314e-05, "loss": 1.4523, "step": 8624 }, { "epoch": 0.66, "grad_norm": 2.0525319576263428, "learning_rate": 5.240803693860616e-05, "loss": 1.8173, "step": 8625 }, { "epoch": 0.66, "grad_norm": 1.626422643661499, "learning_rate": 5.2386938686608236e-05, "loss": 1.3558, "step": 8626 }, { "epoch": 0.66, "grad_norm": 0.8545295596122742, "learning_rate": 5.2365843175015896e-05, "loss": 1.1452, "step": 8627 }, { "epoch": 0.66, "grad_norm": 3.0306203365325928, "learning_rate": 5.234475040504333e-05, "loss": 1.8743, "step": 8628 }, { "epoch": 0.66, "grad_norm": 1.5512853860855103, "learning_rate": 5.2323660377904494e-05, "loss": 1.7096, "step": 
8629 }, { "epoch": 0.66, "grad_norm": 2.2249579429626465, "learning_rate": 5.2302573094813266e-05, "loss": 1.2108, "step": 8630 }, { "epoch": 0.66, "grad_norm": 1.3573366403579712, "learning_rate": 5.2281488556983385e-05, "loss": 1.2364, "step": 8631 }, { "epoch": 0.66, "grad_norm": 1.1003988981246948, "learning_rate": 5.226040676562835e-05, "loss": 1.1427, "step": 8632 }, { "epoch": 0.66, "grad_norm": 1.6654446125030518, "learning_rate": 5.223932772196149e-05, "loss": 1.5287, "step": 8633 }, { "epoch": 0.66, "grad_norm": 2.2919540405273438, "learning_rate": 5.221825142719612e-05, "loss": 1.4842, "step": 8634 }, { "epoch": 0.66, "grad_norm": 1.3977289199829102, "learning_rate": 5.219717788254521e-05, "loss": 1.2999, "step": 8635 }, { "epoch": 0.66, "grad_norm": 1.2271515130996704, "learning_rate": 5.217610708922172e-05, "loss": 1.4255, "step": 8636 }, { "epoch": 0.66, "grad_norm": 1.5504399538040161, "learning_rate": 5.215503904843842e-05, "loss": 1.6103, "step": 8637 }, { "epoch": 0.66, "grad_norm": 1.22818922996521, "learning_rate": 5.213397376140781e-05, "loss": 1.1285, "step": 8638 }, { "epoch": 0.66, "grad_norm": 1.1326247453689575, "learning_rate": 5.211291122934237e-05, "loss": 1.6743, "step": 8639 }, { "epoch": 0.66, "grad_norm": 1.3391844034194946, "learning_rate": 5.209185145345439e-05, "loss": 1.6833, "step": 8640 }, { "epoch": 0.66, "grad_norm": 1.4413400888442993, "learning_rate": 5.207079443495595e-05, "loss": 1.2072, "step": 8641 }, { "epoch": 0.66, "grad_norm": 1.2761889696121216, "learning_rate": 5.204974017505904e-05, "loss": 1.3035, "step": 8642 }, { "epoch": 0.66, "grad_norm": 1.154171109199524, "learning_rate": 5.2028688674975415e-05, "loss": 1.1033, "step": 8643 }, { "epoch": 0.66, "grad_norm": 2.487654447555542, "learning_rate": 5.200763993591669e-05, "loss": 1.9485, "step": 8644 }, { "epoch": 0.66, "grad_norm": 1.4495360851287842, "learning_rate": 5.198659395909438e-05, "loss": 1.3993, "step": 8645 }, { "epoch": 0.66, "grad_norm": 
1.9527171850204468, "learning_rate": 5.1965550745719824e-05, "loss": 1.3378, "step": 8646 }, { "epoch": 0.66, "grad_norm": 1.0611716508865356, "learning_rate": 5.194451029700411e-05, "loss": 1.3516, "step": 8647 }, { "epoch": 0.66, "grad_norm": 1.0674078464508057, "learning_rate": 5.1923472614158276e-05, "loss": 0.8111, "step": 8648 }, { "epoch": 0.66, "grad_norm": 1.419411301612854, "learning_rate": 5.19024376983932e-05, "loss": 1.4764, "step": 8649 }, { "epoch": 0.66, "grad_norm": 0.7541950941085815, "learning_rate": 5.1881405550919493e-05, "loss": 0.7889, "step": 8650 }, { "epoch": 0.66, "grad_norm": 1.399109959602356, "learning_rate": 5.1860376172947746e-05, "loss": 1.1424, "step": 8651 }, { "epoch": 0.66, "grad_norm": 1.770755410194397, "learning_rate": 5.1839349565688275e-05, "loss": 1.4593, "step": 8652 }, { "epoch": 0.66, "grad_norm": 0.9619584083557129, "learning_rate": 5.181832573035126e-05, "loss": 1.4069, "step": 8653 }, { "epoch": 0.66, "grad_norm": 2.0120856761932373, "learning_rate": 5.179730466814676e-05, "loss": 1.4231, "step": 8654 }, { "epoch": 0.66, "grad_norm": 1.7290962934494019, "learning_rate": 5.177628638028472e-05, "loss": 1.1035, "step": 8655 }, { "epoch": 0.66, "grad_norm": 1.8078352212905884, "learning_rate": 5.175527086797478e-05, "loss": 1.1367, "step": 8656 }, { "epoch": 0.66, "grad_norm": 0.9841585159301758, "learning_rate": 5.1734258132426536e-05, "loss": 1.5128, "step": 8657 }, { "epoch": 0.66, "grad_norm": 1.26837956905365, "learning_rate": 5.171324817484943e-05, "loss": 1.5185, "step": 8658 }, { "epoch": 0.66, "grad_norm": 1.2455430030822754, "learning_rate": 5.1692240996452624e-05, "loss": 0.9707, "step": 8659 }, { "epoch": 0.66, "grad_norm": 1.2118377685546875, "learning_rate": 5.16712365984453e-05, "loss": 1.5161, "step": 8660 }, { "epoch": 0.66, "grad_norm": 1.373857021331787, "learning_rate": 5.165023498203631e-05, "loss": 0.9142, "step": 8661 }, { "epoch": 0.66, "grad_norm": 2.072542428970337, "learning_rate": 
5.162923614843442e-05, "loss": 1.4475, "step": 8662 }, { "epoch": 0.66, "grad_norm": 1.5253099203109741, "learning_rate": 5.160824009884825e-05, "loss": 1.4448, "step": 8663 }, { "epoch": 0.66, "grad_norm": 3.0126357078552246, "learning_rate": 5.158724683448628e-05, "loss": 1.9127, "step": 8664 }, { "epoch": 0.66, "grad_norm": 4.411964416503906, "learning_rate": 5.156625635655672e-05, "loss": 1.2569, "step": 8665 }, { "epoch": 0.66, "grad_norm": 1.6560019254684448, "learning_rate": 5.154526866626772e-05, "loss": 1.17, "step": 8666 }, { "epoch": 0.66, "grad_norm": 1.749440312385559, "learning_rate": 5.15242837648273e-05, "loss": 1.6503, "step": 8667 }, { "epoch": 0.66, "grad_norm": 1.2483059167861938, "learning_rate": 5.150330165344317e-05, "loss": 1.4495, "step": 8668 }, { "epoch": 0.66, "grad_norm": 1.1714750528335571, "learning_rate": 5.148232233332303e-05, "loss": 1.2584, "step": 8669 }, { "epoch": 0.66, "grad_norm": 1.9691150188446045, "learning_rate": 5.146134580567438e-05, "loss": 1.6425, "step": 8670 }, { "epoch": 0.66, "grad_norm": 1.7127339839935303, "learning_rate": 5.14403720717045e-05, "loss": 2.005, "step": 8671 }, { "epoch": 0.66, "grad_norm": 1.179521918296814, "learning_rate": 5.141940113262052e-05, "loss": 1.0852, "step": 8672 }, { "epoch": 0.66, "grad_norm": 1.1986078023910522, "learning_rate": 5.1398432989629473e-05, "loss": 1.6561, "step": 8673 }, { "epoch": 0.66, "grad_norm": 1.020575761795044, "learning_rate": 5.137746764393825e-05, "loss": 1.1283, "step": 8674 }, { "epoch": 0.66, "grad_norm": 1.2502293586730957, "learning_rate": 5.135650509675342e-05, "loss": 1.6578, "step": 8675 }, { "epoch": 0.66, "grad_norm": 0.8869434595108032, "learning_rate": 5.133554534928156e-05, "loss": 1.0758, "step": 8676 }, { "epoch": 0.66, "grad_norm": 1.5809658765792847, "learning_rate": 5.1314588402729044e-05, "loss": 1.6831, "step": 8677 }, { "epoch": 0.66, "grad_norm": 2.408761739730835, "learning_rate": 5.1293634258302005e-05, "loss": 1.1291, "step": 8678 }, 
{ "epoch": 0.66, "grad_norm": 1.5764559507369995, "learning_rate": 5.127268291720655e-05, "loss": 1.1883, "step": 8679 }, { "epoch": 0.66, "grad_norm": 2.7006478309631348, "learning_rate": 5.12517343806485e-05, "loss": 0.9334, "step": 8680 }, { "epoch": 0.66, "grad_norm": 1.6438181400299072, "learning_rate": 5.123078864983353e-05, "loss": 1.5358, "step": 8681 }, { "epoch": 0.66, "grad_norm": 1.4801839590072632, "learning_rate": 5.1209845725967235e-05, "loss": 1.3509, "step": 8682 }, { "epoch": 0.66, "grad_norm": 1.6585156917572021, "learning_rate": 5.1188905610255025e-05, "loss": 1.0642, "step": 8683 }, { "epoch": 0.66, "grad_norm": 2.091517210006714, "learning_rate": 5.1167968303902046e-05, "loss": 1.8469, "step": 8684 }, { "epoch": 0.66, "grad_norm": 2.4097213745117188, "learning_rate": 5.1147033808113424e-05, "loss": 1.9876, "step": 8685 }, { "epoch": 0.66, "grad_norm": 1.677225112915039, "learning_rate": 5.1126102124094064e-05, "loss": 0.9375, "step": 8686 }, { "epoch": 0.66, "grad_norm": 1.304816484451294, "learning_rate": 5.110517325304865e-05, "loss": 1.5819, "step": 8687 }, { "epoch": 0.66, "grad_norm": 1.6118601560592651, "learning_rate": 5.108424719618183e-05, "loss": 2.2577, "step": 8688 }, { "epoch": 0.66, "grad_norm": 1.6882410049438477, "learning_rate": 5.1063323954697996e-05, "loss": 1.2326, "step": 8689 }, { "epoch": 0.66, "grad_norm": 1.490090250968933, "learning_rate": 5.104240352980134e-05, "loss": 1.3272, "step": 8690 }, { "epoch": 0.66, "grad_norm": 1.8406685590744019, "learning_rate": 5.1021485922695986e-05, "loss": 1.1715, "step": 8691 }, { "epoch": 0.66, "grad_norm": 2.9780728816986084, "learning_rate": 5.100057113458592e-05, "loss": 2.4373, "step": 8692 }, { "epoch": 0.66, "grad_norm": 1.2644850015640259, "learning_rate": 5.097965916667483e-05, "loss": 0.8157, "step": 8693 }, { "epoch": 0.66, "grad_norm": 1.1840169429779053, "learning_rate": 5.0958750020166344e-05, "loss": 1.0785, "step": 8694 }, { "epoch": 0.66, "grad_norm": 
2.956409454345703, "learning_rate": 5.0937843696263966e-05, "loss": 1.6115, "step": 8695 }, { "epoch": 0.66, "grad_norm": 2.345454692840576, "learning_rate": 5.091694019617088e-05, "loss": 1.7866, "step": 8696 }, { "epoch": 0.66, "grad_norm": 1.563120722770691, "learning_rate": 5.089603952109028e-05, "loss": 1.334, "step": 8697 }, { "epoch": 0.66, "grad_norm": 1.7473214864730835, "learning_rate": 5.087514167222505e-05, "loss": 2.0749, "step": 8698 }, { "epoch": 0.66, "grad_norm": 1.3538703918457031, "learning_rate": 5.085424665077805e-05, "loss": 1.2669, "step": 8699 }, { "epoch": 0.66, "grad_norm": 1.5731323957443237, "learning_rate": 5.0833354457951833e-05, "loss": 1.3041, "step": 8700 }, { "epoch": 0.66, "grad_norm": 1.0302605628967285, "learning_rate": 5.0812465094948944e-05, "loss": 1.1412, "step": 8701 }, { "epoch": 0.66, "grad_norm": 2.980391025543213, "learning_rate": 5.079157856297162e-05, "loss": 1.6516, "step": 8702 }, { "epoch": 0.66, "grad_norm": 1.2491651773452759, "learning_rate": 5.077069486322203e-05, "loss": 1.5313, "step": 8703 }, { "epoch": 0.66, "grad_norm": 1.8097323179244995, "learning_rate": 5.074981399690218e-05, "loss": 1.2497, "step": 8704 }, { "epoch": 0.66, "grad_norm": 1.5183050632476807, "learning_rate": 5.0728935965213834e-05, "loss": 0.9064, "step": 8705 }, { "epoch": 0.66, "grad_norm": 1.5475646257400513, "learning_rate": 5.070806076935866e-05, "loss": 1.3135, "step": 8706 }, { "epoch": 0.66, "grad_norm": 1.330970287322998, "learning_rate": 5.0687188410538166e-05, "loss": 1.2378, "step": 8707 }, { "epoch": 0.66, "grad_norm": 1.6834968328475952, "learning_rate": 5.066631888995368e-05, "loss": 2.0226, "step": 8708 }, { "epoch": 0.66, "grad_norm": 1.374068260192871, "learning_rate": 5.064545220880631e-05, "loss": 1.7797, "step": 8709 }, { "epoch": 0.66, "grad_norm": 3.23457670211792, "learning_rate": 5.062458836829711e-05, "loss": 2.9755, "step": 8710 }, { "epoch": 0.66, "grad_norm": 3.2660927772521973, "learning_rate": 
5.060372736962686e-05, "loss": 1.6549, "step": 8711 }, { "epoch": 0.66, "grad_norm": 1.1849472522735596, "learning_rate": 5.058286921399626e-05, "loss": 0.8917, "step": 8712 }, { "epoch": 0.66, "grad_norm": 1.0619019269943237, "learning_rate": 5.056201390260587e-05, "loss": 0.9816, "step": 8713 }, { "epoch": 0.66, "grad_norm": 1.331616759300232, "learning_rate": 5.054116143665592e-05, "loss": 1.6379, "step": 8714 }, { "epoch": 0.66, "grad_norm": 1.3527976274490356, "learning_rate": 5.052031181734667e-05, "loss": 1.0727, "step": 8715 }, { "epoch": 0.67, "grad_norm": 1.3162205219268799, "learning_rate": 5.0499465045878134e-05, "loss": 1.254, "step": 8716 }, { "epoch": 0.67, "grad_norm": 1.239630937576294, "learning_rate": 5.047862112345015e-05, "loss": 0.9491, "step": 8717 }, { "epoch": 0.67, "grad_norm": 3.1496334075927734, "learning_rate": 5.045778005126238e-05, "loss": 0.9968, "step": 8718 }, { "epoch": 0.67, "grad_norm": 5.301722049713135, "learning_rate": 5.043694183051439e-05, "loss": 1.3753, "step": 8719 }, { "epoch": 0.67, "grad_norm": 1.227211594581604, "learning_rate": 5.041610646240549e-05, "loss": 1.0274, "step": 8720 }, { "epoch": 0.67, "grad_norm": 1.2891297340393066, "learning_rate": 5.03952739481349e-05, "loss": 1.3488, "step": 8721 }, { "epoch": 0.67, "grad_norm": 1.1666444540023804, "learning_rate": 5.037444428890169e-05, "loss": 1.5419, "step": 8722 }, { "epoch": 0.67, "grad_norm": 0.9741489887237549, "learning_rate": 5.0353617485904657e-05, "loss": 0.9787, "step": 8723 }, { "epoch": 0.67, "grad_norm": 1.2322735786437988, "learning_rate": 5.0332793540342525e-05, "loss": 1.8328, "step": 8724 }, { "epoch": 0.67, "grad_norm": 1.081039309501648, "learning_rate": 5.03119724534139e-05, "loss": 1.0355, "step": 8725 }, { "epoch": 0.67, "grad_norm": 1.0914404392242432, "learning_rate": 5.029115422631709e-05, "loss": 1.1283, "step": 8726 }, { "epoch": 0.67, "grad_norm": 1.6102471351623535, "learning_rate": 5.027033886025029e-05, "loss": 1.8992, "step": 8727 
}, { "epoch": 0.67, "grad_norm": 1.534037470817566, "learning_rate": 5.0249526356411595e-05, "loss": 0.9073, "step": 8728 }, { "epoch": 0.67, "grad_norm": 6.820169448852539, "learning_rate": 5.022871671599883e-05, "loss": 2.5953, "step": 8729 }, { "epoch": 0.67, "grad_norm": 1.5760337114334106, "learning_rate": 5.020790994020972e-05, "loss": 1.8727, "step": 8730 }, { "epoch": 0.67, "grad_norm": 1.8586870431900024, "learning_rate": 5.018710603024187e-05, "loss": 1.54, "step": 8731 }, { "epoch": 0.67, "grad_norm": 1.1521193981170654, "learning_rate": 5.01663049872926e-05, "loss": 1.98, "step": 8732 }, { "epoch": 0.67, "grad_norm": 1.593691349029541, "learning_rate": 5.014550681255914e-05, "loss": 0.7114, "step": 8733 }, { "epoch": 0.67, "grad_norm": 1.111969232559204, "learning_rate": 5.012471150723861e-05, "loss": 1.6285, "step": 8734 }, { "epoch": 0.67, "grad_norm": 2.091114044189453, "learning_rate": 5.0103919072527804e-05, "loss": 1.0042, "step": 8735 }, { "epoch": 0.67, "grad_norm": 1.94064462184906, "learning_rate": 5.008312950962354e-05, "loss": 1.1489, "step": 8736 }, { "epoch": 0.67, "grad_norm": 2.2875518798828125, "learning_rate": 5.0062342819722326e-05, "loss": 1.5185, "step": 8737 }, { "epoch": 0.67, "grad_norm": 1.207876205444336, "learning_rate": 5.004155900402052e-05, "loss": 1.449, "step": 8738 }, { "epoch": 0.67, "grad_norm": 1.4380964040756226, "learning_rate": 5.002077806371438e-05, "loss": 1.8386, "step": 8739 }, { "epoch": 0.67, "grad_norm": 1.0811965465545654, "learning_rate": 5.000000000000002e-05, "loss": 1.298, "step": 8740 }, { "epoch": 0.67, "grad_norm": 1.416649580001831, "learning_rate": 4.997922481407326e-05, "loss": 1.5035, "step": 8741 }, { "epoch": 0.67, "grad_norm": 1.1399338245391846, "learning_rate": 4.995845250712986e-05, "loss": 1.7211, "step": 8742 }, { "epoch": 0.67, "grad_norm": 1.3901127576828003, "learning_rate": 4.993768308036544e-05, "loss": 1.4208, "step": 8743 }, { "epoch": 0.67, "grad_norm": 1.1572060585021973, 
"learning_rate": 4.99169165349753e-05, "loss": 1.8338, "step": 8744 }, { "epoch": 0.67, "grad_norm": 2.214261293411255, "learning_rate": 4.989615287215478e-05, "loss": 0.806, "step": 8745 }, { "epoch": 0.67, "grad_norm": 5.214333534240723, "learning_rate": 4.987539209309888e-05, "loss": 1.8659, "step": 8746 }, { "epoch": 0.67, "grad_norm": 1.5518856048583984, "learning_rate": 4.985463419900248e-05, "loss": 1.1931, "step": 8747 }, { "epoch": 0.67, "grad_norm": 1.3952990770339966, "learning_rate": 4.983387919106035e-05, "loss": 0.6937, "step": 8748 }, { "epoch": 0.67, "grad_norm": 1.8467646837234497, "learning_rate": 4.981312707046712e-05, "loss": 1.9058, "step": 8749 }, { "epoch": 0.67, "grad_norm": 1.3518561124801636, "learning_rate": 4.979237783841707e-05, "loss": 1.4108, "step": 8750 }, { "epoch": 0.67, "grad_norm": 1.1106692552566528, "learning_rate": 4.977163149610452e-05, "loss": 1.6791, "step": 8751 }, { "epoch": 0.67, "grad_norm": 1.2847309112548828, "learning_rate": 4.975088804472356e-05, "loss": 1.0691, "step": 8752 }, { "epoch": 0.67, "grad_norm": 1.598008632659912, "learning_rate": 4.9730147485468025e-05, "loss": 1.811, "step": 8753 }, { "epoch": 0.67, "grad_norm": 1.2061855792999268, "learning_rate": 4.9709409819531726e-05, "loss": 1.7493, "step": 8754 }, { "epoch": 0.67, "grad_norm": 2.0683913230895996, "learning_rate": 4.96886750481082e-05, "loss": 2.6053, "step": 8755 }, { "epoch": 0.67, "grad_norm": 1.3602855205535889, "learning_rate": 4.9667943172390805e-05, "loss": 1.0869, "step": 8756 }, { "epoch": 0.67, "grad_norm": 1.358149528503418, "learning_rate": 4.964721419357283e-05, "loss": 1.9199, "step": 8757 }, { "epoch": 0.67, "grad_norm": 1.657830834388733, "learning_rate": 4.962648811284738e-05, "loss": 1.7487, "step": 8758 }, { "epoch": 0.67, "grad_norm": 4.022294998168945, "learning_rate": 4.960576493140728e-05, "loss": 1.7181, "step": 8759 }, { "epoch": 0.67, "grad_norm": 1.50216805934906, "learning_rate": 4.958504465044532e-05, "loss": 1.4347, 
"step": 8760 }, { "epoch": 0.67, "grad_norm": 1.32310950756073, "learning_rate": 4.95643272711541e-05, "loss": 0.9064, "step": 8761 }, { "epoch": 0.67, "grad_norm": 1.5289794206619263, "learning_rate": 4.9543612794725936e-05, "loss": 1.7231, "step": 8762 }, { "epoch": 0.67, "grad_norm": 1.261253833770752, "learning_rate": 4.952290122235316e-05, "loss": 1.3387, "step": 8763 }, { "epoch": 0.67, "grad_norm": 1.0431015491485596, "learning_rate": 4.950219255522775e-05, "loss": 1.1442, "step": 8764 }, { "epoch": 0.67, "grad_norm": 1.271406888961792, "learning_rate": 4.948148679454171e-05, "loss": 1.3185, "step": 8765 }, { "epoch": 0.67, "grad_norm": 1.4380308389663696, "learning_rate": 4.9460783941486675e-05, "loss": 1.101, "step": 8766 }, { "epoch": 0.67, "grad_norm": 1.756717562675476, "learning_rate": 4.94400839972543e-05, "loss": 1.8034, "step": 8767 }, { "epoch": 0.67, "grad_norm": 3.6334240436553955, "learning_rate": 4.941938696303592e-05, "loss": 1.6625, "step": 8768 }, { "epoch": 0.67, "grad_norm": 1.1350237131118774, "learning_rate": 4.939869284002278e-05, "loss": 1.1515, "step": 8769 }, { "epoch": 0.67, "grad_norm": 1.8214353322982788, "learning_rate": 4.9378001629406e-05, "loss": 1.8328, "step": 8770 }, { "epoch": 0.67, "grad_norm": 1.1902488470077515, "learning_rate": 4.935731333237641e-05, "loss": 1.2967, "step": 8771 }, { "epoch": 0.67, "grad_norm": 1.1344345808029175, "learning_rate": 4.9336627950124756e-05, "loss": 1.1942, "step": 8772 }, { "epoch": 0.67, "grad_norm": 1.8750731945037842, "learning_rate": 4.931594548384166e-05, "loss": 1.5085, "step": 8773 }, { "epoch": 0.67, "grad_norm": 1.3815648555755615, "learning_rate": 4.929526593471747e-05, "loss": 0.9238, "step": 8774 }, { "epoch": 0.67, "grad_norm": 1.6489382982254028, "learning_rate": 4.927458930394238e-05, "loss": 1.5358, "step": 8775 }, { "epoch": 0.67, "grad_norm": 1.6742603778839111, "learning_rate": 4.9253915592706515e-05, "loss": 1.5019, "step": 8776 }, { "epoch": 0.67, "grad_norm": 
2.2755837440490723, "learning_rate": 4.92332448021997e-05, "loss": 1.092, "step": 8777 }, { "epoch": 0.67, "grad_norm": 1.1677523851394653, "learning_rate": 4.921257693361167e-05, "loss": 0.8206, "step": 8778 }, { "epoch": 0.67, "grad_norm": 2.704352378845215, "learning_rate": 4.9191911988132064e-05, "loss": 1.0166, "step": 8779 }, { "epoch": 0.67, "grad_norm": 1.21833336353302, "learning_rate": 4.9171249966950175e-05, "loss": 0.8156, "step": 8780 }, { "epoch": 0.67, "grad_norm": 1.039673089981079, "learning_rate": 4.915059087125523e-05, "loss": 1.1981, "step": 8781 }, { "epoch": 0.67, "grad_norm": 1.4623422622680664, "learning_rate": 4.912993470223635e-05, "loss": 1.5318, "step": 8782 }, { "epoch": 0.67, "grad_norm": 1.232453465461731, "learning_rate": 4.910928146108237e-05, "loss": 1.2289, "step": 8783 }, { "epoch": 0.67, "grad_norm": 1.4662209749221802, "learning_rate": 4.9088631148981956e-05, "loss": 1.2654, "step": 8784 }, { "epoch": 0.67, "grad_norm": 1.6169155836105347, "learning_rate": 4.9067983767123736e-05, "loss": 1.8617, "step": 8785 }, { "epoch": 0.67, "grad_norm": 1.3940279483795166, "learning_rate": 4.9047339316696004e-05, "loss": 1.9381, "step": 8786 }, { "epoch": 0.67, "grad_norm": 1.5849281549453735, "learning_rate": 4.902669779888702e-05, "loss": 1.6217, "step": 8787 }, { "epoch": 0.67, "grad_norm": 1.1170156002044678, "learning_rate": 4.900605921488484e-05, "loss": 0.8301, "step": 8788 }, { "epoch": 0.67, "grad_norm": 1.015224814414978, "learning_rate": 4.898542356587728e-05, "loss": 1.1935, "step": 8789 }, { "epoch": 0.67, "grad_norm": 1.4717270135879517, "learning_rate": 4.8964790853052047e-05, "loss": 1.5966, "step": 8790 }, { "epoch": 0.67, "grad_norm": 2.3991167545318604, "learning_rate": 4.894416107759673e-05, "loss": 1.5319, "step": 8791 }, { "epoch": 0.67, "grad_norm": 2.089601993560791, "learning_rate": 4.8923534240698655e-05, "loss": 0.984, "step": 8792 }, { "epoch": 0.67, "grad_norm": 2.4161667823791504, "learning_rate": 
4.8902910343544986e-05, "loss": 1.3276, "step": 8793 }, { "epoch": 0.67, "grad_norm": 2.5120160579681396, "learning_rate": 4.888228938732279e-05, "loss": 1.3964, "step": 8794 }, { "epoch": 0.67, "grad_norm": 1.7873681783676147, "learning_rate": 4.886167137321888e-05, "loss": 1.2569, "step": 8795 }, { "epoch": 0.67, "grad_norm": 3.8706159591674805, "learning_rate": 4.884105630241995e-05, "loss": 1.4412, "step": 8796 }, { "epoch": 0.67, "grad_norm": 1.4660956859588623, "learning_rate": 4.882044417611258e-05, "loss": 0.9871, "step": 8797 }, { "epoch": 0.67, "grad_norm": 1.3036150932312012, "learning_rate": 4.8799834995483026e-05, "loss": 0.993, "step": 8798 }, { "epoch": 0.67, "grad_norm": 2.055727005004883, "learning_rate": 4.877922876171751e-05, "loss": 1.6158, "step": 8799 }, { "epoch": 0.67, "grad_norm": 1.7239164113998413, "learning_rate": 4.875862547600207e-05, "loss": 1.9568, "step": 8800 }, { "epoch": 0.67, "grad_norm": 2.4122815132141113, "learning_rate": 4.873802513952248e-05, "loss": 1.2212, "step": 8801 }, { "epoch": 0.67, "grad_norm": 1.2293589115142822, "learning_rate": 4.871742775346447e-05, "loss": 1.3313, "step": 8802 }, { "epoch": 0.67, "grad_norm": 4.719970703125, "learning_rate": 4.86968333190135e-05, "loss": 2.0185, "step": 8803 }, { "epoch": 0.67, "grad_norm": 1.7809181213378906, "learning_rate": 4.8676241837354876e-05, "loss": 1.0358, "step": 8804 }, { "epoch": 0.67, "grad_norm": 2.8149635791778564, "learning_rate": 4.8655653309673776e-05, "loss": 1.1783, "step": 8805 }, { "epoch": 0.67, "grad_norm": 1.0363743305206299, "learning_rate": 4.863506773715524e-05, "loss": 1.425, "step": 8806 }, { "epoch": 0.67, "grad_norm": 1.1756540536880493, "learning_rate": 4.8614485120984e-05, "loss": 0.8638, "step": 8807 }, { "epoch": 0.67, "grad_norm": 1.1982344388961792, "learning_rate": 4.8593905462344745e-05, "loss": 1.4263, "step": 8808 }, { "epoch": 0.67, "grad_norm": 1.4069076776504517, "learning_rate": 4.857332876242195e-05, "loss": 1.0307, "step": 8809 
}, { "epoch": 0.67, "grad_norm": 1.0048487186431885, "learning_rate": 4.855275502239997e-05, "loss": 0.7541, "step": 8810 }, { "epoch": 0.67, "grad_norm": 3.7007575035095215, "learning_rate": 4.8532184243462896e-05, "loss": 1.8175, "step": 8811 }, { "epoch": 0.67, "grad_norm": 1.022151231765747, "learning_rate": 4.851161642679466e-05, "loss": 1.3089, "step": 8812 }, { "epoch": 0.67, "grad_norm": 1.2529950141906738, "learning_rate": 4.849105157357914e-05, "loss": 1.2517, "step": 8813 }, { "epoch": 0.67, "grad_norm": 1.1790452003479004, "learning_rate": 4.847048968499987e-05, "loss": 1.2826, "step": 8814 }, { "epoch": 0.67, "grad_norm": 2.2396910190582275, "learning_rate": 4.8449930762240355e-05, "loss": 1.7001, "step": 8815 }, { "epoch": 0.67, "grad_norm": 0.9864329099655151, "learning_rate": 4.8429374806483904e-05, "loss": 1.4454, "step": 8816 }, { "epoch": 0.67, "grad_norm": 2.9130187034606934, "learning_rate": 4.840882181891359e-05, "loss": 1.2965, "step": 8817 }, { "epoch": 0.67, "grad_norm": 1.5754040479660034, "learning_rate": 4.838827180071234e-05, "loss": 1.7227, "step": 8818 }, { "epoch": 0.67, "grad_norm": 1.4235626459121704, "learning_rate": 4.8367724753063004e-05, "loss": 1.5246, "step": 8819 }, { "epoch": 0.67, "grad_norm": 0.8938860893249512, "learning_rate": 4.834718067714813e-05, "loss": 0.9819, "step": 8820 }, { "epoch": 0.67, "grad_norm": 1.3377914428710938, "learning_rate": 4.832663957415012e-05, "loss": 0.8163, "step": 8821 }, { "epoch": 0.67, "grad_norm": 1.3217484951019287, "learning_rate": 4.8306101445251296e-05, "loss": 1.139, "step": 8822 }, { "epoch": 0.67, "grad_norm": 1.7859008312225342, "learning_rate": 4.828556629163368e-05, "loss": 0.8892, "step": 8823 }, { "epoch": 0.67, "grad_norm": 1.3610787391662598, "learning_rate": 4.826503411447921e-05, "loss": 1.6365, "step": 8824 }, { "epoch": 0.67, "grad_norm": 2.344353437423706, "learning_rate": 4.824450491496968e-05, "loss": 1.0649, "step": 8825 }, { "epoch": 0.67, "grad_norm": 
1.477405071258545, "learning_rate": 4.82239786942866e-05, "loss": 1.6966, "step": 8826 }, { "epoch": 0.67, "grad_norm": 1.1434853076934814, "learning_rate": 4.820345545361139e-05, "loss": 0.9497, "step": 8827 }, { "epoch": 0.67, "grad_norm": 1.1081750392913818, "learning_rate": 4.818293519412532e-05, "loss": 0.8559, "step": 8828 }, { "epoch": 0.67, "grad_norm": 2.7658026218414307, "learning_rate": 4.8162417917009386e-05, "loss": 1.4221, "step": 8829 }, { "epoch": 0.67, "grad_norm": 1.406290888786316, "learning_rate": 4.814190362344454e-05, "loss": 1.3847, "step": 8830 }, { "epoch": 0.67, "grad_norm": 1.175563097000122, "learning_rate": 4.812139231461147e-05, "loss": 0.8824, "step": 8831 }, { "epoch": 0.67, "grad_norm": 1.014983892440796, "learning_rate": 4.810088399169067e-05, "loss": 1.3478, "step": 8832 }, { "epoch": 0.67, "grad_norm": 1.3310904502868652, "learning_rate": 4.808037865586256e-05, "loss": 0.7739, "step": 8833 }, { "epoch": 0.67, "grad_norm": 1.3925387859344482, "learning_rate": 4.8059876308307373e-05, "loss": 1.4059, "step": 8834 }, { "epoch": 0.67, "grad_norm": 3.421492338180542, "learning_rate": 4.8039376950205064e-05, "loss": 1.4042, "step": 8835 }, { "epoch": 0.67, "grad_norm": 1.4566997289657593, "learning_rate": 4.8018880582735514e-05, "loss": 1.5063, "step": 8836 }, { "epoch": 0.67, "grad_norm": 1.3506340980529785, "learning_rate": 4.799838720707846e-05, "loss": 1.3717, "step": 8837 }, { "epoch": 0.67, "grad_norm": 1.234452724456787, "learning_rate": 4.7977896824413335e-05, "loss": 1.4196, "step": 8838 }, { "epoch": 0.67, "grad_norm": 1.7071800231933594, "learning_rate": 4.795740943591955e-05, "loss": 1.6444, "step": 8839 }, { "epoch": 0.67, "grad_norm": 0.9445214867591858, "learning_rate": 4.793692504277625e-05, "loss": 1.5606, "step": 8840 }, { "epoch": 0.67, "grad_norm": 1.4826438426971436, "learning_rate": 4.7916443646162355e-05, "loss": 1.3724, "step": 8841 }, { "epoch": 0.67, "grad_norm": 1.0251445770263672, "learning_rate": 
4.789596524725677e-05, "loss": 1.2189, "step": 8842 }, { "epoch": 0.67, "grad_norm": 2.0091607570648193, "learning_rate": 4.7875489847238155e-05, "loss": 1.6198, "step": 8843 }, { "epoch": 0.67, "grad_norm": 3.648420572280884, "learning_rate": 4.785501744728491e-05, "loss": 2.2297, "step": 8844 }, { "epoch": 0.67, "grad_norm": 1.7450718879699707, "learning_rate": 4.783454804857539e-05, "loss": 1.2697, "step": 8845 }, { "epoch": 0.67, "grad_norm": 1.0566582679748535, "learning_rate": 4.7814081652287755e-05, "loss": 1.1692, "step": 8846 }, { "epoch": 0.68, "grad_norm": 1.5196152925491333, "learning_rate": 4.77936182595999e-05, "loss": 1.413, "step": 8847 }, { "epoch": 0.68, "grad_norm": 1.2687864303588867, "learning_rate": 4.777315787168968e-05, "loss": 1.717, "step": 8848 }, { "epoch": 0.68, "grad_norm": 1.2952854633331299, "learning_rate": 4.775270048973466e-05, "loss": 1.5838, "step": 8849 }, { "epoch": 0.68, "grad_norm": 1.281908392906189, "learning_rate": 4.7732246114912246e-05, "loss": 1.3162, "step": 8850 }, { "epoch": 0.68, "grad_norm": 3.314058542251587, "learning_rate": 4.7711794748399765e-05, "loss": 1.367, "step": 8851 }, { "epoch": 0.68, "grad_norm": 3.616553783416748, "learning_rate": 4.7691346391374326e-05, "loss": 1.4968, "step": 8852 }, { "epoch": 0.68, "grad_norm": 1.5857748985290527, "learning_rate": 4.7670901045012775e-05, "loss": 0.9777, "step": 8853 }, { "epoch": 0.68, "grad_norm": 1.049737811088562, "learning_rate": 4.7650458710491906e-05, "loss": 0.8991, "step": 8854 }, { "epoch": 0.68, "grad_norm": 2.935415744781494, "learning_rate": 4.763001938898832e-05, "loss": 2.2314, "step": 8855 }, { "epoch": 0.68, "grad_norm": 1.615770697593689, "learning_rate": 4.7609583081678356e-05, "loss": 1.4111, "step": 8856 }, { "epoch": 0.68, "grad_norm": 1.2940270900726318, "learning_rate": 4.7589149789738306e-05, "loss": 1.3929, "step": 8857 }, { "epoch": 0.68, "grad_norm": 1.5311037302017212, "learning_rate": 4.756871951434415e-05, "loss": 0.6428, "step": 
8858 }, { "epoch": 0.68, "grad_norm": 2.009124279022217, "learning_rate": 4.7548292256671845e-05, "loss": 1.4896, "step": 8859 }, { "epoch": 0.68, "grad_norm": 0.9550358653068542, "learning_rate": 4.752786801789703e-05, "loss": 1.0734, "step": 8860 }, { "epoch": 0.68, "grad_norm": 1.5138214826583862, "learning_rate": 4.750744679919529e-05, "loss": 1.1667, "step": 8861 }, { "epoch": 0.68, "grad_norm": 1.312833309173584, "learning_rate": 4.7487028601741934e-05, "loss": 1.5323, "step": 8862 }, { "epoch": 0.68, "grad_norm": 2.0136771202087402, "learning_rate": 4.746661342671216e-05, "loss": 1.3164, "step": 8863 }, { "epoch": 0.68, "grad_norm": 1.5061261653900146, "learning_rate": 4.7446201275281035e-05, "loss": 1.0612, "step": 8864 }, { "epoch": 0.68, "grad_norm": 1.1812299489974976, "learning_rate": 4.7425792148623315e-05, "loss": 1.5968, "step": 8865 }, { "epoch": 0.68, "grad_norm": 2.6123063564300537, "learning_rate": 4.74053860479137e-05, "loss": 1.8437, "step": 8866 }, { "epoch": 0.68, "grad_norm": 1.134412169456482, "learning_rate": 4.738498297432672e-05, "loss": 1.4765, "step": 8867 }, { "epoch": 0.68, "grad_norm": 2.2667181491851807, "learning_rate": 4.736458292903664e-05, "loss": 2.0455, "step": 8868 }, { "epoch": 0.68, "grad_norm": 1.6059532165527344, "learning_rate": 4.734418591321757e-05, "loss": 1.177, "step": 8869 }, { "epoch": 0.68, "grad_norm": 1.3753979206085205, "learning_rate": 4.732379192804356e-05, "loss": 1.6101, "step": 8870 }, { "epoch": 0.68, "grad_norm": 2.4024384021759033, "learning_rate": 4.7303400974688305e-05, "loss": 1.0862, "step": 8871 }, { "epoch": 0.68, "grad_norm": 1.386650800704956, "learning_rate": 4.728301305432548e-05, "loss": 1.4853, "step": 8872 }, { "epoch": 0.68, "grad_norm": 3.5352206230163574, "learning_rate": 4.726262816812855e-05, "loss": 1.6067, "step": 8873 }, { "epoch": 0.68, "grad_norm": 1.814233422279358, "learning_rate": 4.7242246317270724e-05, "loss": 1.5216, "step": 8874 }, { "epoch": 0.68, "grad_norm": 
4.1383748054504395, "learning_rate": 4.722186750292511e-05, "loss": 1.7859, "step": 8875 }, { "epoch": 0.68, "grad_norm": 3.8501088619232178, "learning_rate": 4.720149172626467e-05, "loss": 2.4535, "step": 8876 }, { "epoch": 0.68, "grad_norm": 1.8053597211837769, "learning_rate": 4.7181118988462124e-05, "loss": 1.6514, "step": 8877 }, { "epoch": 0.68, "grad_norm": 4.48660135269165, "learning_rate": 4.716074929068999e-05, "loss": 1.9801, "step": 8878 }, { "epoch": 0.68, "grad_norm": 1.6791369915008545, "learning_rate": 4.714038263412074e-05, "loss": 1.2513, "step": 8879 }, { "epoch": 0.68, "grad_norm": 1.4394117593765259, "learning_rate": 4.712001901992652e-05, "loss": 0.8759, "step": 8880 }, { "epoch": 0.68, "grad_norm": 1.3596163988113403, "learning_rate": 4.7099658449279405e-05, "loss": 1.4879, "step": 8881 }, { "epoch": 0.68, "grad_norm": 1.8321216106414795, "learning_rate": 4.70793009233513e-05, "loss": 1.7985, "step": 8882 }, { "epoch": 0.68, "grad_norm": 1.834944486618042, "learning_rate": 4.7058946443313826e-05, "loss": 1.6503, "step": 8883 }, { "epoch": 0.68, "grad_norm": 1.583158254623413, "learning_rate": 4.703859501033854e-05, "loss": 1.649, "step": 8884 }, { "epoch": 0.68, "grad_norm": 1.3332232236862183, "learning_rate": 4.701824662559682e-05, "loss": 1.2983, "step": 8885 }, { "epoch": 0.68, "grad_norm": 1.1351954936981201, "learning_rate": 4.699790129025978e-05, "loss": 1.478, "step": 8886 }, { "epoch": 0.68, "grad_norm": 1.3142422437667847, "learning_rate": 4.6977559005498406e-05, "loss": 1.093, "step": 8887 }, { "epoch": 0.68, "grad_norm": 3.8545939922332764, "learning_rate": 4.695721977248356e-05, "loss": 1.5257, "step": 8888 }, { "epoch": 0.68, "grad_norm": 1.5328630208969116, "learning_rate": 4.693688359238583e-05, "loss": 1.3504, "step": 8889 }, { "epoch": 0.68, "grad_norm": 2.4446029663085938, "learning_rate": 4.6916550466375684e-05, "loss": 0.6978, "step": 8890 }, { "epoch": 0.68, "grad_norm": 1.2544883489608765, "learning_rate": 
4.6896220395623484e-05, "loss": 2.187, "step": 8891 }, { "epoch": 0.68, "grad_norm": 1.0943440198898315, "learning_rate": 4.687589338129925e-05, "loss": 0.8605, "step": 8892 }, { "epoch": 0.68, "grad_norm": 1.4306838512420654, "learning_rate": 4.6855569424572955e-05, "loss": 2.3711, "step": 8893 }, { "epoch": 0.68, "grad_norm": 1.3614188432693481, "learning_rate": 4.68352485266144e-05, "loss": 1.6675, "step": 8894 }, { "epoch": 0.68, "grad_norm": 2.813971519470215, "learning_rate": 4.68149306885931e-05, "loss": 1.5289, "step": 8895 }, { "epoch": 0.68, "grad_norm": 1.3012717962265015, "learning_rate": 4.679461591167853e-05, "loss": 1.4277, "step": 8896 }, { "epoch": 0.68, "grad_norm": 1.1902379989624023, "learning_rate": 4.677430419703989e-05, "loss": 1.6343, "step": 8897 }, { "epoch": 0.68, "grad_norm": 1.8683902025222778, "learning_rate": 4.6753995545846184e-05, "loss": 1.8745, "step": 8898 }, { "epoch": 0.68, "grad_norm": 1.085082769393921, "learning_rate": 4.673368995926636e-05, "loss": 1.3524, "step": 8899 }, { "epoch": 0.68, "grad_norm": 1.0966976881027222, "learning_rate": 4.6713387438469136e-05, "loss": 0.9719, "step": 8900 }, { "epoch": 0.68, "grad_norm": 1.0243085622787476, "learning_rate": 4.6693087984622966e-05, "loss": 1.5147, "step": 8901 }, { "epoch": 0.68, "grad_norm": 1.9934937953948975, "learning_rate": 4.667279159889624e-05, "loss": 1.909, "step": 8902 }, { "epoch": 0.68, "grad_norm": 2.5524322986602783, "learning_rate": 4.665249828245717e-05, "loss": 2.156, "step": 8903 }, { "epoch": 0.68, "grad_norm": 1.3680812120437622, "learning_rate": 4.663220803647368e-05, "loss": 1.9372, "step": 8904 }, { "epoch": 0.68, "grad_norm": 1.2073373794555664, "learning_rate": 4.661192086211366e-05, "loss": 1.2081, "step": 8905 }, { "epoch": 0.68, "grad_norm": 2.5457048416137695, "learning_rate": 4.659163676054472e-05, "loss": 1.3582, "step": 8906 }, { "epoch": 0.68, "grad_norm": 1.284346580505371, "learning_rate": 4.6571355732934285e-05, "loss": 0.9249, "step": 
8907 }, { "epoch": 0.68, "grad_norm": 1.2064660787582397, "learning_rate": 4.655107778044969e-05, "loss": 1.2735, "step": 8908 }, { "epoch": 0.68, "grad_norm": 1.2453171014785767, "learning_rate": 4.653080290425808e-05, "loss": 1.5738, "step": 8909 }, { "epoch": 0.68, "grad_norm": 2.260828971862793, "learning_rate": 4.651053110552631e-05, "loss": 0.8844, "step": 8910 }, { "epoch": 0.68, "grad_norm": 2.2944726943969727, "learning_rate": 4.64902623854212e-05, "loss": 1.5388, "step": 8911 }, { "epoch": 0.68, "grad_norm": 1.2718263864517212, "learning_rate": 4.6469996745109333e-05, "loss": 1.0048, "step": 8912 }, { "epoch": 0.68, "grad_norm": 2.037524700164795, "learning_rate": 4.644973418575708e-05, "loss": 1.3633, "step": 8913 }, { "epoch": 0.68, "grad_norm": 2.6440649032592773, "learning_rate": 4.6429474708530696e-05, "loss": 1.5252, "step": 8914 }, { "epoch": 0.68, "grad_norm": 1.2815138101577759, "learning_rate": 4.640921831459623e-05, "loss": 1.2425, "step": 8915 }, { "epoch": 0.68, "grad_norm": 1.9286829233169556, "learning_rate": 4.63889650051195e-05, "loss": 1.8315, "step": 8916 }, { "epoch": 0.68, "grad_norm": 2.296246290206909, "learning_rate": 4.636871478126624e-05, "loss": 2.0568, "step": 8917 }, { "epoch": 0.68, "grad_norm": 1.548468828201294, "learning_rate": 4.634846764420201e-05, "loss": 1.1623, "step": 8918 }, { "epoch": 0.68, "grad_norm": 11.568144798278809, "learning_rate": 4.632822359509207e-05, "loss": 1.9352, "step": 8919 }, { "epoch": 0.68, "grad_norm": 1.5725986957550049, "learning_rate": 4.630798263510162e-05, "loss": 1.346, "step": 8920 }, { "epoch": 0.68, "grad_norm": 1.5066553354263306, "learning_rate": 4.628774476539567e-05, "loss": 1.4439, "step": 8921 }, { "epoch": 0.68, "grad_norm": 1.1739146709442139, "learning_rate": 4.626750998713897e-05, "loss": 1.0299, "step": 8922 }, { "epoch": 0.68, "grad_norm": 2.248725175857544, "learning_rate": 4.6247278301496214e-05, "loss": 2.0921, "step": 8923 }, { "epoch": 0.68, "grad_norm": 
0.9105141162872314, "learning_rate": 4.622704970963177e-05, "loss": 1.0714, "step": 8924 }, { "epoch": 0.68, "grad_norm": 1.5450941324234009, "learning_rate": 4.6206824212709995e-05, "loss": 1.3207, "step": 8925 }, { "epoch": 0.68, "grad_norm": 1.7138073444366455, "learning_rate": 4.6186601811894916e-05, "loss": 1.586, "step": 8926 }, { "epoch": 0.68, "grad_norm": 6.227181911468506, "learning_rate": 4.6166382508350506e-05, "loss": 2.2688, "step": 8927 }, { "epoch": 0.68, "grad_norm": 1.4141813516616821, "learning_rate": 4.614616630324043e-05, "loss": 1.4196, "step": 8928 }, { "epoch": 0.68, "grad_norm": 1.3185513019561768, "learning_rate": 4.612595319772828e-05, "loss": 1.4121, "step": 8929 }, { "epoch": 0.68, "grad_norm": 2.132875442504883, "learning_rate": 4.610574319297748e-05, "loss": 1.5805, "step": 8930 }, { "epoch": 0.68, "grad_norm": 1.2590917348861694, "learning_rate": 4.6085536290151166e-05, "loss": 0.9705, "step": 8931 }, { "epoch": 0.68, "grad_norm": 2.1041505336761475, "learning_rate": 4.606533249041239e-05, "loss": 1.814, "step": 8932 }, { "epoch": 0.68, "grad_norm": 2.741180896759033, "learning_rate": 4.6045131794924045e-05, "loss": 1.5883, "step": 8933 }, { "epoch": 0.68, "grad_norm": 1.2290626764297485, "learning_rate": 4.6024934204848745e-05, "loss": 1.1818, "step": 8934 }, { "epoch": 0.68, "grad_norm": 1.5396878719329834, "learning_rate": 4.600473972134894e-05, "loss": 1.3384, "step": 8935 }, { "epoch": 0.68, "grad_norm": 1.4441989660263062, "learning_rate": 4.598454834558702e-05, "loss": 1.474, "step": 8936 }, { "epoch": 0.68, "grad_norm": 1.2492555379867554, "learning_rate": 4.5964360078725045e-05, "loss": 1.8465, "step": 8937 }, { "epoch": 0.68, "grad_norm": 1.46552574634552, "learning_rate": 4.5944174921925e-05, "loss": 1.5314, "step": 8938 }, { "epoch": 0.68, "grad_norm": 2.0699965953826904, "learning_rate": 4.592399287634868e-05, "loss": 1.6471, "step": 8939 }, { "epoch": 0.68, "grad_norm": 3.3755383491516113, "learning_rate": 
4.590381394315762e-05, "loss": 1.7081, "step": 8940 }, { "epoch": 0.68, "grad_norm": 1.9792548418045044, "learning_rate": 4.588363812351327e-05, "loss": 1.6845, "step": 8941 }, { "epoch": 0.68, "grad_norm": 1.437008261680603, "learning_rate": 4.586346541857689e-05, "loss": 1.5751, "step": 8942 }, { "epoch": 0.68, "grad_norm": 1.7785601615905762, "learning_rate": 4.584329582950951e-05, "loss": 1.2489, "step": 8943 }, { "epoch": 0.68, "grad_norm": 2.3596982955932617, "learning_rate": 4.582312935747197e-05, "loss": 1.098, "step": 8944 }, { "epoch": 0.68, "grad_norm": 1.3370877504348755, "learning_rate": 4.580296600362499e-05, "loss": 1.3497, "step": 8945 }, { "epoch": 0.68, "grad_norm": 1.390557885169983, "learning_rate": 4.578280576912913e-05, "loss": 1.5806, "step": 8946 }, { "epoch": 0.68, "grad_norm": 1.8095351457595825, "learning_rate": 4.5762648655144666e-05, "loss": 1.2833, "step": 8947 }, { "epoch": 0.68, "grad_norm": 4.07739782333374, "learning_rate": 4.5742494662831784e-05, "loss": 1.6763, "step": 8948 }, { "epoch": 0.68, "grad_norm": 1.9430445432662964, "learning_rate": 4.5722343793350485e-05, "loss": 2.0662, "step": 8949 }, { "epoch": 0.68, "grad_norm": 1.8201956748962402, "learning_rate": 4.570219604786051e-05, "loss": 1.7916, "step": 8950 }, { "epoch": 0.68, "grad_norm": 1.48978853225708, "learning_rate": 4.568205142752156e-05, "loss": 2.0044, "step": 8951 }, { "epoch": 0.68, "grad_norm": 1.7200230360031128, "learning_rate": 4.566190993349298e-05, "loss": 0.7601, "step": 8952 }, { "epoch": 0.68, "grad_norm": 1.3863625526428223, "learning_rate": 4.56417715669341e-05, "loss": 0.9834, "step": 8953 }, { "epoch": 0.68, "grad_norm": 1.070804238319397, "learning_rate": 4.5621636329003937e-05, "loss": 1.218, "step": 8954 }, { "epoch": 0.68, "grad_norm": 1.181053638458252, "learning_rate": 4.560150422086147e-05, "loss": 1.1996, "step": 8955 }, { "epoch": 0.68, "grad_norm": 1.6068477630615234, "learning_rate": 4.558137524366533e-05, "loss": 1.2062, "step": 8956 }, 
{ "epoch": 0.68, "grad_norm": 1.7607980966567993, "learning_rate": 4.55612493985741e-05, "loss": 2.0191, "step": 8957 }, { "epoch": 0.68, "grad_norm": 1.339051604270935, "learning_rate": 4.554112668674617e-05, "loss": 1.1724, "step": 8958 }, { "epoch": 0.68, "grad_norm": 1.1958564519882202, "learning_rate": 4.5521007109339654e-05, "loss": 1.4071, "step": 8959 }, { "epoch": 0.68, "grad_norm": 1.18113112449646, "learning_rate": 4.550089066751258e-05, "loss": 1.0841, "step": 8960 }, { "epoch": 0.68, "grad_norm": 1.4670777320861816, "learning_rate": 4.5480777362422796e-05, "loss": 1.4531, "step": 8961 }, { "epoch": 0.68, "grad_norm": 3.1035585403442383, "learning_rate": 4.5460667195227914e-05, "loss": 1.0608, "step": 8962 }, { "epoch": 0.68, "grad_norm": 1.2696267366409302, "learning_rate": 4.544056016708533e-05, "loss": 1.7457, "step": 8963 }, { "epoch": 0.68, "grad_norm": 2.097466230392456, "learning_rate": 4.542045627915242e-05, "loss": 1.5432, "step": 8964 }, { "epoch": 0.68, "grad_norm": 1.887793779373169, "learning_rate": 4.540035553258619e-05, "loss": 1.7143, "step": 8965 }, { "epoch": 0.68, "grad_norm": 1.3359216451644897, "learning_rate": 4.5380257928543604e-05, "loss": 1.4672, "step": 8966 }, { "epoch": 0.68, "grad_norm": 4.479509353637695, "learning_rate": 4.536016346818141e-05, "loss": 1.3385, "step": 8967 }, { "epoch": 0.68, "grad_norm": 2.2694432735443115, "learning_rate": 4.53400721526561e-05, "loss": 1.7182, "step": 8968 }, { "epoch": 0.68, "grad_norm": 0.8988960385322571, "learning_rate": 4.5319983983124084e-05, "loss": 0.7657, "step": 8969 }, { "epoch": 0.68, "grad_norm": 2.8551647663116455, "learning_rate": 4.529989896074157e-05, "loss": 2.8735, "step": 8970 }, { "epoch": 0.68, "grad_norm": 0.9904959201812744, "learning_rate": 4.5279817086664555e-05, "loss": 1.539, "step": 8971 }, { "epoch": 0.68, "grad_norm": 3.219721794128418, "learning_rate": 4.525973836204881e-05, "loss": 2.0618, "step": 8972 }, { "epoch": 0.68, "grad_norm": 1.2706931829452515, 
"learning_rate": 4.5239662788050075e-05, "loss": 0.8697, "step": 8973 }, { "epoch": 0.68, "grad_norm": 1.6008288860321045, "learning_rate": 4.5219590365823714e-05, "loss": 1.5852, "step": 8974 }, { "epoch": 0.68, "grad_norm": 1.4763257503509521, "learning_rate": 4.519952109652508e-05, "loss": 1.3792, "step": 8975 }, { "epoch": 0.68, "grad_norm": 1.5026897192001343, "learning_rate": 4.5179454981309276e-05, "loss": 0.8959, "step": 8976 }, { "epoch": 0.68, "grad_norm": 1.2137110233306885, "learning_rate": 4.515939202133117e-05, "loss": 1.714, "step": 8977 }, { "epoch": 0.69, "grad_norm": 1.4967389106750488, "learning_rate": 4.5139332217745544e-05, "loss": 1.5923, "step": 8978 }, { "epoch": 0.69, "grad_norm": 1.8556150197982788, "learning_rate": 4.511927557170697e-05, "loss": 0.7225, "step": 8979 }, { "epoch": 0.69, "grad_norm": 1.4889963865280151, "learning_rate": 4.5099222084369805e-05, "loss": 1.907, "step": 8980 }, { "epoch": 0.69, "grad_norm": 1.4527997970581055, "learning_rate": 4.507917175688819e-05, "loss": 1.7972, "step": 8981 }, { "epoch": 0.69, "grad_norm": 1.5275434255599976, "learning_rate": 4.5059124590416234e-05, "loss": 1.477, "step": 8982 }, { "epoch": 0.69, "grad_norm": 2.01664400100708, "learning_rate": 4.503908058610767e-05, "loss": 1.3697, "step": 8983 }, { "epoch": 0.69, "grad_norm": 1.3735746145248413, "learning_rate": 4.50190397451162e-05, "loss": 1.271, "step": 8984 }, { "epoch": 0.69, "grad_norm": 1.0500497817993164, "learning_rate": 4.499900206859531e-05, "loss": 1.1512, "step": 8985 }, { "epoch": 0.69, "grad_norm": 1.450005054473877, "learning_rate": 4.4978967557698216e-05, "loss": 1.1786, "step": 8986 }, { "epoch": 0.69, "grad_norm": 1.3456445932388306, "learning_rate": 4.495893621357806e-05, "loss": 1.3676, "step": 8987 }, { "epoch": 0.69, "grad_norm": 1.2331267595291138, "learning_rate": 4.49389080373878e-05, "loss": 0.7498, "step": 8988 }, { "epoch": 0.69, "grad_norm": 1.4715392589569092, "learning_rate": 4.491888303028009e-05, "loss": 
1.2173, "step": 8989 }, { "epoch": 0.69, "grad_norm": 1.4015566110610962, "learning_rate": 4.489886119340756e-05, "loss": 1.0632, "step": 8990 }, { "epoch": 0.69, "grad_norm": 2.901385545730591, "learning_rate": 4.487884252792255e-05, "loss": 2.4543, "step": 8991 }, { "epoch": 0.69, "grad_norm": 1.4862091541290283, "learning_rate": 4.4858827034977215e-05, "loss": 1.469, "step": 8992 }, { "epoch": 0.69, "grad_norm": 1.0578029155731201, "learning_rate": 4.483881471572359e-05, "loss": 0.6713, "step": 8993 }, { "epoch": 0.69, "grad_norm": 1.090795636177063, "learning_rate": 4.481880557131354e-05, "loss": 1.6107, "step": 8994 }, { "epoch": 0.69, "grad_norm": 1.3121947050094604, "learning_rate": 4.479879960289863e-05, "loss": 1.4624, "step": 8995 }, { "epoch": 0.69, "grad_norm": 1.2533177137374878, "learning_rate": 4.4778796811630366e-05, "loss": 1.124, "step": 8996 }, { "epoch": 0.69, "grad_norm": 1.4433306455612183, "learning_rate": 4.475879719866004e-05, "loss": 1.155, "step": 8997 }, { "epoch": 0.69, "grad_norm": 1.1872379779815674, "learning_rate": 4.47388007651387e-05, "loss": 1.7041, "step": 8998 }, { "epoch": 0.69, "grad_norm": 2.9537293910980225, "learning_rate": 4.47188075122173e-05, "loss": 1.2257, "step": 8999 }, { "epoch": 0.69, "grad_norm": 1.3957065343856812, "learning_rate": 4.4698817441046545e-05, "loss": 1.0192, "step": 9000 }, { "epoch": 0.69, "grad_norm": 1.7005029916763306, "learning_rate": 4.467883055277695e-05, "loss": 1.2219, "step": 9001 }, { "epoch": 0.69, "grad_norm": 1.2761257886886597, "learning_rate": 4.4658846848558886e-05, "loss": 0.7589, "step": 9002 }, { "epoch": 0.69, "grad_norm": 4.62621545791626, "learning_rate": 4.46388663295426e-05, "loss": 1.0765, "step": 9003 }, { "epoch": 0.69, "grad_norm": 3.2140207290649414, "learning_rate": 4.461888899687798e-05, "loss": 1.9635, "step": 9004 }, { "epoch": 0.69, "grad_norm": 1.4869204759597778, "learning_rate": 4.45989148517149e-05, "loss": 1.0125, "step": 9005 }, { "epoch": 0.69, "grad_norm": 
1.7573645114898682, "learning_rate": 4.4578943895203005e-05, "loss": 1.2, "step": 9006 }, { "epoch": 0.69, "grad_norm": 1.0004024505615234, "learning_rate": 4.455897612849167e-05, "loss": 1.2546, "step": 9007 }, { "epoch": 0.69, "grad_norm": 1.600260615348816, "learning_rate": 4.453901155273024e-05, "loss": 1.5186, "step": 9008 }, { "epoch": 0.69, "grad_norm": 1.6358137130737305, "learning_rate": 4.451905016906773e-05, "loss": 1.1663, "step": 9009 }, { "epoch": 0.69, "grad_norm": 1.9486663341522217, "learning_rate": 4.449909197865303e-05, "loss": 1.4188, "step": 9010 }, { "epoch": 0.69, "grad_norm": 1.92917001247406, "learning_rate": 4.447913698263485e-05, "loss": 1.5153, "step": 9011 }, { "epoch": 0.69, "grad_norm": 3.029524087905884, "learning_rate": 4.4459185182161776e-05, "loss": 1.3231, "step": 9012 }, { "epoch": 0.69, "grad_norm": 2.563821315765381, "learning_rate": 4.443923657838206e-05, "loss": 1.8397, "step": 9013 }, { "epoch": 0.69, "grad_norm": 1.094787836074829, "learning_rate": 4.441929117244391e-05, "loss": 1.9527, "step": 9014 }, { "epoch": 0.69, "grad_norm": 1.4960349798202515, "learning_rate": 4.439934896549532e-05, "loss": 1.3746, "step": 9015 }, { "epoch": 0.69, "grad_norm": 1.069043517112732, "learning_rate": 4.437940995868403e-05, "loss": 0.8567, "step": 9016 }, { "epoch": 0.69, "grad_norm": 1.7594702243804932, "learning_rate": 4.435947415315769e-05, "loss": 0.7794, "step": 9017 }, { "epoch": 0.69, "grad_norm": 1.4567418098449707, "learning_rate": 4.433954155006366e-05, "loss": 1.3654, "step": 9018 }, { "epoch": 0.69, "grad_norm": 1.161792278289795, "learning_rate": 4.431961215054924e-05, "loss": 1.2704, "step": 9019 }, { "epoch": 0.69, "grad_norm": 1.0895366668701172, "learning_rate": 4.4299685955761414e-05, "loss": 1.2243, "step": 9020 }, { "epoch": 0.69, "grad_norm": 1.256658673286438, "learning_rate": 4.427976296684714e-05, "loss": 1.3902, "step": 9021 }, { "epoch": 0.69, "grad_norm": 1.2579692602157593, "learning_rate": 
4.4259843184952995e-05, "loss": 1.8062, "step": 9022 }, { "epoch": 0.69, "grad_norm": 1.2485368251800537, "learning_rate": 4.423992661122555e-05, "loss": 1.2397, "step": 9023 }, { "epoch": 0.69, "grad_norm": 2.2115681171417236, "learning_rate": 4.422001324681112e-05, "loss": 0.6815, "step": 9024 }, { "epoch": 0.69, "grad_norm": 1.5799715518951416, "learning_rate": 4.420010309285577e-05, "loss": 0.9745, "step": 9025 }, { "epoch": 0.69, "grad_norm": 1.4650429487228394, "learning_rate": 4.418019615050551e-05, "loss": 0.8398, "step": 9026 }, { "epoch": 0.69, "grad_norm": 3.6872105598449707, "learning_rate": 4.416029242090609e-05, "loss": 1.8486, "step": 9027 }, { "epoch": 0.69, "grad_norm": 1.0128357410430908, "learning_rate": 4.414039190520308e-05, "loss": 0.9895, "step": 9028 }, { "epoch": 0.69, "grad_norm": 1.991208553314209, "learning_rate": 4.4120494604541816e-05, "loss": 1.7472, "step": 9029 }, { "epoch": 0.69, "grad_norm": 1.4520962238311768, "learning_rate": 4.410060052006758e-05, "loss": 0.9931, "step": 9030 }, { "epoch": 0.69, "grad_norm": 1.4095008373260498, "learning_rate": 4.4080709652925336e-05, "loss": 1.6099, "step": 9031 }, { "epoch": 0.69, "grad_norm": 1.2685585021972656, "learning_rate": 4.4060822004259915e-05, "loss": 1.3951, "step": 9032 }, { "epoch": 0.69, "grad_norm": 1.831393837928772, "learning_rate": 4.4040937575216044e-05, "loss": 2.0482, "step": 9033 }, { "epoch": 0.69, "grad_norm": 1.0508908033370972, "learning_rate": 4.4021056366938085e-05, "loss": 1.1016, "step": 9034 }, { "epoch": 0.69, "grad_norm": 1.8874306678771973, "learning_rate": 4.400117838057036e-05, "loss": 1.4747, "step": 9035 }, { "epoch": 0.69, "grad_norm": 4.528998374938965, "learning_rate": 4.3981303617257e-05, "loss": 1.6461, "step": 9036 }, { "epoch": 0.69, "grad_norm": 1.2823693752288818, "learning_rate": 4.3961432078141876e-05, "loss": 1.3052, "step": 9037 }, { "epoch": 0.69, "grad_norm": 1.7258243560791016, "learning_rate": 4.3941563764368676e-05, "loss": 1.3637, 
"step": 9038 }, { "epoch": 0.69, "grad_norm": 1.2945479154586792, "learning_rate": 4.392169867708099e-05, "loss": 1.4749, "step": 9039 }, { "epoch": 0.69, "grad_norm": 2.4387505054473877, "learning_rate": 4.3901836817422124e-05, "loss": 1.5464, "step": 9040 }, { "epoch": 0.69, "grad_norm": 3.6096248626708984, "learning_rate": 4.388197818653525e-05, "loss": 1.5575, "step": 9041 }, { "epoch": 0.69, "grad_norm": 1.2822265625, "learning_rate": 4.386212278556342e-05, "loss": 0.6571, "step": 9042 }, { "epoch": 0.69, "grad_norm": 1.4925180673599243, "learning_rate": 4.384227061564932e-05, "loss": 1.3401, "step": 9043 }, { "epoch": 0.69, "grad_norm": 1.7634445428848267, "learning_rate": 4.38224216779356e-05, "loss": 1.2569, "step": 9044 }, { "epoch": 0.69, "grad_norm": 1.9222787618637085, "learning_rate": 4.380257597356472e-05, "loss": 1.3233, "step": 9045 }, { "epoch": 0.69, "grad_norm": 1.531665563583374, "learning_rate": 4.3782733503678886e-05, "loss": 1.7998, "step": 9046 }, { "epoch": 0.69, "grad_norm": 1.6525757312774658, "learning_rate": 4.37628942694201e-05, "loss": 1.3065, "step": 9047 }, { "epoch": 0.69, "grad_norm": 1.385233759880066, "learning_rate": 4.3743058271930295e-05, "loss": 2.0226, "step": 9048 }, { "epoch": 0.69, "grad_norm": 1.948414921760559, "learning_rate": 4.372322551235108e-05, "loss": 1.2865, "step": 9049 }, { "epoch": 0.69, "grad_norm": 1.7485038042068481, "learning_rate": 4.370339599182398e-05, "loss": 1.5451, "step": 9050 }, { "epoch": 0.69, "grad_norm": 1.248704195022583, "learning_rate": 4.3683569711490333e-05, "loss": 1.2514, "step": 9051 }, { "epoch": 0.69, "grad_norm": 1.2897382974624634, "learning_rate": 4.366374667249118e-05, "loss": 1.2959, "step": 9052 }, { "epoch": 0.69, "grad_norm": 1.5457714796066284, "learning_rate": 4.36439268759675e-05, "loss": 1.7866, "step": 9053 }, { "epoch": 0.69, "grad_norm": 1.461282730102539, "learning_rate": 4.362411032306006e-05, "loss": 1.5966, "step": 9054 }, { "epoch": 0.69, "grad_norm": 
1.2957333326339722, "learning_rate": 4.360429701490934e-05, "loss": 1.5032, "step": 9055 }, { "epoch": 0.69, "grad_norm": 1.2962710857391357, "learning_rate": 4.35844869526558e-05, "loss": 1.4341, "step": 9056 }, { "epoch": 0.69, "grad_norm": 1.711647629737854, "learning_rate": 4.356468013743957e-05, "loss": 1.4501, "step": 9057 }, { "epoch": 0.69, "grad_norm": 1.339497685432434, "learning_rate": 4.3544876570400625e-05, "loss": 0.7874, "step": 9058 }, { "epoch": 0.69, "grad_norm": 0.9473193287849426, "learning_rate": 4.35250762526788e-05, "loss": 1.2519, "step": 9059 }, { "epoch": 0.69, "grad_norm": 1.6390236616134644, "learning_rate": 4.350527918541375e-05, "loss": 2.0183, "step": 9060 }, { "epoch": 0.69, "grad_norm": 1.4822156429290771, "learning_rate": 4.348548536974484e-05, "loss": 1.7926, "step": 9061 }, { "epoch": 0.69, "grad_norm": 2.1787736415863037, "learning_rate": 4.346569480681136e-05, "loss": 1.1731, "step": 9062 }, { "epoch": 0.69, "grad_norm": 1.6495819091796875, "learning_rate": 4.3445907497752416e-05, "loss": 1.3249, "step": 9063 }, { "epoch": 0.69, "grad_norm": 1.3291876316070557, "learning_rate": 4.342612344370678e-05, "loss": 1.5642, "step": 9064 }, { "epoch": 0.69, "grad_norm": 0.8865783214569092, "learning_rate": 4.3406342645813237e-05, "loss": 0.71, "step": 9065 }, { "epoch": 0.69, "grad_norm": 2.1615235805511475, "learning_rate": 4.338656510521024e-05, "loss": 1.2971, "step": 9066 }, { "epoch": 0.69, "grad_norm": 2.42236590385437, "learning_rate": 4.336679082303606e-05, "loss": 1.6867, "step": 9067 }, { "epoch": 0.69, "grad_norm": 1.2520067691802979, "learning_rate": 4.3347019800428865e-05, "loss": 1.4889, "step": 9068 }, { "epoch": 0.69, "grad_norm": 2.166727304458618, "learning_rate": 4.332725203852662e-05, "loss": 1.1241, "step": 9069 }, { "epoch": 0.69, "grad_norm": 1.8256474733352661, "learning_rate": 4.3307487538467006e-05, "loss": 1.2863, "step": 9070 }, { "epoch": 0.69, "grad_norm": 1.2050597667694092, "learning_rate": 
4.328772630138762e-05, "loss": 0.5619, "step": 9071 }, { "epoch": 0.69, "grad_norm": 1.5020233392715454, "learning_rate": 4.326796832842587e-05, "loss": 1.4104, "step": 9072 }, { "epoch": 0.69, "grad_norm": 1.9877382516860962, "learning_rate": 4.324821362071887e-05, "loss": 1.5113, "step": 9073 }, { "epoch": 0.69, "grad_norm": 1.3771936893463135, "learning_rate": 4.322846217940368e-05, "loss": 1.6965, "step": 9074 }, { "epoch": 0.69, "grad_norm": 1.5168051719665527, "learning_rate": 4.320871400561709e-05, "loss": 1.446, "step": 9075 }, { "epoch": 0.69, "grad_norm": 3.384775161743164, "learning_rate": 4.3188969100495666e-05, "loss": 0.8743, "step": 9076 }, { "epoch": 0.69, "grad_norm": 1.0971195697784424, "learning_rate": 4.31692274651759e-05, "loss": 1.2368, "step": 9077 }, { "epoch": 0.69, "grad_norm": 1.959865689277649, "learning_rate": 4.314948910079404e-05, "loss": 1.0294, "step": 9078 }, { "epoch": 0.69, "grad_norm": 1.198778748512268, "learning_rate": 4.31297540084861e-05, "loss": 1.208, "step": 9079 }, { "epoch": 0.69, "grad_norm": 1.9679861068725586, "learning_rate": 4.311002218938798e-05, "loss": 1.3313, "step": 9080 }, { "epoch": 0.69, "grad_norm": 1.1311873197555542, "learning_rate": 4.3090293644635384e-05, "loss": 1.1708, "step": 9081 }, { "epoch": 0.69, "grad_norm": 1.195075273513794, "learning_rate": 4.307056837536373e-05, "loss": 1.4917, "step": 9082 }, { "epoch": 0.69, "grad_norm": 0.8669904470443726, "learning_rate": 4.305084638270837e-05, "loss": 0.8915, "step": 9083 }, { "epoch": 0.69, "grad_norm": 1.8168959617614746, "learning_rate": 4.303112766780444e-05, "loss": 1.131, "step": 9084 }, { "epoch": 0.69, "grad_norm": 1.3883161544799805, "learning_rate": 4.301141223178684e-05, "loss": 0.9505, "step": 9085 }, { "epoch": 0.69, "grad_norm": 0.991308331489563, "learning_rate": 4.299170007579026e-05, "loss": 1.2079, "step": 9086 }, { "epoch": 0.69, "grad_norm": 1.4162627458572388, "learning_rate": 4.2971991200949314e-05, "loss": 0.7021, "step": 9087 }, 
{ "epoch": 0.69, "grad_norm": 1.6341123580932617, "learning_rate": 4.295228560839836e-05, "loss": 1.3182, "step": 9088 }, { "epoch": 0.69, "grad_norm": 1.9986720085144043, "learning_rate": 4.293258329927151e-05, "loss": 1.6945, "step": 9089 }, { "epoch": 0.69, "grad_norm": 1.3420785665512085, "learning_rate": 4.291288427470278e-05, "loss": 1.4701, "step": 9090 }, { "epoch": 0.69, "grad_norm": 1.2165164947509766, "learning_rate": 4.2893188535826e-05, "loss": 1.1138, "step": 9091 }, { "epoch": 0.69, "grad_norm": 2.3113510608673096, "learning_rate": 4.2873496083774714e-05, "loss": 1.289, "step": 9092 }, { "epoch": 0.69, "grad_norm": 1.4273548126220703, "learning_rate": 4.285380691968238e-05, "loss": 1.3949, "step": 9093 }, { "epoch": 0.69, "grad_norm": 1.5890204906463623, "learning_rate": 4.283412104468221e-05, "loss": 1.9862, "step": 9094 }, { "epoch": 0.69, "grad_norm": 1.2196283340454102, "learning_rate": 4.281443845990719e-05, "loss": 0.8376, "step": 9095 }, { "epoch": 0.69, "grad_norm": 0.8693410158157349, "learning_rate": 4.2794759166490214e-05, "loss": 1.073, "step": 9096 }, { "epoch": 0.69, "grad_norm": 1.2923542261123657, "learning_rate": 4.277508316556397e-05, "loss": 1.3262, "step": 9097 }, { "epoch": 0.69, "grad_norm": 1.361680030822754, "learning_rate": 4.2755410458260845e-05, "loss": 0.9159, "step": 9098 }, { "epoch": 0.69, "grad_norm": 1.548625111579895, "learning_rate": 4.273574104571315e-05, "loss": 1.3237, "step": 9099 }, { "epoch": 0.69, "grad_norm": 0.9435780048370361, "learning_rate": 4.271607492905303e-05, "loss": 1.0545, "step": 9100 }, { "epoch": 0.69, "grad_norm": 1.2714964151382446, "learning_rate": 4.2696412109412296e-05, "loss": 1.1447, "step": 9101 }, { "epoch": 0.69, "grad_norm": 1.2886430025100708, "learning_rate": 4.267675258792273e-05, "loss": 1.6791, "step": 9102 }, { "epoch": 0.69, "grad_norm": 1.6136013269424438, "learning_rate": 4.265709636571581e-05, "loss": 1.2953, "step": 9103 }, { "epoch": 0.69, "grad_norm": 2.666806936264038, 
"learning_rate": 4.263744344392284e-05, "loss": 1.5039, "step": 9104 }, { "epoch": 0.69, "grad_norm": 1.4829953908920288, "learning_rate": 4.261779382367499e-05, "loss": 1.7694, "step": 9105 }, { "epoch": 0.69, "grad_norm": 1.453309178352356, "learning_rate": 4.259814750610324e-05, "loss": 1.743, "step": 9106 }, { "epoch": 0.69, "grad_norm": 2.4529731273651123, "learning_rate": 4.257850449233828e-05, "loss": 1.5776, "step": 9107 }, { "epoch": 0.69, "grad_norm": 1.6866878271102905, "learning_rate": 4.255886478351071e-05, "loss": 1.1089, "step": 9108 }, { "epoch": 0.7, "grad_norm": 1.712424874305725, "learning_rate": 4.253922838075095e-05, "loss": 1.2962, "step": 9109 }, { "epoch": 0.7, "grad_norm": 2.2260074615478516, "learning_rate": 4.251959528518912e-05, "loss": 1.4116, "step": 9110 }, { "epoch": 0.7, "grad_norm": 1.5253045558929443, "learning_rate": 4.249996549795529e-05, "loss": 1.3808, "step": 9111 }, { "epoch": 0.7, "grad_norm": 0.9853876233100891, "learning_rate": 4.248033902017917e-05, "loss": 1.5168, "step": 9112 }, { "epoch": 0.7, "grad_norm": 1.4154607057571411, "learning_rate": 4.2460715852990475e-05, "loss": 0.9319, "step": 9113 }, { "epoch": 0.7, "grad_norm": 1.3971081972122192, "learning_rate": 4.244109599751855e-05, "loss": 1.3321, "step": 9114 }, { "epoch": 0.7, "grad_norm": 1.1429904699325562, "learning_rate": 4.242147945489272e-05, "loss": 0.687, "step": 9115 }, { "epoch": 0.7, "grad_norm": 2.399484634399414, "learning_rate": 4.240186622624193e-05, "loss": 1.4406, "step": 9116 }, { "epoch": 0.7, "grad_norm": 3.0789225101470947, "learning_rate": 4.238225631269509e-05, "loss": 1.933, "step": 9117 }, { "epoch": 0.7, "grad_norm": 3.935704231262207, "learning_rate": 4.236264971538089e-05, "loss": 2.3715, "step": 9118 }, { "epoch": 0.7, "grad_norm": 1.1753599643707275, "learning_rate": 4.2343046435427726e-05, "loss": 1.5856, "step": 9119 }, { "epoch": 0.7, "grad_norm": 1.7386096715927124, "learning_rate": 4.232344647396392e-05, "loss": 0.9437, "step": 
9120 }, { "epoch": 0.7, "grad_norm": 1.9262710809707642, "learning_rate": 4.2303849832117606e-05, "loss": 1.4434, "step": 9121 }, { "epoch": 0.7, "grad_norm": 1.304608702659607, "learning_rate": 4.228425651101664e-05, "loss": 2.0932, "step": 9122 }, { "epoch": 0.7, "grad_norm": 1.5583971738815308, "learning_rate": 4.22646665117887e-05, "loss": 0.9119, "step": 9123 }, { "epoch": 0.7, "grad_norm": 1.5303521156311035, "learning_rate": 4.224507983556137e-05, "loss": 1.487, "step": 9124 }, { "epoch": 0.7, "grad_norm": 1.5844179391860962, "learning_rate": 4.222549648346191e-05, "loss": 1.5106, "step": 9125 }, { "epoch": 0.7, "grad_norm": 1.3773102760314941, "learning_rate": 4.220591645661748e-05, "loss": 1.1845, "step": 9126 }, { "epoch": 0.7, "grad_norm": 1.3552014827728271, "learning_rate": 4.2186339756155067e-05, "loss": 1.2228, "step": 9127 }, { "epoch": 0.7, "grad_norm": 1.0498489141464233, "learning_rate": 4.216676638320135e-05, "loss": 1.6621, "step": 9128 }, { "epoch": 0.7, "grad_norm": 4.317267417907715, "learning_rate": 4.2147196338882925e-05, "loss": 1.7207, "step": 9129 }, { "epoch": 0.7, "grad_norm": 1.9195830821990967, "learning_rate": 4.212762962432619e-05, "loss": 1.0661, "step": 9130 }, { "epoch": 0.7, "grad_norm": 4.038409233093262, "learning_rate": 4.21080662406573e-05, "loss": 1.7687, "step": 9131 }, { "epoch": 0.7, "grad_norm": 1.529936671257019, "learning_rate": 4.208850618900219e-05, "loss": 2.168, "step": 9132 }, { "epoch": 0.7, "grad_norm": 1.2078924179077148, "learning_rate": 4.206894947048673e-05, "loss": 1.4848, "step": 9133 }, { "epoch": 0.7, "grad_norm": 1.739230990409851, "learning_rate": 4.2049396086236445e-05, "loss": 1.0188, "step": 9134 }, { "epoch": 0.7, "grad_norm": 1.3772146701812744, "learning_rate": 4.2029846037376794e-05, "loss": 1.3861, "step": 9135 }, { "epoch": 0.7, "grad_norm": 1.467179298400879, "learning_rate": 4.2010299325033034e-05, "loss": 1.2856, "step": 9136 }, { "epoch": 0.7, "grad_norm": 1.5737305879592896, 
"learning_rate": 4.1990755950330094e-05, "loss": 1.5823, "step": 9137 }, { "epoch": 0.7, "grad_norm": 1.0341216325759888, "learning_rate": 4.1971215914392866e-05, "loss": 0.8249, "step": 9138 }, { "epoch": 0.7, "grad_norm": 1.2732762098312378, "learning_rate": 4.195167921834602e-05, "loss": 1.6209, "step": 9139 }, { "epoch": 0.7, "grad_norm": 2.3655014038085938, "learning_rate": 4.1932145863313965e-05, "loss": 1.1537, "step": 9140 }, { "epoch": 0.7, "grad_norm": 1.5655158758163452, "learning_rate": 4.191261585042092e-05, "loss": 1.2288, "step": 9141 }, { "epoch": 0.7, "grad_norm": 1.1551413536071777, "learning_rate": 4.1893089180791034e-05, "loss": 0.8212, "step": 9142 }, { "epoch": 0.7, "grad_norm": 0.8839604258537292, "learning_rate": 4.1873565855548104e-05, "loss": 0.9293, "step": 9143 }, { "epoch": 0.7, "grad_norm": 0.9920266270637512, "learning_rate": 4.185404587581584e-05, "loss": 0.5577, "step": 9144 }, { "epoch": 0.7, "grad_norm": 1.804731845855713, "learning_rate": 4.183452924271776e-05, "loss": 1.9875, "step": 9145 }, { "epoch": 0.7, "grad_norm": 1.6596379280090332, "learning_rate": 4.18150159573771e-05, "loss": 1.7056, "step": 9146 }, { "epoch": 0.7, "grad_norm": 2.2532522678375244, "learning_rate": 4.1795506020916984e-05, "loss": 1.8484, "step": 9147 }, { "epoch": 0.7, "grad_norm": 1.8772066831588745, "learning_rate": 4.1775999434460375e-05, "loss": 1.6969, "step": 9148 }, { "epoch": 0.7, "grad_norm": 1.6023061275482178, "learning_rate": 4.17564961991299e-05, "loss": 1.2507, "step": 9149 }, { "epoch": 0.7, "grad_norm": 1.8598788976669312, "learning_rate": 4.173699631604817e-05, "loss": 1.8578, "step": 9150 }, { "epoch": 0.7, "grad_norm": 3.2537167072296143, "learning_rate": 4.1717499786337465e-05, "loss": 2.1869, "step": 9151 }, { "epoch": 0.7, "grad_norm": 3.3150928020477295, "learning_rate": 4.16980066111199e-05, "loss": 1.214, "step": 9152 }, { "epoch": 0.7, "grad_norm": 1.4276988506317139, "learning_rate": 4.1678516791517433e-05, "loss": 1.243, 
"step": 9153 }, { "epoch": 0.7, "grad_norm": 1.635737419128418, "learning_rate": 4.165903032865187e-05, "loss": 1.6378, "step": 9154 }, { "epoch": 0.7, "grad_norm": 1.7293206453323364, "learning_rate": 4.1639547223644706e-05, "loss": 1.7169, "step": 9155 }, { "epoch": 0.7, "grad_norm": 1.2455084323883057, "learning_rate": 4.162006747761731e-05, "loss": 1.1298, "step": 9156 }, { "epoch": 0.7, "grad_norm": 1.664583444595337, "learning_rate": 4.160059109169092e-05, "loss": 1.4512, "step": 9157 }, { "epoch": 0.7, "grad_norm": 1.3192274570465088, "learning_rate": 4.1581118066986425e-05, "loss": 0.9, "step": 9158 }, { "epoch": 0.7, "grad_norm": 1.5756031274795532, "learning_rate": 4.156164840462469e-05, "loss": 1.0369, "step": 9159 }, { "epoch": 0.7, "grad_norm": 1.0355030298233032, "learning_rate": 4.154218210572627e-05, "loss": 1.519, "step": 9160 }, { "epoch": 0.7, "grad_norm": 1.5590473413467407, "learning_rate": 4.1522719171411516e-05, "loss": 1.0122, "step": 9161 }, { "epoch": 0.7, "grad_norm": 1.611708641052246, "learning_rate": 4.150325960280068e-05, "loss": 1.5137, "step": 9162 }, { "epoch": 0.7, "grad_norm": 0.6463891267776489, "learning_rate": 4.1483803401013796e-05, "loss": 2.5463, "step": 9163 }, { "epoch": 0.7, "grad_norm": 1.1249768733978271, "learning_rate": 4.146435056717062e-05, "loss": 0.7764, "step": 9164 }, { "epoch": 0.7, "grad_norm": 1.1974518299102783, "learning_rate": 4.144490110239081e-05, "loss": 1.9039, "step": 9165 }, { "epoch": 0.7, "grad_norm": 1.6575027704238892, "learning_rate": 4.142545500779382e-05, "loss": 1.2208, "step": 9166 }, { "epoch": 0.7, "grad_norm": 1.1271458864212036, "learning_rate": 4.140601228449883e-05, "loss": 1.3399, "step": 9167 }, { "epoch": 0.7, "grad_norm": 1.398650050163269, "learning_rate": 4.1386572933624934e-05, "loss": 1.1802, "step": 9168 }, { "epoch": 0.7, "grad_norm": 1.2491923570632935, "learning_rate": 4.136713695629095e-05, "loss": 1.6756, "step": 9169 }, { "epoch": 0.7, "grad_norm": 1.2956500053405762, 
"learning_rate": 4.134770435361549e-05, "loss": 1.6986, "step": 9170 }, { "epoch": 0.7, "grad_norm": 1.1864694356918335, "learning_rate": 4.1328275126717064e-05, "loss": 1.7804, "step": 9171 }, { "epoch": 0.7, "grad_norm": 1.4125038385391235, "learning_rate": 4.1308849276713955e-05, "loss": 1.3388, "step": 9172 }, { "epoch": 0.7, "grad_norm": 1.5541126728057861, "learning_rate": 4.128942680472416e-05, "loss": 1.5338, "step": 9173 }, { "epoch": 0.7, "grad_norm": 1.0680487155914307, "learning_rate": 4.12700077118656e-05, "loss": 1.2633, "step": 9174 }, { "epoch": 0.7, "grad_norm": 5.662622451782227, "learning_rate": 4.125059199925599e-05, "loss": 2.7087, "step": 9175 }, { "epoch": 0.7, "grad_norm": 1.4750702381134033, "learning_rate": 4.1231179668012746e-05, "loss": 1.4873, "step": 9176 }, { "epoch": 0.7, "grad_norm": 1.5138061046600342, "learning_rate": 4.121177071925322e-05, "loss": 1.2231, "step": 9177 }, { "epoch": 0.7, "grad_norm": 1.5665578842163086, "learning_rate": 4.1192365154094436e-05, "loss": 1.7365, "step": 9178 }, { "epoch": 0.7, "grad_norm": 1.10618257522583, "learning_rate": 4.1172962973653385e-05, "loss": 1.4539, "step": 9179 }, { "epoch": 0.7, "grad_norm": 1.5208107233047485, "learning_rate": 4.11535641790467e-05, "loss": 0.9959, "step": 9180 }, { "epoch": 0.7, "grad_norm": 1.5009397268295288, "learning_rate": 4.113416877139095e-05, "loss": 1.3107, "step": 9181 }, { "epoch": 0.7, "grad_norm": 5.52089262008667, "learning_rate": 4.111477675180238e-05, "loss": 1.8708, "step": 9182 }, { "epoch": 0.7, "grad_norm": 1.2474334239959717, "learning_rate": 4.109538812139716e-05, "loss": 0.9163, "step": 9183 }, { "epoch": 0.7, "grad_norm": 1.0486036539077759, "learning_rate": 4.107600288129125e-05, "loss": 0.6144, "step": 9184 }, { "epoch": 0.7, "grad_norm": 1.5641462802886963, "learning_rate": 4.105662103260032e-05, "loss": 1.6797, "step": 9185 }, { "epoch": 0.7, "grad_norm": 1.3768640756607056, "learning_rate": 4.103724257643991e-05, "loss": 1.4051, "step": 
9186 }, { "epoch": 0.7, "grad_norm": 0.8385048508644104, "learning_rate": 4.1017867513925445e-05, "loss": 0.6163, "step": 9187 }, { "epoch": 0.7, "grad_norm": 1.9625664949417114, "learning_rate": 4.099849584617199e-05, "loss": 1.5564, "step": 9188 }, { "epoch": 0.7, "grad_norm": 1.6785690784454346, "learning_rate": 4.0979127574294494e-05, "loss": 0.7718, "step": 9189 }, { "epoch": 0.7, "grad_norm": 2.0997748374938965, "learning_rate": 4.0959762699407766e-05, "loss": 1.9668, "step": 9190 }, { "epoch": 0.7, "grad_norm": 2.0796172618865967, "learning_rate": 4.09404012226263e-05, "loss": 1.4088, "step": 9191 }, { "epoch": 0.7, "grad_norm": 1.636194109916687, "learning_rate": 4.092104314506448e-05, "loss": 2.1649, "step": 9192 }, { "epoch": 0.7, "grad_norm": 1.217681646347046, "learning_rate": 4.090168846783655e-05, "loss": 1.61, "step": 9193 }, { "epoch": 0.7, "grad_norm": 1.8938217163085938, "learning_rate": 4.0882337192056374e-05, "loss": 1.3359, "step": 9194 }, { "epoch": 0.7, "grad_norm": 1.1497788429260254, "learning_rate": 4.0862989318837785e-05, "loss": 1.2299, "step": 9195 }, { "epoch": 0.7, "grad_norm": 1.523479700088501, "learning_rate": 4.084364484929438e-05, "loss": 1.629, "step": 9196 }, { "epoch": 0.7, "grad_norm": 2.2791121006011963, "learning_rate": 4.082430378453953e-05, "loss": 1.2783, "step": 9197 }, { "epoch": 0.7, "grad_norm": 2.0568912029266357, "learning_rate": 4.080496612568638e-05, "loss": 0.9926, "step": 9198 }, { "epoch": 0.7, "grad_norm": 1.8148268461227417, "learning_rate": 4.0785631873847994e-05, "loss": 1.198, "step": 9199 }, { "epoch": 0.7, "grad_norm": 3.4327824115753174, "learning_rate": 4.076630103013711e-05, "loss": 1.5785, "step": 9200 }, { "epoch": 0.7, "grad_norm": 1.5753545761108398, "learning_rate": 4.074697359566634e-05, "loss": 1.5893, "step": 9201 }, { "epoch": 0.7, "grad_norm": 3.0351669788360596, "learning_rate": 4.0727649571548146e-05, "loss": 1.1096, "step": 9202 }, { "epoch": 0.7, "grad_norm": 1.5508875846862793, 
"learning_rate": 4.070832895889466e-05, "loss": 1.2387, "step": 9203 }, { "epoch": 0.7, "grad_norm": 1.2837305068969727, "learning_rate": 4.068901175881793e-05, "loss": 0.6613, "step": 9204 }, { "epoch": 0.7, "grad_norm": 3.5473275184631348, "learning_rate": 4.06696979724298e-05, "loss": 1.6062, "step": 9205 }, { "epoch": 0.7, "grad_norm": 2.063524007797241, "learning_rate": 4.065038760084186e-05, "loss": 1.9555, "step": 9206 }, { "epoch": 0.7, "grad_norm": 1.5655395984649658, "learning_rate": 4.063108064516551e-05, "loss": 1.2254, "step": 9207 }, { "epoch": 0.7, "grad_norm": 2.676597833633423, "learning_rate": 4.0611777106512015e-05, "loss": 1.6936, "step": 9208 }, { "epoch": 0.7, "grad_norm": 1.6658401489257812, "learning_rate": 4.0592476985992364e-05, "loss": 1.3579, "step": 9209 }, { "epoch": 0.7, "grad_norm": 1.069759488105774, "learning_rate": 4.0573180284717425e-05, "loss": 1.3252, "step": 9210 }, { "epoch": 0.7, "grad_norm": 1.4058916568756104, "learning_rate": 4.0553887003797854e-05, "loss": 1.4319, "step": 9211 }, { "epoch": 0.7, "grad_norm": 0.9984927177429199, "learning_rate": 4.0534597144344025e-05, "loss": 0.8514, "step": 9212 }, { "epoch": 0.7, "grad_norm": 2.1552534103393555, "learning_rate": 4.0515310707466206e-05, "loss": 1.5575, "step": 9213 }, { "epoch": 0.7, "grad_norm": 1.5039321184158325, "learning_rate": 4.04960276942745e-05, "loss": 1.3166, "step": 9214 }, { "epoch": 0.7, "grad_norm": 1.093422770500183, "learning_rate": 4.047674810587867e-05, "loss": 1.0483, "step": 9215 }, { "epoch": 0.7, "grad_norm": 1.1157125234603882, "learning_rate": 4.045747194338844e-05, "loss": 1.0884, "step": 9216 }, { "epoch": 0.7, "grad_norm": 4.809112071990967, "learning_rate": 4.043819920791322e-05, "loss": 1.0872, "step": 9217 }, { "epoch": 0.7, "grad_norm": 1.0385398864746094, "learning_rate": 4.041892990056225e-05, "loss": 1.1126, "step": 9218 }, { "epoch": 0.7, "grad_norm": 1.5974191427230835, "learning_rate": 4.039966402244462e-05, "loss": 1.1487, "step": 
9219 }, { "epoch": 0.7, "grad_norm": 1.602205514907837, "learning_rate": 4.038040157466918e-05, "loss": 1.5192, "step": 9220 }, { "epoch": 0.7, "grad_norm": 1.8500399589538574, "learning_rate": 4.0361142558344634e-05, "loss": 1.6772, "step": 9221 }, { "epoch": 0.7, "grad_norm": 1.5894545316696167, "learning_rate": 4.034188697457939e-05, "loss": 1.3115, "step": 9222 }, { "epoch": 0.7, "grad_norm": 1.370131254196167, "learning_rate": 4.0322634824481744e-05, "loss": 1.2989, "step": 9223 }, { "epoch": 0.7, "grad_norm": 1.1206570863723755, "learning_rate": 4.0303386109159805e-05, "loss": 1.4135, "step": 9224 }, { "epoch": 0.7, "grad_norm": 1.2075494527816772, "learning_rate": 4.028414082972141e-05, "loss": 1.3519, "step": 9225 }, { "epoch": 0.7, "grad_norm": 1.5860027074813843, "learning_rate": 4.026489898727419e-05, "loss": 1.2974, "step": 9226 }, { "epoch": 0.7, "grad_norm": 1.1875858306884766, "learning_rate": 4.024566058292571e-05, "loss": 1.775, "step": 9227 }, { "epoch": 0.7, "grad_norm": 1.8492960929870605, "learning_rate": 4.0226425617783195e-05, "loss": 1.4267, "step": 9228 }, { "epoch": 0.7, "grad_norm": 4.290650367736816, "learning_rate": 4.020719409295373e-05, "loss": 1.3875, "step": 9229 }, { "epoch": 0.7, "grad_norm": 3.1365997791290283, "learning_rate": 4.0187966009544255e-05, "loss": 1.4359, "step": 9230 }, { "epoch": 0.7, "grad_norm": 1.1322635412216187, "learning_rate": 4.0168741368661366e-05, "loss": 1.073, "step": 9231 }, { "epoch": 0.7, "grad_norm": 1.0935739278793335, "learning_rate": 4.014952017141161e-05, "loss": 0.9436, "step": 9232 }, { "epoch": 0.7, "grad_norm": 1.4707224369049072, "learning_rate": 4.013030241890131e-05, "loss": 1.2761, "step": 9233 }, { "epoch": 0.7, "grad_norm": 1.1984118223190308, "learning_rate": 4.011108811223652e-05, "loss": 1.4196, "step": 9234 }, { "epoch": 0.7, "grad_norm": 3.1352896690368652, "learning_rate": 4.009187725252309e-05, "loss": 2.1767, "step": 9235 }, { "epoch": 0.7, "grad_norm": 1.2209537029266357, 
"learning_rate": 4.007266984086679e-05, "loss": 1.0056, "step": 9236 }, { "epoch": 0.7, "grad_norm": 1.0622844696044922, "learning_rate": 4.0053465878373054e-05, "loss": 1.4555, "step": 9237 }, { "epoch": 0.7, "grad_norm": 1.5034066438674927, "learning_rate": 4.0034265366147205e-05, "loss": 1.9397, "step": 9238 }, { "epoch": 0.7, "grad_norm": 1.178808331489563, "learning_rate": 4.001506830529438e-05, "loss": 1.3304, "step": 9239 }, { "epoch": 0.71, "grad_norm": 1.528324007987976, "learning_rate": 3.999587469691942e-05, "loss": 0.8728, "step": 9240 }, { "epoch": 0.71, "grad_norm": 1.4757391214370728, "learning_rate": 3.997668454212705e-05, "loss": 0.9285, "step": 9241 }, { "epoch": 0.71, "grad_norm": 1.061052680015564, "learning_rate": 3.995749784202183e-05, "loss": 1.2069, "step": 9242 }, { "epoch": 0.71, "grad_norm": 1.142892599105835, "learning_rate": 3.993831459770797e-05, "loss": 1.3755, "step": 9243 }, { "epoch": 0.71, "grad_norm": 1.1034833192825317, "learning_rate": 3.991913481028965e-05, "loss": 0.6773, "step": 9244 }, { "epoch": 0.71, "grad_norm": 2.8244400024414062, "learning_rate": 3.989995848087076e-05, "loss": 2.1839, "step": 9245 }, { "epoch": 0.71, "grad_norm": 1.990931749343872, "learning_rate": 3.988078561055495e-05, "loss": 1.2372, "step": 9246 }, { "epoch": 0.71, "grad_norm": 1.9147419929504395, "learning_rate": 3.986161620044578e-05, "loss": 1.5195, "step": 9247 }, { "epoch": 0.71, "grad_norm": 2.7341485023498535, "learning_rate": 3.984245025164659e-05, "loss": 1.2884, "step": 9248 }, { "epoch": 0.71, "grad_norm": 1.9106398820877075, "learning_rate": 3.982328776526042e-05, "loss": 1.2502, "step": 9249 }, { "epoch": 0.71, "grad_norm": 1.8192274570465088, "learning_rate": 3.980412874239021e-05, "loss": 1.4863, "step": 9250 }, { "epoch": 0.71, "grad_norm": 3.4692893028259277, "learning_rate": 3.978497318413873e-05, "loss": 2.3534, "step": 9251 }, { "epoch": 0.71, "grad_norm": 1.3130922317504883, "learning_rate": 3.976582109160839e-05, "loss": 
1.3332, "step": 9252 }, { "epoch": 0.71, "grad_norm": 4.21165657043457, "learning_rate": 3.97466724659016e-05, "loss": 1.838, "step": 9253 }, { "epoch": 0.71, "grad_norm": 1.057684302330017, "learning_rate": 3.972752730812043e-05, "loss": 1.1081, "step": 9254 }, { "epoch": 0.71, "grad_norm": 1.4614101648330688, "learning_rate": 3.970838561936675e-05, "loss": 1.9066, "step": 9255 }, { "epoch": 0.71, "grad_norm": 1.6178882122039795, "learning_rate": 3.9689247400742314e-05, "loss": 1.4648, "step": 9256 }, { "epoch": 0.71, "grad_norm": 3.2036874294281006, "learning_rate": 3.967011265334868e-05, "loss": 2.0684, "step": 9257 }, { "epoch": 0.71, "grad_norm": 1.7394654750823975, "learning_rate": 3.965098137828709e-05, "loss": 1.6654, "step": 9258 }, { "epoch": 0.71, "grad_norm": 2.54591703414917, "learning_rate": 3.963185357665868e-05, "loss": 1.4819, "step": 9259 }, { "epoch": 0.71, "grad_norm": 1.4243007898330688, "learning_rate": 3.9612729249564414e-05, "loss": 0.9311, "step": 9260 }, { "epoch": 0.71, "grad_norm": 2.3542520999908447, "learning_rate": 3.9593608398104945e-05, "loss": 1.782, "step": 9261 }, { "epoch": 0.71, "grad_norm": 3.667229652404785, "learning_rate": 3.9574491023380834e-05, "loss": 1.2846, "step": 9262 }, { "epoch": 0.71, "grad_norm": 1.1203004121780396, "learning_rate": 3.955537712649238e-05, "loss": 1.4986, "step": 9263 }, { "epoch": 0.71, "grad_norm": 1.1369056701660156, "learning_rate": 3.953626670853965e-05, "loss": 1.0932, "step": 9264 }, { "epoch": 0.71, "grad_norm": 2.609539270401001, "learning_rate": 3.95171597706226e-05, "loss": 1.971, "step": 9265 }, { "epoch": 0.71, "grad_norm": 1.0097670555114746, "learning_rate": 3.949805631384098e-05, "loss": 1.2215, "step": 9266 }, { "epoch": 0.71, "grad_norm": 1.127334713935852, "learning_rate": 3.947895633929424e-05, "loss": 1.2394, "step": 9267 }, { "epoch": 0.71, "grad_norm": 1.7135101556777954, "learning_rate": 3.9459859848081716e-05, "loss": 0.7494, "step": 9268 }, { "epoch": 0.71, "grad_norm": 
1.7723071575164795, "learning_rate": 3.9440766841302556e-05, "loss": 1.3396, "step": 9269 }, { "epoch": 0.71, "grad_norm": 1.4900315999984741, "learning_rate": 3.942167732005561e-05, "loss": 1.6648, "step": 9270 }, { "epoch": 0.71, "grad_norm": 1.8686603307724, "learning_rate": 3.940259128543967e-05, "loss": 0.956, "step": 9271 }, { "epoch": 0.71, "grad_norm": 1.2611387968063354, "learning_rate": 3.9383508738553155e-05, "loss": 1.3205, "step": 9272 }, { "epoch": 0.71, "grad_norm": 1.540205955505371, "learning_rate": 3.936442968049445e-05, "loss": 1.1307, "step": 9273 }, { "epoch": 0.71, "grad_norm": 2.1645240783691406, "learning_rate": 3.934535411236161e-05, "loss": 2.0988, "step": 9274 }, { "epoch": 0.71, "grad_norm": 1.08292818069458, "learning_rate": 3.932628203525259e-05, "loss": 1.2722, "step": 9275 }, { "epoch": 0.71, "grad_norm": 1.3999943733215332, "learning_rate": 3.9307213450265047e-05, "loss": 1.1415, "step": 9276 }, { "epoch": 0.71, "grad_norm": 1.4551838636398315, "learning_rate": 3.928814835849652e-05, "loss": 1.2754, "step": 9277 }, { "epoch": 0.71, "grad_norm": 1.6967642307281494, "learning_rate": 3.9269086761044346e-05, "loss": 1.4847, "step": 9278 }, { "epoch": 0.71, "grad_norm": 1.455539584159851, "learning_rate": 3.925002865900555e-05, "loss": 1.3913, "step": 9279 }, { "epoch": 0.71, "grad_norm": 5.878117084503174, "learning_rate": 3.9230974053477086e-05, "loss": 1.9072, "step": 9280 }, { "epoch": 0.71, "grad_norm": 1.2239388227462769, "learning_rate": 3.921192294555569e-05, "loss": 1.4378, "step": 9281 }, { "epoch": 0.71, "grad_norm": 1.1536494493484497, "learning_rate": 3.919287533633782e-05, "loss": 0.8434, "step": 9282 }, { "epoch": 0.71, "grad_norm": 1.2807855606079102, "learning_rate": 3.9173831226919746e-05, "loss": 1.4253, "step": 9283 }, { "epoch": 0.71, "grad_norm": 1.4090396165847778, "learning_rate": 3.915479061839763e-05, "loss": 2.167, "step": 9284 }, { "epoch": 0.71, "grad_norm": 5.0876688957214355, "learning_rate": 
3.91357535118673e-05, "loss": 2.1627, "step": 9285 }, { "epoch": 0.71, "grad_norm": 1.7483444213867188, "learning_rate": 3.9116719908424496e-05, "loss": 1.8882, "step": 9286 }, { "epoch": 0.71, "grad_norm": 1.2010669708251953, "learning_rate": 3.909768980916474e-05, "loss": 0.7981, "step": 9287 }, { "epoch": 0.71, "grad_norm": 2.77011775970459, "learning_rate": 3.907866321518324e-05, "loss": 2.146, "step": 9288 }, { "epoch": 0.71, "grad_norm": 1.3539929389953613, "learning_rate": 3.905964012757514e-05, "loss": 1.3921, "step": 9289 }, { "epoch": 0.71, "grad_norm": 1.9075589179992676, "learning_rate": 3.9040620547435356e-05, "loss": 1.3662, "step": 9290 }, { "epoch": 0.71, "grad_norm": 2.186551809310913, "learning_rate": 3.902160447585854e-05, "loss": 2.063, "step": 9291 }, { "epoch": 0.71, "grad_norm": 1.289928913116455, "learning_rate": 3.9002591913939145e-05, "loss": 1.2956, "step": 9292 }, { "epoch": 0.71, "grad_norm": 2.8253095149993896, "learning_rate": 3.898358286277152e-05, "loss": 1.6089, "step": 9293 }, { "epoch": 0.71, "grad_norm": 4.922677516937256, "learning_rate": 3.8964577323449655e-05, "loss": 1.8345, "step": 9294 }, { "epoch": 0.71, "grad_norm": 1.633941411972046, "learning_rate": 3.8945575297067506e-05, "loss": 2.1023, "step": 9295 }, { "epoch": 0.71, "grad_norm": 1.857745885848999, "learning_rate": 3.892657678471875e-05, "loss": 1.4098, "step": 9296 }, { "epoch": 0.71, "grad_norm": 0.7964453101158142, "learning_rate": 3.8907581787496805e-05, "loss": 0.9685, "step": 9297 }, { "epoch": 0.71, "grad_norm": 1.9030075073242188, "learning_rate": 3.8888590306494974e-05, "loss": 1.7674, "step": 9298 }, { "epoch": 0.71, "grad_norm": 1.351280927658081, "learning_rate": 3.886960234280635e-05, "loss": 1.5064, "step": 9299 }, { "epoch": 0.71, "grad_norm": 1.1643532514572144, "learning_rate": 3.885061789752379e-05, "loss": 1.4877, "step": 9300 }, { "epoch": 0.71, "grad_norm": 1.168094515800476, "learning_rate": 3.88316369717399e-05, "loss": 0.831, "step": 9301 }, 
{ "epoch": 0.71, "grad_norm": 1.1861305236816406, "learning_rate": 3.8812659566547226e-05, "loss": 2.0342, "step": 9302 }, { "epoch": 0.71, "grad_norm": 1.46492338180542, "learning_rate": 3.8793685683037936e-05, "loss": 1.4054, "step": 9303 }, { "epoch": 0.71, "grad_norm": 1.468453049659729, "learning_rate": 3.8774715322304155e-05, "loss": 1.4263, "step": 9304 }, { "epoch": 0.71, "grad_norm": 1.6113255023956299, "learning_rate": 3.875574848543774e-05, "loss": 1.1617, "step": 9305 }, { "epoch": 0.71, "grad_norm": 1.3543009757995605, "learning_rate": 3.873678517353029e-05, "loss": 1.2701, "step": 9306 }, { "epoch": 0.71, "grad_norm": 1.4007694721221924, "learning_rate": 3.871782538767328e-05, "loss": 0.9375, "step": 9307 }, { "epoch": 0.71, "grad_norm": 2.1317076683044434, "learning_rate": 3.869886912895798e-05, "loss": 2.1248, "step": 9308 }, { "epoch": 0.71, "grad_norm": 1.4453908205032349, "learning_rate": 3.8679916398475394e-05, "loss": 1.1867, "step": 9309 }, { "epoch": 0.71, "grad_norm": 0.7943828701972961, "learning_rate": 3.866096719731639e-05, "loss": 0.9124, "step": 9310 }, { "epoch": 0.71, "grad_norm": 1.6156301498413086, "learning_rate": 3.8642021526571595e-05, "loss": 1.6371, "step": 9311 }, { "epoch": 0.71, "grad_norm": 2.7978880405426025, "learning_rate": 3.86230793873314e-05, "loss": 1.4465, "step": 9312 }, { "epoch": 0.71, "grad_norm": 1.1218775510787964, "learning_rate": 3.860414078068607e-05, "loss": 1.5588, "step": 9313 }, { "epoch": 0.71, "grad_norm": 1.2825524806976318, "learning_rate": 3.858520570772566e-05, "loss": 1.2564, "step": 9314 }, { "epoch": 0.71, "grad_norm": 1.393345832824707, "learning_rate": 3.856627416953993e-05, "loss": 1.2582, "step": 9315 }, { "epoch": 0.71, "grad_norm": 1.3291457891464233, "learning_rate": 3.854734616721852e-05, "loss": 1.5326, "step": 9316 }, { "epoch": 0.71, "grad_norm": 1.166219711303711, "learning_rate": 3.85284217018509e-05, "loss": 1.0412, "step": 9317 }, { "epoch": 0.71, "grad_norm": 1.6193172931671143, 
"learning_rate": 3.850950077452621e-05, "loss": 0.9874, "step": 9318 }, { "epoch": 0.71, "grad_norm": 1.1734338998794556, "learning_rate": 3.849058338633351e-05, "loss": 1.6149, "step": 9319 }, { "epoch": 0.71, "grad_norm": 1.573292851448059, "learning_rate": 3.8471669538361586e-05, "loss": 1.9177, "step": 9320 }, { "epoch": 0.71, "grad_norm": 1.6775267124176025, "learning_rate": 3.8452759231699e-05, "loss": 1.5641, "step": 9321 }, { "epoch": 0.71, "grad_norm": 1.2018533945083618, "learning_rate": 3.843385246743417e-05, "loss": 0.6415, "step": 9322 }, { "epoch": 0.71, "grad_norm": 1.0059865713119507, "learning_rate": 3.8414949246655355e-05, "loss": 1.0755, "step": 9323 }, { "epoch": 0.71, "grad_norm": 1.013818383216858, "learning_rate": 3.839604957045044e-05, "loss": 0.9278, "step": 9324 }, { "epoch": 0.71, "grad_norm": 1.1720683574676514, "learning_rate": 3.8377153439907266e-05, "loss": 1.8405, "step": 9325 }, { "epoch": 0.71, "grad_norm": 2.1643896102905273, "learning_rate": 3.835826085611345e-05, "loss": 1.3846, "step": 9326 }, { "epoch": 0.71, "grad_norm": 1.3955641984939575, "learning_rate": 3.8339371820156275e-05, "loss": 1.9569, "step": 9327 }, { "epoch": 0.71, "grad_norm": 1.3991597890853882, "learning_rate": 3.832048633312302e-05, "loss": 0.8487, "step": 9328 }, { "epoch": 0.71, "grad_norm": 1.7085596323013306, "learning_rate": 3.830160439610059e-05, "loss": 1.1777, "step": 9329 }, { "epoch": 0.71, "grad_norm": 3.071423053741455, "learning_rate": 3.8282726010175715e-05, "loss": 1.6268, "step": 9330 }, { "epoch": 0.71, "grad_norm": 1.446593999862671, "learning_rate": 3.8263851176435015e-05, "loss": 1.2899, "step": 9331 }, { "epoch": 0.71, "grad_norm": 1.6082834005355835, "learning_rate": 3.824497989596485e-05, "loss": 1.8516, "step": 9332 }, { "epoch": 0.71, "grad_norm": 2.671269178390503, "learning_rate": 3.8226112169851325e-05, "loss": 1.6925, "step": 9333 }, { "epoch": 0.71, "grad_norm": 3.8905529975891113, "learning_rate": 3.8207247999180404e-05, 
"loss": 1.5008, "step": 9334 }, { "epoch": 0.71, "grad_norm": 1.33738112449646, "learning_rate": 3.8188387385037874e-05, "loss": 1.604, "step": 9335 }, { "epoch": 0.71, "grad_norm": 1.9531447887420654, "learning_rate": 3.8169530328509196e-05, "loss": 1.8266, "step": 9336 }, { "epoch": 0.71, "grad_norm": 1.3424493074417114, "learning_rate": 3.8150676830679765e-05, "loss": 1.1404, "step": 9337 }, { "epoch": 0.71, "grad_norm": 1.9307174682617188, "learning_rate": 3.813182689263466e-05, "loss": 1.7075, "step": 9338 }, { "epoch": 0.71, "grad_norm": 1.1146268844604492, "learning_rate": 3.811298051545885e-05, "loss": 1.1927, "step": 9339 }, { "epoch": 0.71, "grad_norm": 1.2768117189407349, "learning_rate": 3.809413770023701e-05, "loss": 1.6576, "step": 9340 }, { "epoch": 0.71, "grad_norm": 1.4417214393615723, "learning_rate": 3.807529844805369e-05, "loss": 1.3323, "step": 9341 }, { "epoch": 0.71, "grad_norm": 1.5617016553878784, "learning_rate": 3.805646275999315e-05, "loss": 1.4766, "step": 9342 }, { "epoch": 0.71, "grad_norm": 1.3788009881973267, "learning_rate": 3.803763063713951e-05, "loss": 1.0335, "step": 9343 }, { "epoch": 0.71, "grad_norm": 1.692737102508545, "learning_rate": 3.801880208057672e-05, "loss": 0.8069, "step": 9344 }, { "epoch": 0.71, "grad_norm": 1.3858587741851807, "learning_rate": 3.7999977091388396e-05, "loss": 1.0797, "step": 9345 }, { "epoch": 0.71, "grad_norm": 2.06674861907959, "learning_rate": 3.798115567065805e-05, "loss": 0.6767, "step": 9346 }, { "epoch": 0.71, "grad_norm": 1.5562913417816162, "learning_rate": 3.7962337819469006e-05, "loss": 1.444, "step": 9347 }, { "epoch": 0.71, "grad_norm": 1.563059687614441, "learning_rate": 3.794352353890431e-05, "loss": 1.9192, "step": 9348 }, { "epoch": 0.71, "grad_norm": 1.6495351791381836, "learning_rate": 3.7924712830046785e-05, "loss": 0.9886, "step": 9349 }, { "epoch": 0.71, "grad_norm": 2.750129222869873, "learning_rate": 3.790590569397917e-05, "loss": 1.3021, "step": 9350 }, { "epoch": 0.71, 
"grad_norm": 1.4616599082946777, "learning_rate": 3.7887102131783857e-05, "loss": 1.3256, "step": 9351 }, { "epoch": 0.71, "grad_norm": 1.6416219472885132, "learning_rate": 3.786830214454315e-05, "loss": 2.0885, "step": 9352 }, { "epoch": 0.71, "grad_norm": 1.5533115863800049, "learning_rate": 3.784950573333909e-05, "loss": 1.3265, "step": 9353 }, { "epoch": 0.71, "grad_norm": 1.6225290298461914, "learning_rate": 3.7830712899253473e-05, "loss": 0.8577, "step": 9354 }, { "epoch": 0.71, "grad_norm": 1.135449767112732, "learning_rate": 3.7811923643367974e-05, "loss": 1.2123, "step": 9355 }, { "epoch": 0.71, "grad_norm": 1.308250069618225, "learning_rate": 3.779313796676406e-05, "loss": 1.1584, "step": 9356 }, { "epoch": 0.71, "grad_norm": 1.7952264547348022, "learning_rate": 3.7774355870522905e-05, "loss": 2.0085, "step": 9357 }, { "epoch": 0.71, "grad_norm": 1.1659212112426758, "learning_rate": 3.7755577355725505e-05, "loss": 1.9183, "step": 9358 }, { "epoch": 0.71, "grad_norm": 1.4462133646011353, "learning_rate": 3.77368024234527e-05, "loss": 1.3456, "step": 9359 }, { "epoch": 0.71, "grad_norm": 3.4185240268707275, "learning_rate": 3.771803107478512e-05, "loss": 1.506, "step": 9360 }, { "epoch": 0.71, "grad_norm": 1.9574956893920898, "learning_rate": 3.769926331080311e-05, "loss": 0.8913, "step": 9361 }, { "epoch": 0.71, "grad_norm": 1.5424562692642212, "learning_rate": 3.76804991325869e-05, "loss": 1.5457, "step": 9362 }, { "epoch": 0.71, "grad_norm": 1.391724705696106, "learning_rate": 3.766173854121651e-05, "loss": 1.2797, "step": 9363 }, { "epoch": 0.71, "grad_norm": 3.021805763244629, "learning_rate": 3.764298153777163e-05, "loss": 2.4288, "step": 9364 }, { "epoch": 0.71, "grad_norm": 1.5145821571350098, "learning_rate": 3.7624228123331915e-05, "loss": 1.8322, "step": 9365 }, { "epoch": 0.71, "grad_norm": 1.149510383605957, "learning_rate": 3.7605478298976715e-05, "loss": 1.1897, "step": 9366 }, { "epoch": 0.71, "grad_norm": 1.550386905670166, "learning_rate": 
3.758673206578514e-05, "loss": 1.9982, "step": 9367 }, { "epoch": 0.71, "grad_norm": 8.272967338562012, "learning_rate": 3.756798942483617e-05, "loss": 1.8812, "step": 9368 }, { "epoch": 0.71, "grad_norm": 1.6984224319458008, "learning_rate": 3.7549250377208586e-05, "loss": 1.1262, "step": 9369 }, { "epoch": 0.71, "grad_norm": 1.9474843740463257, "learning_rate": 3.7530514923980884e-05, "loss": 0.8913, "step": 9370 }, { "epoch": 0.72, "grad_norm": 2.6588337421417236, "learning_rate": 3.751178306623141e-05, "loss": 1.7157, "step": 9371 }, { "epoch": 0.72, "grad_norm": 1.616062879562378, "learning_rate": 3.749305480503833e-05, "loss": 1.2959, "step": 9372 }, { "epoch": 0.72, "grad_norm": 1.9427233934402466, "learning_rate": 3.7474330141479496e-05, "loss": 1.7306, "step": 9373 }, { "epoch": 0.72, "grad_norm": 1.809307336807251, "learning_rate": 3.745560907663265e-05, "loss": 1.3119, "step": 9374 }, { "epoch": 0.72, "grad_norm": 1.4548817873001099, "learning_rate": 3.743689161157533e-05, "loss": 1.6177, "step": 9375 }, { "epoch": 0.72, "grad_norm": 1.279677152633667, "learning_rate": 3.741817774738481e-05, "loss": 1.6824, "step": 9376 }, { "epoch": 0.72, "grad_norm": 1.2360059022903442, "learning_rate": 3.739946748513814e-05, "loss": 0.7969, "step": 9377 }, { "epoch": 0.72, "grad_norm": 1.8968266248703003, "learning_rate": 3.738076082591227e-05, "loss": 1.5403, "step": 9378 }, { "epoch": 0.72, "grad_norm": 1.1536856889724731, "learning_rate": 3.736205777078381e-05, "loss": 1.3351, "step": 9379 }, { "epoch": 0.72, "grad_norm": 2.4387285709381104, "learning_rate": 3.734335832082927e-05, "loss": 1.0044, "step": 9380 }, { "epoch": 0.72, "grad_norm": 2.6101629734039307, "learning_rate": 3.7324662477124926e-05, "loss": 1.4648, "step": 9381 }, { "epoch": 0.72, "grad_norm": 1.6454564332962036, "learning_rate": 3.730597024074677e-05, "loss": 1.4695, "step": 9382 }, { "epoch": 0.72, "grad_norm": 1.429376244544983, "learning_rate": 3.72872816127707e-05, "loss": 1.0202, "step": 
9383 }, { "epoch": 0.72, "grad_norm": 1.97673761844635, "learning_rate": 3.7268596594272366e-05, "loss": 0.9828, "step": 9384 }, { "epoch": 0.72, "grad_norm": 1.4900039434432983, "learning_rate": 3.724991518632717e-05, "loss": 1.6627, "step": 9385 }, { "epoch": 0.72, "grad_norm": 1.52574622631073, "learning_rate": 3.72312373900103e-05, "loss": 1.0953, "step": 9386 }, { "epoch": 0.72, "grad_norm": 1.9910916090011597, "learning_rate": 3.7212563206396846e-05, "loss": 0.9357, "step": 9387 }, { "epoch": 0.72, "grad_norm": 1.263730525970459, "learning_rate": 3.7193892636561544e-05, "loss": 1.1991, "step": 9388 }, { "epoch": 0.72, "grad_norm": 1.7891689538955688, "learning_rate": 3.717522568157902e-05, "loss": 1.4709, "step": 9389 }, { "epoch": 0.72, "grad_norm": 1.0564053058624268, "learning_rate": 3.7156562342523704e-05, "loss": 1.5119, "step": 9390 }, { "epoch": 0.72, "grad_norm": 1.6538424491882324, "learning_rate": 3.7137902620469714e-05, "loss": 1.4482, "step": 9391 }, { "epoch": 0.72, "grad_norm": 1.1122554540634155, "learning_rate": 3.7119246516491046e-05, "loss": 1.1936, "step": 9392 }, { "epoch": 0.72, "grad_norm": 1.862882375717163, "learning_rate": 3.7100594031661514e-05, "loss": 1.4627, "step": 9393 }, { "epoch": 0.72, "grad_norm": 1.254613995552063, "learning_rate": 3.708194516705464e-05, "loss": 1.4315, "step": 9394 }, { "epoch": 0.72, "grad_norm": 5.203364849090576, "learning_rate": 3.706329992374373e-05, "loss": 1.1794, "step": 9395 }, { "epoch": 0.72, "grad_norm": 1.5141510963439941, "learning_rate": 3.7044658302802007e-05, "loss": 1.1635, "step": 9396 }, { "epoch": 0.72, "grad_norm": 2.494489908218384, "learning_rate": 3.702602030530231e-05, "loss": 1.361, "step": 9397 }, { "epoch": 0.72, "grad_norm": 1.6742576360702515, "learning_rate": 3.700738593231743e-05, "loss": 1.3941, "step": 9398 }, { "epoch": 0.72, "grad_norm": 1.2096691131591797, "learning_rate": 3.698875518491989e-05, "loss": 2.0468, "step": 9399 }, { "epoch": 0.72, "grad_norm": 
5.271038055419922, "learning_rate": 3.697012806418194e-05, "loss": 2.0178, "step": 9400 }, { "epoch": 0.72, "grad_norm": 1.492576241493225, "learning_rate": 3.6951504571175714e-05, "loss": 1.7866, "step": 9401 }, { "epoch": 0.72, "grad_norm": 1.2521634101867676, "learning_rate": 3.693288470697314e-05, "loss": 0.8124, "step": 9402 }, { "epoch": 0.72, "grad_norm": 1.9958571195602417, "learning_rate": 3.691426847264581e-05, "loss": 1.0653, "step": 9403 }, { "epoch": 0.72, "grad_norm": 1.963215708732605, "learning_rate": 3.6895655869265275e-05, "loss": 1.3379, "step": 9404 }, { "epoch": 0.72, "grad_norm": 1.2601780891418457, "learning_rate": 3.687704689790277e-05, "loss": 1.2319, "step": 9405 }, { "epoch": 0.72, "grad_norm": 1.9815926551818848, "learning_rate": 3.6858441559629306e-05, "loss": 1.0099, "step": 9406 }, { "epoch": 0.72, "grad_norm": 1.3264483213424683, "learning_rate": 3.6839839855515765e-05, "loss": 1.5459, "step": 9407 }, { "epoch": 0.72, "grad_norm": 0.9487545490264893, "learning_rate": 3.682124178663282e-05, "loss": 1.3582, "step": 9408 }, { "epoch": 0.72, "grad_norm": 1.4268908500671387, "learning_rate": 3.680264735405082e-05, "loss": 1.0736, "step": 9409 }, { "epoch": 0.72, "grad_norm": 1.2863502502441406, "learning_rate": 3.678405655884002e-05, "loss": 1.3199, "step": 9410 }, { "epoch": 0.72, "grad_norm": 1.3789910078048706, "learning_rate": 3.6765469402070454e-05, "loss": 1.0736, "step": 9411 }, { "epoch": 0.72, "grad_norm": 1.7586551904678345, "learning_rate": 3.674688588481187e-05, "loss": 1.4336, "step": 9412 }, { "epoch": 0.72, "grad_norm": 2.135277509689331, "learning_rate": 3.67283060081339e-05, "loss": 1.4214, "step": 9413 }, { "epoch": 0.72, "grad_norm": 1.961085319519043, "learning_rate": 3.670972977310591e-05, "loss": 1.5716, "step": 9414 }, { "epoch": 0.72, "grad_norm": 1.175917625427246, "learning_rate": 3.669115718079702e-05, "loss": 1.1192, "step": 9415 }, { "epoch": 0.72, "grad_norm": 1.1725215911865234, "learning_rate": 
3.667258823227624e-05, "loss": 1.2529, "step": 9416 }, { "epoch": 0.72, "grad_norm": 1.2410272359848022, "learning_rate": 3.665402292861233e-05, "loss": 1.2227, "step": 9417 }, { "epoch": 0.72, "grad_norm": 1.3458831310272217, "learning_rate": 3.663546127087378e-05, "loss": 1.3307, "step": 9418 }, { "epoch": 0.72, "grad_norm": 1.351218581199646, "learning_rate": 3.661690326012897e-05, "loss": 1.2178, "step": 9419 }, { "epoch": 0.72, "grad_norm": 1.8220505714416504, "learning_rate": 3.6598348897446013e-05, "loss": 1.0772, "step": 9420 }, { "epoch": 0.72, "grad_norm": 1.0865877866744995, "learning_rate": 3.657979818389278e-05, "loss": 1.5356, "step": 9421 }, { "epoch": 0.72, "grad_norm": 1.616424798965454, "learning_rate": 3.6561251120537035e-05, "loss": 1.4526, "step": 9422 }, { "epoch": 0.72, "grad_norm": 1.8060427904129028, "learning_rate": 3.6542707708446225e-05, "loss": 0.7032, "step": 9423 }, { "epoch": 0.72, "grad_norm": 4.345711708068848, "learning_rate": 3.6524167948687595e-05, "loss": 1.3502, "step": 9424 }, { "epoch": 0.72, "grad_norm": 1.2150874137878418, "learning_rate": 3.650563184232827e-05, "loss": 1.2138, "step": 9425 }, { "epoch": 0.72, "grad_norm": 1.4808340072631836, "learning_rate": 3.648709939043512e-05, "loss": 1.9845, "step": 9426 }, { "epoch": 0.72, "grad_norm": 1.3903181552886963, "learning_rate": 3.6468570594074746e-05, "loss": 1.6344, "step": 9427 }, { "epoch": 0.72, "grad_norm": 1.1609119176864624, "learning_rate": 3.64500454543136e-05, "loss": 0.9805, "step": 9428 }, { "epoch": 0.72, "grad_norm": 1.1211251020431519, "learning_rate": 3.643152397221796e-05, "loss": 1.1465, "step": 9429 }, { "epoch": 0.72, "grad_norm": 1.5941482782363892, "learning_rate": 3.641300614885378e-05, "loss": 1.7514, "step": 9430 }, { "epoch": 0.72, "grad_norm": 1.8375581502914429, "learning_rate": 3.639449198528692e-05, "loss": 1.5269, "step": 9431 }, { "epoch": 0.72, "grad_norm": 1.528397798538208, "learning_rate": 3.63759814825829e-05, "loss": 1.1915, "step": 
9432 }, { "epoch": 0.72, "grad_norm": 1.2243835926055908, "learning_rate": 3.63574746418072e-05, "loss": 1.0694, "step": 9433 }, { "epoch": 0.72, "grad_norm": 1.9221391677856445, "learning_rate": 3.6338971464024916e-05, "loss": 1.7831, "step": 9434 }, { "epoch": 0.72, "grad_norm": 1.3955334424972534, "learning_rate": 3.632047195030108e-05, "loss": 1.8648, "step": 9435 }, { "epoch": 0.72, "grad_norm": 3.3948962688446045, "learning_rate": 3.6301976101700386e-05, "loss": 1.6253, "step": 9436 }, { "epoch": 0.72, "grad_norm": 1.8184685707092285, "learning_rate": 3.6283483919287384e-05, "loss": 2.0338, "step": 9437 }, { "epoch": 0.72, "grad_norm": 1.5406023263931274, "learning_rate": 3.626499540412648e-05, "loss": 1.0551, "step": 9438 }, { "epoch": 0.72, "grad_norm": 2.137265920639038, "learning_rate": 3.6246510557281685e-05, "loss": 0.7988, "step": 9439 }, { "epoch": 0.72, "grad_norm": 1.7858755588531494, "learning_rate": 3.622802937981697e-05, "loss": 0.5663, "step": 9440 }, { "epoch": 0.72, "grad_norm": 1.493332028388977, "learning_rate": 3.6209551872796056e-05, "loss": 1.6791, "step": 9441 }, { "epoch": 0.72, "grad_norm": 1.0307573080062866, "learning_rate": 3.61910780372824e-05, "loss": 0.9337, "step": 9442 }, { "epoch": 0.72, "grad_norm": 1.5884088277816772, "learning_rate": 3.6172607874339235e-05, "loss": 1.5471, "step": 9443 }, { "epoch": 0.72, "grad_norm": 1.4981040954589844, "learning_rate": 3.615414138502971e-05, "loss": 1.3239, "step": 9444 }, { "epoch": 0.72, "grad_norm": 1.3382139205932617, "learning_rate": 3.61356785704166e-05, "loss": 1.9898, "step": 9445 }, { "epoch": 0.72, "grad_norm": 1.464675784111023, "learning_rate": 3.611721943156257e-05, "loss": 2.0748, "step": 9446 }, { "epoch": 0.72, "grad_norm": 1.8870552778244019, "learning_rate": 3.609876396953009e-05, "loss": 1.3339, "step": 9447 }, { "epoch": 0.72, "grad_norm": 1.256610631942749, "learning_rate": 3.6080312185381326e-05, "loss": 1.5008, "step": 9448 }, { "epoch": 0.72, "grad_norm": 
1.1204885244369507, "learning_rate": 3.60618640801783e-05, "loss": 1.546, "step": 9449 }, { "epoch": 0.72, "grad_norm": 1.5434536933898926, "learning_rate": 3.604341965498285e-05, "loss": 1.2103, "step": 9450 }, { "epoch": 0.72, "grad_norm": 1.5473138093948364, "learning_rate": 3.602497891085652e-05, "loss": 1.6456, "step": 9451 }, { "epoch": 0.72, "grad_norm": 2.817211151123047, "learning_rate": 3.600654184886064e-05, "loss": 2.5944, "step": 9452 }, { "epoch": 0.72, "grad_norm": 1.3142701387405396, "learning_rate": 3.5988108470056445e-05, "loss": 1.3839, "step": 9453 }, { "epoch": 0.72, "grad_norm": 1.2522789239883423, "learning_rate": 3.5969678775504825e-05, "loss": 1.1974, "step": 9454 }, { "epoch": 0.72, "grad_norm": 1.3900346755981445, "learning_rate": 3.595125276626653e-05, "loss": 1.3386, "step": 9455 }, { "epoch": 0.72, "grad_norm": 1.1085518598556519, "learning_rate": 3.5932830443402135e-05, "loss": 1.3828, "step": 9456 }, { "epoch": 0.72, "grad_norm": 1.5192286968231201, "learning_rate": 3.591441180797186e-05, "loss": 1.4794, "step": 9457 }, { "epoch": 0.72, "grad_norm": 1.737014651298523, "learning_rate": 3.589599686103585e-05, "loss": 1.3997, "step": 9458 }, { "epoch": 0.72, "grad_norm": 1.3329310417175293, "learning_rate": 3.587758560365403e-05, "loss": 1.1252, "step": 9459 }, { "epoch": 0.72, "grad_norm": 1.6675312519073486, "learning_rate": 3.585917803688603e-05, "loss": 1.9109, "step": 9460 }, { "epoch": 0.72, "grad_norm": 1.5386996269226074, "learning_rate": 3.584077416179129e-05, "loss": 1.0779, "step": 9461 }, { "epoch": 0.72, "grad_norm": 2.1966946125030518, "learning_rate": 3.5822373979429114e-05, "loss": 1.5407, "step": 9462 }, { "epoch": 0.72, "grad_norm": 1.2411404848098755, "learning_rate": 3.5803977490858475e-05, "loss": 1.282, "step": 9463 }, { "epoch": 0.72, "grad_norm": 1.8077791929244995, "learning_rate": 3.578558469713823e-05, "loss": 1.5873, "step": 9464 }, { "epoch": 0.72, "grad_norm": 2.1152448654174805, "learning_rate": 
3.576719559932703e-05, "loss": 0.9697, "step": 9465 }, { "epoch": 0.72, "grad_norm": 1.7856829166412354, "learning_rate": 3.57488101984832e-05, "loss": 1.2421, "step": 9466 }, { "epoch": 0.72, "grad_norm": 1.9929717779159546, "learning_rate": 3.573042849566495e-05, "loss": 1.06, "step": 9467 }, { "epoch": 0.72, "grad_norm": 1.251306176185608, "learning_rate": 3.5712050491930305e-05, "loss": 1.0807, "step": 9468 }, { "epoch": 0.72, "grad_norm": 1.223740577697754, "learning_rate": 3.5693676188336944e-05, "loss": 0.8587, "step": 9469 }, { "epoch": 0.72, "grad_norm": 1.1041268110275269, "learning_rate": 3.5675305585942496e-05, "loss": 0.6763, "step": 9470 }, { "epoch": 0.72, "grad_norm": 1.8523273468017578, "learning_rate": 3.565693868580424e-05, "loss": 0.9453, "step": 9471 }, { "epoch": 0.72, "grad_norm": 2.721379041671753, "learning_rate": 3.563857548897927e-05, "loss": 2.2802, "step": 9472 }, { "epoch": 0.72, "grad_norm": 1.8389713764190674, "learning_rate": 3.5620215996524555e-05, "loss": 1.1547, "step": 9473 }, { "epoch": 0.72, "grad_norm": 1.4603992700576782, "learning_rate": 3.560186020949679e-05, "loss": 1.9418, "step": 9474 }, { "epoch": 0.72, "grad_norm": 2.7190983295440674, "learning_rate": 3.558350812895238e-05, "loss": 1.5864, "step": 9475 }, { "epoch": 0.72, "grad_norm": 1.4883360862731934, "learning_rate": 3.556515975594766e-05, "loss": 1.2429, "step": 9476 }, { "epoch": 0.72, "grad_norm": 2.1009681224823, "learning_rate": 3.554681509153871e-05, "loss": 1.0793, "step": 9477 }, { "epoch": 0.72, "grad_norm": 1.49758780002594, "learning_rate": 3.5528474136781285e-05, "loss": 1.5886, "step": 9478 }, { "epoch": 0.72, "grad_norm": 2.4539718627929688, "learning_rate": 3.5510136892731096e-05, "loss": 1.255, "step": 9479 }, { "epoch": 0.72, "grad_norm": 1.330470323562622, "learning_rate": 3.549180336044352e-05, "loss": 1.5045, "step": 9480 }, { "epoch": 0.72, "grad_norm": 1.9888125658035278, "learning_rate": 3.5473473540973724e-05, "loss": 2.5596, "step": 9481 
}, { "epoch": 0.72, "grad_norm": 1.3279308080673218, "learning_rate": 3.5455147435376724e-05, "loss": 1.5693, "step": 9482 }, { "epoch": 0.72, "grad_norm": 1.5273257493972778, "learning_rate": 3.543682504470732e-05, "loss": 1.4256, "step": 9483 }, { "epoch": 0.72, "grad_norm": 1.2714508771896362, "learning_rate": 3.5418506370020024e-05, "loss": 1.932, "step": 9484 }, { "epoch": 0.72, "grad_norm": 1.3386517763137817, "learning_rate": 3.54001914123692e-05, "loss": 1.2539, "step": 9485 }, { "epoch": 0.72, "grad_norm": 1.237375259399414, "learning_rate": 3.538188017280901e-05, "loss": 0.9483, "step": 9486 }, { "epoch": 0.72, "grad_norm": 2.87931489944458, "learning_rate": 3.5363572652393326e-05, "loss": 1.7151, "step": 9487 }, { "epoch": 0.72, "grad_norm": 1.8729020357131958, "learning_rate": 3.5345268852175895e-05, "loss": 1.0836, "step": 9488 }, { "epoch": 0.72, "grad_norm": 1.9238219261169434, "learning_rate": 3.532696877321018e-05, "loss": 1.9985, "step": 9489 }, { "epoch": 0.72, "grad_norm": 2.0167112350463867, "learning_rate": 3.530867241654942e-05, "loss": 1.6641, "step": 9490 }, { "epoch": 0.72, "grad_norm": 4.215875625610352, "learning_rate": 3.529037978324671e-05, "loss": 1.8092, "step": 9491 }, { "epoch": 0.72, "grad_norm": 1.6185901165008545, "learning_rate": 3.52720908743549e-05, "loss": 1.6526, "step": 9492 }, { "epoch": 0.72, "grad_norm": 1.789292812347412, "learning_rate": 3.5253805690926643e-05, "loss": 0.9567, "step": 9493 }, { "epoch": 0.72, "grad_norm": 1.0261595249176025, "learning_rate": 3.5235524234014316e-05, "loss": 1.3801, "step": 9494 }, { "epoch": 0.72, "grad_norm": 2.249420404434204, "learning_rate": 3.521724650467013e-05, "loss": 0.8645, "step": 9495 }, { "epoch": 0.72, "grad_norm": 1.5775787830352783, "learning_rate": 3.519897250394612e-05, "loss": 1.6669, "step": 9496 }, { "epoch": 0.72, "grad_norm": 1.295161485671997, "learning_rate": 3.518070223289401e-05, "loss": 1.1011, "step": 9497 }, { "epoch": 0.72, "grad_norm": 4.133512496948242, 
"learning_rate": 3.516243569256535e-05, "loss": 0.8105, "step": 9498 }, { "epoch": 0.72, "grad_norm": 1.25662362575531, "learning_rate": 3.514417288401155e-05, "loss": 1.0862, "step": 9499 }, { "epoch": 0.72, "grad_norm": 0.9864327907562256, "learning_rate": 3.512591380828365e-05, "loss": 1.1576, "step": 9500 }, { "epoch": 0.72, "grad_norm": 2.0144412517547607, "learning_rate": 3.5107658466432605e-05, "loss": 1.7503, "step": 9501 }, { "epoch": 0.73, "grad_norm": 2.6700339317321777, "learning_rate": 3.5089406859509166e-05, "loss": 1.2749, "step": 9502 }, { "epoch": 0.73, "grad_norm": 2.0329387187957764, "learning_rate": 3.5071158988563725e-05, "loss": 1.2398, "step": 9503 }, { "epoch": 0.73, "grad_norm": 1.0959526300430298, "learning_rate": 3.505291485464661e-05, "loss": 1.4587, "step": 9504 }, { "epoch": 0.73, "grad_norm": 1.601866602897644, "learning_rate": 3.503467445880789e-05, "loss": 1.7393, "step": 9505 }, { "epoch": 0.73, "grad_norm": 2.076683521270752, "learning_rate": 3.5016437802097355e-05, "loss": 1.897, "step": 9506 }, { "epoch": 0.73, "grad_norm": 2.432169198989868, "learning_rate": 3.499820488556469e-05, "loss": 1.3977, "step": 9507 }, { "epoch": 0.73, "grad_norm": 2.655911684036255, "learning_rate": 3.497997571025926e-05, "loss": 1.3857, "step": 9508 }, { "epoch": 0.73, "grad_norm": 2.43505597114563, "learning_rate": 3.496175027723024e-05, "loss": 1.5237, "step": 9509 }, { "epoch": 0.73, "grad_norm": 2.492952823638916, "learning_rate": 3.494352858752664e-05, "loss": 1.7183, "step": 9510 }, { "epoch": 0.73, "grad_norm": 2.7616748809814453, "learning_rate": 3.492531064219725e-05, "loss": 1.3652, "step": 9511 }, { "epoch": 0.73, "grad_norm": 1.759531855583191, "learning_rate": 3.4907096442290545e-05, "loss": 1.1247, "step": 9512 }, { "epoch": 0.73, "grad_norm": 1.8355592489242554, "learning_rate": 3.48888859888549e-05, "loss": 1.2128, "step": 9513 }, { "epoch": 0.73, "grad_norm": 1.2791695594787598, "learning_rate": 3.487067928293848e-05, "loss": 1.268, 
"step": 9514 }, { "epoch": 0.73, "grad_norm": 1.9698097705841064, "learning_rate": 3.485247632558909e-05, "loss": 1.2462, "step": 9515 }, { "epoch": 0.73, "grad_norm": 1.023005723953247, "learning_rate": 3.483427711785449e-05, "loss": 0.958, "step": 9516 }, { "epoch": 0.73, "grad_norm": 1.1512280702590942, "learning_rate": 3.481608166078213e-05, "loss": 1.2565, "step": 9517 }, { "epoch": 0.73, "grad_norm": 1.374578595161438, "learning_rate": 3.4797889955419206e-05, "loss": 1.1303, "step": 9518 }, { "epoch": 0.73, "grad_norm": 1.1762722730636597, "learning_rate": 3.477970200281281e-05, "loss": 1.5186, "step": 9519 }, { "epoch": 0.73, "grad_norm": 1.3077661991119385, "learning_rate": 3.476151780400979e-05, "loss": 1.3571, "step": 9520 }, { "epoch": 0.73, "grad_norm": 1.100192666053772, "learning_rate": 3.474333736005668e-05, "loss": 1.0246, "step": 9521 }, { "epoch": 0.73, "grad_norm": 1.918028712272644, "learning_rate": 3.472516067199991e-05, "loss": 1.8526, "step": 9522 }, { "epoch": 0.73, "grad_norm": 1.5021004676818848, "learning_rate": 3.470698774088569e-05, "loss": 1.3795, "step": 9523 }, { "epoch": 0.73, "grad_norm": 3.086103916168213, "learning_rate": 3.4688818567759884e-05, "loss": 1.8669, "step": 9524 }, { "epoch": 0.73, "grad_norm": 1.7514300346374512, "learning_rate": 3.467065315366833e-05, "loss": 2.1551, "step": 9525 }, { "epoch": 0.73, "grad_norm": 1.08342444896698, "learning_rate": 3.465249149965647e-05, "loss": 1.3323, "step": 9526 }, { "epoch": 0.73, "grad_norm": 1.568081021308899, "learning_rate": 3.4634333606769685e-05, "loss": 1.3982, "step": 9527 }, { "epoch": 0.73, "grad_norm": 2.087114095687866, "learning_rate": 3.461617947605299e-05, "loss": 1.2874, "step": 9528 }, { "epoch": 0.73, "grad_norm": 1.5806125402450562, "learning_rate": 3.4598029108551334e-05, "loss": 1.4266, "step": 9529 }, { "epoch": 0.73, "grad_norm": 1.2814995050430298, "learning_rate": 3.457988250530931e-05, "loss": 0.7751, "step": 9530 }, { "epoch": 0.73, "grad_norm": 
1.2051947116851807, "learning_rate": 3.456173966737138e-05, "loss": 1.4193, "step": 9531 }, { "epoch": 0.73, "grad_norm": 2.6059985160827637, "learning_rate": 3.4543600595781825e-05, "loss": 1.181, "step": 9532 }, { "epoch": 0.73, "grad_norm": 2.5226054191589355, "learning_rate": 3.452546529158456e-05, "loss": 1.4584, "step": 9533 }, { "epoch": 0.73, "grad_norm": 1.229527473449707, "learning_rate": 3.450733375582342e-05, "loss": 1.1262, "step": 9534 }, { "epoch": 0.73, "grad_norm": 2.4998366832733154, "learning_rate": 3.448920598954203e-05, "loss": 1.2919, "step": 9535 }, { "epoch": 0.73, "grad_norm": 1.182239055633545, "learning_rate": 3.447108199378368e-05, "loss": 1.2581, "step": 9536 }, { "epoch": 0.73, "grad_norm": 2.205897569656372, "learning_rate": 3.445296176959151e-05, "loss": 1.9446, "step": 9537 }, { "epoch": 0.73, "grad_norm": 1.4767985343933105, "learning_rate": 3.443484531800849e-05, "loss": 1.7489, "step": 9538 }, { "epoch": 0.73, "grad_norm": 1.317359209060669, "learning_rate": 3.4416732640077265e-05, "loss": 1.0574, "step": 9539 }, { "epoch": 0.73, "grad_norm": 1.1987041234970093, "learning_rate": 3.4398623736840353e-05, "loss": 1.3855, "step": 9540 }, { "epoch": 0.73, "grad_norm": 3.9622018337249756, "learning_rate": 3.4380518609340076e-05, "loss": 2.1832, "step": 9541 }, { "epoch": 0.73, "grad_norm": 1.2268788814544678, "learning_rate": 3.4362417258618384e-05, "loss": 1.0258, "step": 9542 }, { "epoch": 0.73, "grad_norm": 1.7920523881912231, "learning_rate": 3.4344319685717184e-05, "loss": 1.5503, "step": 9543 }, { "epoch": 0.73, "grad_norm": 1.4790834188461304, "learning_rate": 3.432622589167812e-05, "loss": 1.5141, "step": 9544 }, { "epoch": 0.73, "grad_norm": 1.9793848991394043, "learning_rate": 3.430813587754256e-05, "loss": 1.5674, "step": 9545 }, { "epoch": 0.73, "grad_norm": 1.6778550148010254, "learning_rate": 3.4290049644351644e-05, "loss": 1.7414, "step": 9546 }, { "epoch": 0.73, "grad_norm": 1.2637592554092407, "learning_rate": 
3.4271967193146405e-05, "loss": 1.5182, "step": 9547 }, { "epoch": 0.73, "grad_norm": 1.4601554870605469, "learning_rate": 3.425388852496755e-05, "loss": 1.6965, "step": 9548 }, { "epoch": 0.73, "grad_norm": 0.9659027457237244, "learning_rate": 3.4235813640855605e-05, "loss": 1.1099, "step": 9549 }, { "epoch": 0.73, "grad_norm": 2.009004831314087, "learning_rate": 3.421774254185096e-05, "loss": 1.5117, "step": 9550 }, { "epoch": 0.73, "grad_norm": 1.1749380826950073, "learning_rate": 3.419967522899361e-05, "loss": 1.6563, "step": 9551 }, { "epoch": 0.73, "grad_norm": 1.228016972541809, "learning_rate": 3.418161170332349e-05, "loss": 1.4504, "step": 9552 }, { "epoch": 0.73, "grad_norm": 2.476452112197876, "learning_rate": 3.416355196588028e-05, "loss": 1.7791, "step": 9553 }, { "epoch": 0.73, "grad_norm": 2.188828945159912, "learning_rate": 3.4145496017703384e-05, "loss": 2.2413, "step": 9554 }, { "epoch": 0.73, "grad_norm": 2.3961262702941895, "learning_rate": 3.412744385983201e-05, "loss": 1.2786, "step": 9555 }, { "epoch": 0.73, "grad_norm": 2.8389835357666016, "learning_rate": 3.410939549330521e-05, "loss": 2.2052, "step": 9556 }, { "epoch": 0.73, "grad_norm": 2.538944959640503, "learning_rate": 3.409135091916171e-05, "loss": 1.6741, "step": 9557 }, { "epoch": 0.73, "grad_norm": 1.6358941793441772, "learning_rate": 3.407331013844013e-05, "loss": 1.4749, "step": 9558 }, { "epoch": 0.73, "grad_norm": 1.7735978364944458, "learning_rate": 3.405527315217883e-05, "loss": 1.2754, "step": 9559 }, { "epoch": 0.73, "grad_norm": 1.8636057376861572, "learning_rate": 3.403723996141589e-05, "loss": 2.0977, "step": 9560 }, { "epoch": 0.73, "grad_norm": 3.217118740081787, "learning_rate": 3.4019210567189255e-05, "loss": 1.4178, "step": 9561 }, { "epoch": 0.73, "grad_norm": 1.1929059028625488, "learning_rate": 3.400118497053665e-05, "loss": 1.8771, "step": 9562 }, { "epoch": 0.73, "grad_norm": 1.4241909980773926, "learning_rate": 3.398316317249548e-05, "loss": 0.6161, "step": 
9563 }, { "epoch": 0.73, "grad_norm": 1.6111741065979004, "learning_rate": 3.396514517410308e-05, "loss": 1.2844, "step": 9564 }, { "epoch": 0.73, "grad_norm": 3.7995760440826416, "learning_rate": 3.394713097639647e-05, "loss": 1.5215, "step": 9565 }, { "epoch": 0.73, "grad_norm": 1.2347664833068848, "learning_rate": 3.39291205804124e-05, "loss": 0.9944, "step": 9566 }, { "epoch": 0.73, "grad_norm": 1.403151512145996, "learning_rate": 3.3911113987187535e-05, "loss": 1.7804, "step": 9567 }, { "epoch": 0.73, "grad_norm": 1.22306227684021, "learning_rate": 3.389311119775828e-05, "loss": 1.3, "step": 9568 }, { "epoch": 0.73, "grad_norm": 2.7751686573028564, "learning_rate": 3.387511221316073e-05, "loss": 1.5992, "step": 9569 }, { "epoch": 0.73, "grad_norm": 2.405832529067993, "learning_rate": 3.385711703443087e-05, "loss": 1.159, "step": 9570 }, { "epoch": 0.73, "grad_norm": 1.0417312383651733, "learning_rate": 3.383912566260447e-05, "loss": 1.1186, "step": 9571 }, { "epoch": 0.73, "grad_norm": 1.4925471544265747, "learning_rate": 3.3821138098716944e-05, "loss": 1.0587, "step": 9572 }, { "epoch": 0.73, "grad_norm": 1.6017876863479614, "learning_rate": 3.3803154343803656e-05, "loss": 0.6758, "step": 9573 }, { "epoch": 0.73, "grad_norm": 1.230692744255066, "learning_rate": 3.3785174398899655e-05, "loss": 1.8854, "step": 9574 }, { "epoch": 0.73, "grad_norm": 1.3776626586914062, "learning_rate": 3.3767198265039745e-05, "loss": 1.368, "step": 9575 }, { "epoch": 0.73, "grad_norm": 2.103081464767456, "learning_rate": 3.374922594325859e-05, "loss": 1.8368, "step": 9576 }, { "epoch": 0.73, "grad_norm": 1.0389025211334229, "learning_rate": 3.373125743459063e-05, "loss": 0.5462, "step": 9577 }, { "epoch": 0.73, "grad_norm": 4.353646278381348, "learning_rate": 3.371329274007e-05, "loss": 1.913, "step": 9578 }, { "epoch": 0.73, "grad_norm": 1.2620830535888672, "learning_rate": 3.3695331860730704e-05, "loss": 1.552, "step": 9579 }, { "epoch": 0.73, "grad_norm": 1.4157586097717285, 
"learning_rate": 3.367737479760652e-05, "loss": 1.1727, "step": 9580 }, { "epoch": 0.73, "grad_norm": 1.899340271949768, "learning_rate": 3.365942155173092e-05, "loss": 1.706, "step": 9581 }, { "epoch": 0.73, "grad_norm": 2.834749937057495, "learning_rate": 3.364147212413726e-05, "loss": 1.0553, "step": 9582 }, { "epoch": 0.73, "grad_norm": 1.3416284322738647, "learning_rate": 3.362352651585864e-05, "loss": 1.4157, "step": 9583 }, { "epoch": 0.73, "grad_norm": 1.9186122417449951, "learning_rate": 3.3605584727927875e-05, "loss": 1.1832, "step": 9584 }, { "epoch": 0.73, "grad_norm": 1.3016397953033447, "learning_rate": 3.3587646761377644e-05, "loss": 1.0878, "step": 9585 }, { "epoch": 0.73, "grad_norm": 1.104848861694336, "learning_rate": 3.3569712617240435e-05, "loss": 1.1914, "step": 9586 }, { "epoch": 0.73, "grad_norm": 5.4983649253845215, "learning_rate": 3.3551782296548387e-05, "loss": 1.4139, "step": 9587 }, { "epoch": 0.73, "grad_norm": 1.3204792737960815, "learning_rate": 3.353385580033351e-05, "loss": 0.9406, "step": 9588 }, { "epoch": 0.73, "grad_norm": 1.3013451099395752, "learning_rate": 3.3515933129627644e-05, "loss": 1.077, "step": 9589 }, { "epoch": 0.73, "grad_norm": 0.9251663088798523, "learning_rate": 3.349801428546224e-05, "loss": 0.9393, "step": 9590 }, { "epoch": 0.73, "grad_norm": 1.1623406410217285, "learning_rate": 3.348009926886872e-05, "loss": 1.4784, "step": 9591 }, { "epoch": 0.73, "grad_norm": 9.706071853637695, "learning_rate": 3.346218808087811e-05, "loss": 2.8818, "step": 9592 }, { "epoch": 0.73, "grad_norm": 1.3630295991897583, "learning_rate": 3.3444280722521394e-05, "loss": 1.726, "step": 9593 }, { "epoch": 0.73, "grad_norm": 1.4190646409988403, "learning_rate": 3.342637719482915e-05, "loss": 0.8874, "step": 9594 }, { "epoch": 0.73, "grad_norm": 2.016441822052002, "learning_rate": 3.340847749883191e-05, "loss": 2.3444, "step": 9595 }, { "epoch": 0.73, "grad_norm": 1.5022873878479004, "learning_rate": 3.339058163555985e-05, "loss": 
1.426, "step": 9596 }, { "epoch": 0.73, "grad_norm": 1.3767929077148438, "learning_rate": 3.3372689606043e-05, "loss": 1.3325, "step": 9597 }, { "epoch": 0.73, "grad_norm": 1.2585657835006714, "learning_rate": 3.3354801411311167e-05, "loss": 0.8668, "step": 9598 }, { "epoch": 0.73, "grad_norm": 1.2262567281723022, "learning_rate": 3.3336917052393876e-05, "loss": 1.1621, "step": 9599 }, { "epoch": 0.73, "grad_norm": 1.8209789991378784, "learning_rate": 3.33190365303205e-05, "loss": 1.7553, "step": 9600 }, { "epoch": 0.73, "grad_norm": 1.6778119802474976, "learning_rate": 3.33011598461202e-05, "loss": 1.4622, "step": 9601 }, { "epoch": 0.73, "grad_norm": 2.8400046825408936, "learning_rate": 3.328328700082185e-05, "loss": 1.2539, "step": 9602 }, { "epoch": 0.73, "grad_norm": 1.7574217319488525, "learning_rate": 3.3265417995454094e-05, "loss": 0.7159, "step": 9603 }, { "epoch": 0.73, "grad_norm": 1.8620617389678955, "learning_rate": 3.3247552831045484e-05, "loss": 1.6085, "step": 9604 }, { "epoch": 0.73, "grad_norm": 1.2713595628738403, "learning_rate": 3.322969150862416e-05, "loss": 1.4702, "step": 9605 }, { "epoch": 0.73, "grad_norm": 1.3708604574203491, "learning_rate": 3.321183402921821e-05, "loss": 1.5793, "step": 9606 }, { "epoch": 0.73, "grad_norm": 1.2074393033981323, "learning_rate": 3.319398039385546e-05, "loss": 1.4061, "step": 9607 }, { "epoch": 0.73, "grad_norm": 2.115163564682007, "learning_rate": 3.3176130603563414e-05, "loss": 1.9452, "step": 9608 }, { "epoch": 0.73, "grad_norm": 1.8949812650680542, "learning_rate": 3.315828465936948e-05, "loss": 1.4978, "step": 9609 }, { "epoch": 0.73, "grad_norm": 1.2086422443389893, "learning_rate": 3.31404425623008e-05, "loss": 1.2502, "step": 9610 }, { "epoch": 0.73, "grad_norm": 2.403434991836548, "learning_rate": 3.312260431338428e-05, "loss": 1.6024, "step": 9611 }, { "epoch": 0.73, "grad_norm": 1.219364047050476, "learning_rate": 3.310476991364656e-05, "loss": 1.4608, "step": 9612 }, { "epoch": 0.73, 
"grad_norm": 1.1179393529891968, "learning_rate": 3.308693936411421e-05, "loss": 1.067, "step": 9613 }, { "epoch": 0.73, "grad_norm": 1.514949083328247, "learning_rate": 3.306911266581338e-05, "loss": 1.2321, "step": 9614 }, { "epoch": 0.73, "grad_norm": 2.319626808166504, "learning_rate": 3.305128981977017e-05, "loss": 1.7572, "step": 9615 }, { "epoch": 0.73, "grad_norm": 2.4656643867492676, "learning_rate": 3.303347082701038e-05, "loss": 1.8479, "step": 9616 }, { "epoch": 0.73, "grad_norm": 1.7957698106765747, "learning_rate": 3.3015655688559554e-05, "loss": 1.1119, "step": 9617 }, { "epoch": 0.73, "grad_norm": 1.616774082183838, "learning_rate": 3.299784440544309e-05, "loss": 0.8651, "step": 9618 }, { "epoch": 0.73, "grad_norm": 1.0720046758651733, "learning_rate": 3.2980036978686144e-05, "loss": 0.8703, "step": 9619 }, { "epoch": 0.73, "grad_norm": 1.2073410749435425, "learning_rate": 3.2962233409313626e-05, "loss": 1.5937, "step": 9620 }, { "epoch": 0.73, "grad_norm": 9.100342750549316, "learning_rate": 3.2944433698350184e-05, "loss": 2.8545, "step": 9621 }, { "epoch": 0.73, "grad_norm": 2.552844524383545, "learning_rate": 3.292663784682036e-05, "loss": 1.4144, "step": 9622 }, { "epoch": 0.73, "grad_norm": 1.0511822700500488, "learning_rate": 3.290884585574836e-05, "loss": 1.1094, "step": 9623 }, { "epoch": 0.73, "grad_norm": 1.3182519674301147, "learning_rate": 3.2891057726158225e-05, "loss": 0.9455, "step": 9624 }, { "epoch": 0.73, "grad_norm": 1.8038630485534668, "learning_rate": 3.287327345907381e-05, "loss": 2.0922, "step": 9625 }, { "epoch": 0.73, "grad_norm": 2.150822877883911, "learning_rate": 3.285549305551864e-05, "loss": 1.7108, "step": 9626 }, { "epoch": 0.73, "grad_norm": 2.3431406021118164, "learning_rate": 3.283771651651609e-05, "loss": 1.5482, "step": 9627 }, { "epoch": 0.73, "grad_norm": 1.362579584121704, "learning_rate": 3.281994384308936e-05, "loss": 1.1343, "step": 9628 }, { "epoch": 0.73, "grad_norm": 1.1395072937011719, "learning_rate": 
3.28021750362613e-05, "loss": 1.1206, "step": 9629 }, { "epoch": 0.73, "grad_norm": 1.0378881692886353, "learning_rate": 3.2784410097054666e-05, "loss": 0.9404, "step": 9630 }, { "epoch": 0.73, "grad_norm": 1.8437901735305786, "learning_rate": 3.2766649026491856e-05, "loss": 1.4191, "step": 9631 }, { "epoch": 0.73, "grad_norm": 1.433838963508606, "learning_rate": 3.274889182559522e-05, "loss": 1.6528, "step": 9632 }, { "epoch": 0.74, "grad_norm": 3.3487818241119385, "learning_rate": 3.2731138495386684e-05, "loss": 1.3579, "step": 9633 }, { "epoch": 0.74, "grad_norm": 1.8461899757385254, "learning_rate": 3.271338903688811e-05, "loss": 1.898, "step": 9634 }, { "epoch": 0.74, "grad_norm": 1.5713964700698853, "learning_rate": 3.2695643451121105e-05, "loss": 1.7595, "step": 9635 }, { "epoch": 0.74, "grad_norm": 3.494683027267456, "learning_rate": 3.2677901739106974e-05, "loss": 1.8432, "step": 9636 }, { "epoch": 0.74, "grad_norm": 1.2961037158966064, "learning_rate": 3.266016390186688e-05, "loss": 1.4665, "step": 9637 }, { "epoch": 0.74, "grad_norm": 1.289905071258545, "learning_rate": 3.264242994042177e-05, "loss": 1.3174, "step": 9638 }, { "epoch": 0.74, "grad_norm": 6.920060634613037, "learning_rate": 3.262469985579231e-05, "loss": 2.5267, "step": 9639 }, { "epoch": 0.74, "grad_norm": 3.0935425758361816, "learning_rate": 3.2606973648998915e-05, "loss": 2.0317, "step": 9640 }, { "epoch": 0.74, "grad_norm": 2.656130790710449, "learning_rate": 3.258925132106192e-05, "loss": 1.8466, "step": 9641 }, { "epoch": 0.74, "grad_norm": 1.1845929622650146, "learning_rate": 3.257153287300126e-05, "loss": 0.9438, "step": 9642 }, { "epoch": 0.74, "grad_norm": 1.7689869403839111, "learning_rate": 3.255381830583679e-05, "loss": 1.1222, "step": 9643 }, { "epoch": 0.74, "grad_norm": 1.5077046155929565, "learning_rate": 3.253610762058811e-05, "loss": 1.6612, "step": 9644 }, { "epoch": 0.74, "grad_norm": 1.6532773971557617, "learning_rate": 3.2518400818274495e-05, "loss": 1.2034, "step": 
9645 }, { "epoch": 0.74, "grad_norm": 1.0132386684417725, "learning_rate": 3.250069789991511e-05, "loss": 1.0219, "step": 9646 }, { "epoch": 0.74, "grad_norm": 2.067805767059326, "learning_rate": 3.24829988665289e-05, "loss": 1.6101, "step": 9647 }, { "epoch": 0.74, "grad_norm": 1.1010825634002686, "learning_rate": 3.2465303719134507e-05, "loss": 1.1211, "step": 9648 }, { "epoch": 0.74, "grad_norm": 2.5302841663360596, "learning_rate": 3.2447612458750365e-05, "loss": 0.8477, "step": 9649 }, { "epoch": 0.74, "grad_norm": 1.7995299100875854, "learning_rate": 3.2429925086394766e-05, "loss": 1.2156, "step": 9650 }, { "epoch": 0.74, "grad_norm": 2.539660930633545, "learning_rate": 3.241224160308565e-05, "loss": 2.1952, "step": 9651 }, { "epoch": 0.74, "grad_norm": 1.411434531211853, "learning_rate": 3.2394562009840835e-05, "loss": 1.0478, "step": 9652 }, { "epoch": 0.74, "grad_norm": 3.716478109359741, "learning_rate": 3.2376886307677935e-05, "loss": 1.8966, "step": 9653 }, { "epoch": 0.74, "grad_norm": 2.9167850017547607, "learning_rate": 3.2359214497614196e-05, "loss": 1.0634, "step": 9654 }, { "epoch": 0.74, "grad_norm": 2.723348379135132, "learning_rate": 3.2341546580666796e-05, "loss": 1.1042, "step": 9655 }, { "epoch": 0.74, "grad_norm": 1.6525120735168457, "learning_rate": 3.232388255785263e-05, "loss": 0.9342, "step": 9656 }, { "epoch": 0.74, "grad_norm": 1.3152036666870117, "learning_rate": 3.230622243018831e-05, "loss": 0.975, "step": 9657 }, { "epoch": 0.74, "grad_norm": 1.669870138168335, "learning_rate": 3.228856619869034e-05, "loss": 0.9638, "step": 9658 }, { "epoch": 0.74, "grad_norm": 3.4025113582611084, "learning_rate": 3.227091386437491e-05, "loss": 1.3364, "step": 9659 }, { "epoch": 0.74, "grad_norm": 1.0067095756530762, "learning_rate": 3.2253265428257984e-05, "loss": 0.6703, "step": 9660 }, { "epoch": 0.74, "grad_norm": 1.328635573387146, "learning_rate": 3.2235620891355344e-05, "loss": 1.3244, "step": 9661 }, { "epoch": 0.74, "grad_norm": 
1.3119194507598877, "learning_rate": 3.22179802546826e-05, "loss": 1.094, "step": 9662 }, { "epoch": 0.74, "grad_norm": 2.106924533843994, "learning_rate": 3.220034351925497e-05, "loss": 1.3651, "step": 9663 }, { "epoch": 0.74, "grad_norm": 1.2671046257019043, "learning_rate": 3.2182710686087614e-05, "loss": 0.92, "step": 9664 }, { "epoch": 0.74, "grad_norm": 1.1165986061096191, "learning_rate": 3.216508175619542e-05, "loss": 1.5821, "step": 9665 }, { "epoch": 0.74, "grad_norm": 2.0524020195007324, "learning_rate": 3.214745673059297e-05, "loss": 1.9734, "step": 9666 }, { "epoch": 0.74, "grad_norm": 2.4317147731781006, "learning_rate": 3.2129835610294754e-05, "loss": 2.0195, "step": 9667 }, { "epoch": 0.74, "grad_norm": 1.473257303237915, "learning_rate": 3.211221839631493e-05, "loss": 2.1089, "step": 9668 }, { "epoch": 0.74, "grad_norm": 1.0651922225952148, "learning_rate": 3.209460508966745e-05, "loss": 1.0617, "step": 9669 }, { "epoch": 0.74, "grad_norm": 1.04622220993042, "learning_rate": 3.207699569136608e-05, "loss": 1.0514, "step": 9670 }, { "epoch": 0.74, "grad_norm": 2.913651466369629, "learning_rate": 3.205939020242438e-05, "loss": 1.8911, "step": 9671 }, { "epoch": 0.74, "grad_norm": 3.423316240310669, "learning_rate": 3.2041788623855584e-05, "loss": 1.7539, "step": 9672 }, { "epoch": 0.74, "grad_norm": 0.9487608671188354, "learning_rate": 3.2024190956672794e-05, "loss": 0.9172, "step": 9673 }, { "epoch": 0.74, "grad_norm": 2.3908777236938477, "learning_rate": 3.200659720188889e-05, "loss": 1.7067, "step": 9674 }, { "epoch": 0.74, "grad_norm": 1.4644376039505005, "learning_rate": 3.198900736051642e-05, "loss": 2.1904, "step": 9675 }, { "epoch": 0.74, "grad_norm": 2.196383237838745, "learning_rate": 3.197142143356787e-05, "loss": 1.1135, "step": 9676 }, { "epoch": 0.74, "grad_norm": 1.2620933055877686, "learning_rate": 3.1953839422055354e-05, "loss": 0.8112, "step": 9677 }, { "epoch": 0.74, "grad_norm": 1.719286322593689, "learning_rate": 
3.19362613269908e-05, "loss": 1.7349, "step": 9678 }, { "epoch": 0.74, "grad_norm": 0.9935704469680786, "learning_rate": 3.1918687149385964e-05, "loss": 1.3483, "step": 9679 }, { "epoch": 0.74, "grad_norm": 2.2750909328460693, "learning_rate": 3.1901116890252345e-05, "loss": 1.3585, "step": 9680 }, { "epoch": 0.74, "grad_norm": 1.8869260549545288, "learning_rate": 3.188355055060118e-05, "loss": 1.1745, "step": 9681 }, { "epoch": 0.74, "grad_norm": 2.3955042362213135, "learning_rate": 3.186598813144354e-05, "loss": 1.5064, "step": 9682 }, { "epoch": 0.74, "grad_norm": 1.0825268030166626, "learning_rate": 3.184842963379025e-05, "loss": 1.2043, "step": 9683 }, { "epoch": 0.74, "grad_norm": 1.276241421699524, "learning_rate": 3.1830875058651874e-05, "loss": 1.4788, "step": 9684 }, { "epoch": 0.74, "grad_norm": 1.368427038192749, "learning_rate": 3.1813324407038825e-05, "loss": 1.2609, "step": 9685 }, { "epoch": 0.74, "grad_norm": 2.021273136138916, "learning_rate": 3.179577767996117e-05, "loss": 1.2614, "step": 9686 }, { "epoch": 0.74, "grad_norm": 0.8745055794715881, "learning_rate": 3.17782348784289e-05, "loss": 1.1109, "step": 9687 }, { "epoch": 0.74, "grad_norm": 1.0022211074829102, "learning_rate": 3.176069600345165e-05, "loss": 1.0461, "step": 9688 }, { "epoch": 0.74, "grad_norm": 1.2676414251327515, "learning_rate": 3.1743161056038925e-05, "loss": 1.25, "step": 9689 }, { "epoch": 0.74, "grad_norm": 2.332216739654541, "learning_rate": 3.172563003719993e-05, "loss": 1.4213, "step": 9690 }, { "epoch": 0.74, "grad_norm": 1.673018217086792, "learning_rate": 3.170810294794366e-05, "loss": 1.6795, "step": 9691 }, { "epoch": 0.74, "grad_norm": 2.0887253284454346, "learning_rate": 3.1690579789278975e-05, "loss": 1.1373, "step": 9692 }, { "epoch": 0.74, "grad_norm": 1.3142644166946411, "learning_rate": 3.167306056221435e-05, "loss": 1.3099, "step": 9693 }, { "epoch": 0.74, "grad_norm": 0.8756860494613647, "learning_rate": 3.165554526775816e-05, "loss": 0.9627, "step": 
9694 }, { "epoch": 0.74, "grad_norm": 1.6062521934509277, "learning_rate": 3.163803390691853e-05, "loss": 1.0638, "step": 9695 }, { "epoch": 0.74, "grad_norm": 1.83327054977417, "learning_rate": 3.1620526480703304e-05, "loss": 0.9312, "step": 9696 }, { "epoch": 0.74, "grad_norm": 3.0920257568359375, "learning_rate": 3.1603022990120125e-05, "loss": 1.5508, "step": 9697 }, { "epoch": 0.74, "grad_norm": 2.396242380142212, "learning_rate": 3.1585523436176466e-05, "loss": 1.7409, "step": 9698 }, { "epoch": 0.74, "grad_norm": 1.5328097343444824, "learning_rate": 3.156802781987948e-05, "loss": 1.5955, "step": 9699 }, { "epoch": 0.74, "grad_norm": 1.0185221433639526, "learning_rate": 3.1550536142236145e-05, "loss": 1.4913, "step": 9700 }, { "epoch": 0.74, "grad_norm": 1.1642926931381226, "learning_rate": 3.153304840425326e-05, "loss": 1.3033, "step": 9701 }, { "epoch": 0.74, "grad_norm": 1.1095781326293945, "learning_rate": 3.151556460693727e-05, "loss": 1.2611, "step": 9702 }, { "epoch": 0.74, "grad_norm": 1.699737310409546, "learning_rate": 3.149808475129452e-05, "loss": 1.2812, "step": 9703 }, { "epoch": 0.74, "grad_norm": 1.6892619132995605, "learning_rate": 3.148060883833109e-05, "loss": 0.9573, "step": 9704 }, { "epoch": 0.74, "grad_norm": 1.0860984325408936, "learning_rate": 3.146313686905279e-05, "loss": 1.0488, "step": 9705 }, { "epoch": 0.74, "grad_norm": 4.14115047454834, "learning_rate": 3.1445668844465206e-05, "loss": 1.7238, "step": 9706 }, { "epoch": 0.74, "grad_norm": 1.3412361145019531, "learning_rate": 3.142820476557378e-05, "loss": 0.8772, "step": 9707 }, { "epoch": 0.74, "grad_norm": 1.9671093225479126, "learning_rate": 3.1410744633383605e-05, "loss": 1.4746, "step": 9708 }, { "epoch": 0.74, "grad_norm": 2.3602120876312256, "learning_rate": 3.139328844889965e-05, "loss": 1.3196, "step": 9709 }, { "epoch": 0.74, "grad_norm": 1.3337053060531616, "learning_rate": 3.137583621312665e-05, "loss": 1.2547, "step": 9710 }, { "epoch": 0.74, "grad_norm": 
1.3852814435958862, "learning_rate": 3.1358387927069035e-05, "loss": 1.2905, "step": 9711 }, { "epoch": 0.74, "grad_norm": 2.3336424827575684, "learning_rate": 3.1340943591731045e-05, "loss": 1.0972, "step": 9712 }, { "epoch": 0.74, "grad_norm": 2.3148717880249023, "learning_rate": 3.132350320811677e-05, "loss": 1.4113, "step": 9713 }, { "epoch": 0.74, "grad_norm": 1.47774076461792, "learning_rate": 3.130606677722996e-05, "loss": 1.7109, "step": 9714 }, { "epoch": 0.74, "grad_norm": 0.9363292455673218, "learning_rate": 3.128863430007414e-05, "loss": 0.342, "step": 9715 }, { "epoch": 0.74, "grad_norm": 1.9959007501602173, "learning_rate": 3.127120577765272e-05, "loss": 1.706, "step": 9716 }, { "epoch": 0.74, "grad_norm": 1.1823447942733765, "learning_rate": 3.125378121096877e-05, "loss": 1.2591, "step": 9717 }, { "epoch": 0.74, "grad_norm": 2.0189688205718994, "learning_rate": 3.123636060102517e-05, "loss": 0.9954, "step": 9718 }, { "epoch": 0.74, "grad_norm": 1.0170953273773193, "learning_rate": 3.121894394882463e-05, "loss": 1.0767, "step": 9719 }, { "epoch": 0.74, "grad_norm": 1.1878498792648315, "learning_rate": 3.120153125536951e-05, "loss": 1.3463, "step": 9720 }, { "epoch": 0.74, "grad_norm": 1.4019849300384521, "learning_rate": 3.118412252166205e-05, "loss": 1.0227, "step": 9721 }, { "epoch": 0.74, "grad_norm": 1.5867702960968018, "learning_rate": 3.1166717748704234e-05, "loss": 1.5933, "step": 9722 }, { "epoch": 0.74, "grad_norm": 1.356972098350525, "learning_rate": 3.114931693749776e-05, "loss": 1.4028, "step": 9723 }, { "epoch": 0.74, "grad_norm": 3.205610752105713, "learning_rate": 3.113192008904421e-05, "loss": 1.5115, "step": 9724 }, { "epoch": 0.74, "grad_norm": 2.662620782852173, "learning_rate": 3.111452720434484e-05, "loss": 1.4286, "step": 9725 }, { "epoch": 0.74, "grad_norm": 2.054828405380249, "learning_rate": 3.1097138284400665e-05, "loss": 1.0401, "step": 9726 }, { "epoch": 0.74, "grad_norm": 1.8150609731674194, "learning_rate": 
3.107975333021255e-05, "loss": 1.8028, "step": 9727 }, { "epoch": 0.74, "grad_norm": 1.6549712419509888, "learning_rate": 3.1062372342781155e-05, "loss": 1.679, "step": 9728 }, { "epoch": 0.74, "grad_norm": 1.3425559997558594, "learning_rate": 3.104499532310676e-05, "loss": 0.5275, "step": 9729 }, { "epoch": 0.74, "grad_norm": 1.5867620706558228, "learning_rate": 3.102762227218957e-05, "loss": 1.8853, "step": 9730 }, { "epoch": 0.74, "grad_norm": 1.259122610092163, "learning_rate": 3.1010253191029526e-05, "loss": 1.309, "step": 9731 }, { "epoch": 0.74, "grad_norm": 2.7808828353881836, "learning_rate": 3.099288808062626e-05, "loss": 0.9045, "step": 9732 }, { "epoch": 0.74, "grad_norm": 1.4007353782653809, "learning_rate": 3.097552694197928e-05, "loss": 1.2708, "step": 9733 }, { "epoch": 0.74, "grad_norm": 1.2366021871566772, "learning_rate": 3.095816977608782e-05, "loss": 1.0848, "step": 9734 }, { "epoch": 0.74, "grad_norm": 1.3977885246276855, "learning_rate": 3.094081658395081e-05, "loss": 1.3245, "step": 9735 }, { "epoch": 0.74, "grad_norm": 1.2944623231887817, "learning_rate": 3.092346736656711e-05, "loss": 1.374, "step": 9736 }, { "epoch": 0.74, "grad_norm": 1.5262529850006104, "learning_rate": 3.090612212493526e-05, "loss": 1.9786, "step": 9737 }, { "epoch": 0.74, "grad_norm": 1.6528264284133911, "learning_rate": 3.088878086005351e-05, "loss": 1.278, "step": 9738 }, { "epoch": 0.74, "grad_norm": 1.88368821144104, "learning_rate": 3.087144357292001e-05, "loss": 1.2281, "step": 9739 }, { "epoch": 0.74, "grad_norm": 1.3611539602279663, "learning_rate": 3.085411026453263e-05, "loss": 1.2648, "step": 9740 }, { "epoch": 0.74, "grad_norm": 1.9304510354995728, "learning_rate": 3.0836780935888954e-05, "loss": 1.6363, "step": 9741 }, { "epoch": 0.74, "grad_norm": 1.8397153615951538, "learning_rate": 3.081945558798645e-05, "loss": 1.9895, "step": 9742 }, { "epoch": 0.74, "grad_norm": 1.6423293352127075, "learning_rate": 3.080213422182223e-05, "loss": 1.3554, "step": 9743 
}, { "epoch": 0.74, "grad_norm": 2.234128952026367, "learning_rate": 3.078481683839324e-05, "loss": 1.7556, "step": 9744 }, { "epoch": 0.74, "grad_norm": 1.1773382425308228, "learning_rate": 3.0767503438696213e-05, "loss": 1.8613, "step": 9745 }, { "epoch": 0.74, "grad_norm": 1.270391583442688, "learning_rate": 3.075019402372766e-05, "loss": 0.7722, "step": 9746 }, { "epoch": 0.74, "grad_norm": 1.534104585647583, "learning_rate": 3.07328885944838e-05, "loss": 2.5145, "step": 9747 }, { "epoch": 0.74, "grad_norm": 1.5906730890274048, "learning_rate": 3.071558715196066e-05, "loss": 1.2627, "step": 9748 }, { "epoch": 0.74, "grad_norm": 2.7284810543060303, "learning_rate": 3.069828969715408e-05, "loss": 1.0788, "step": 9749 }, { "epoch": 0.74, "grad_norm": 1.651460886001587, "learning_rate": 3.068099623105958e-05, "loss": 1.3964, "step": 9750 }, { "epoch": 0.74, "grad_norm": 1.2560676336288452, "learning_rate": 3.066370675467254e-05, "loss": 1.1659, "step": 9751 }, { "epoch": 0.74, "grad_norm": 1.4325075149536133, "learning_rate": 3.0646421268988014e-05, "loss": 1.2486, "step": 9752 }, { "epoch": 0.74, "grad_norm": 1.4336462020874023, "learning_rate": 3.062913977500095e-05, "loss": 1.0819, "step": 9753 }, { "epoch": 0.74, "grad_norm": 1.1796543598175049, "learning_rate": 3.061186227370592e-05, "loss": 0.9491, "step": 9754 }, { "epoch": 0.74, "grad_norm": 2.234588146209717, "learning_rate": 3.059458876609742e-05, "loss": 1.3163, "step": 9755 }, { "epoch": 0.74, "grad_norm": 1.5187678337097168, "learning_rate": 3.057731925316958e-05, "loss": 1.0044, "step": 9756 }, { "epoch": 0.74, "grad_norm": 1.5882353782653809, "learning_rate": 3.056005373591637e-05, "loss": 1.3407, "step": 9757 }, { "epoch": 0.74, "grad_norm": 2.534421920776367, "learning_rate": 3.054279221533157e-05, "loss": 2.128, "step": 9758 }, { "epoch": 0.74, "grad_norm": 1.5240341424942017, "learning_rate": 3.052553469240862e-05, "loss": 1.5118, "step": 9759 }, { "epoch": 0.74, "grad_norm": 1.1689331531524658, 
"learning_rate": 3.0508281168140806e-05, "loss": 1.0807, "step": 9760 }, { "epoch": 0.74, "grad_norm": 2.018256187438965, "learning_rate": 3.0491031643521218e-05, "loss": 1.8412, "step": 9761 }, { "epoch": 0.74, "grad_norm": 1.4319292306900024, "learning_rate": 3.0473786119542615e-05, "loss": 1.3326, "step": 9762 }, { "epoch": 0.74, "grad_norm": 2.0585758686065674, "learning_rate": 3.0456544597197546e-05, "loss": 0.9053, "step": 9763 }, { "epoch": 0.75, "grad_norm": 1.3050991296768188, "learning_rate": 3.0439307077478396e-05, "loss": 0.957, "step": 9764 }, { "epoch": 0.75, "grad_norm": 1.6787497997283936, "learning_rate": 3.0422073561377328e-05, "loss": 1.5044, "step": 9765 }, { "epoch": 0.75, "grad_norm": 1.5969103574752808, "learning_rate": 3.040484404988614e-05, "loss": 0.974, "step": 9766 }, { "epoch": 0.75, "grad_norm": 1.7159595489501953, "learning_rate": 3.0387618543996542e-05, "loss": 1.7626, "step": 9767 }, { "epoch": 0.75, "grad_norm": 2.0910236835479736, "learning_rate": 3.0370397044699994e-05, "loss": 0.8425, "step": 9768 }, { "epoch": 0.75, "grad_norm": 1.1034072637557983, "learning_rate": 3.035317955298762e-05, "loss": 1.1331, "step": 9769 }, { "epoch": 0.75, "grad_norm": 1.2705955505371094, "learning_rate": 3.0335966069850442e-05, "loss": 1.1796, "step": 9770 }, { "epoch": 0.75, "grad_norm": 1.664882779121399, "learning_rate": 3.0318756596279175e-05, "loss": 1.7982, "step": 9771 }, { "epoch": 0.75, "grad_norm": 2.5034825801849365, "learning_rate": 3.0301551133264294e-05, "loss": 1.7652, "step": 9772 }, { "epoch": 0.75, "grad_norm": 1.9671785831451416, "learning_rate": 3.02843496817961e-05, "loss": 0.7854, "step": 9773 }, { "epoch": 0.75, "grad_norm": 1.7389662265777588, "learning_rate": 3.026715224286465e-05, "loss": 1.5602, "step": 9774 }, { "epoch": 0.75, "grad_norm": 1.1642502546310425, "learning_rate": 3.0249958817459722e-05, "loss": 1.6123, "step": 9775 }, { "epoch": 0.75, "grad_norm": 2.4613800048828125, "learning_rate": 3.0232769406570917e-05, 
"loss": 0.9235, "step": 9776 }, { "epoch": 0.75, "grad_norm": 4.02164888381958, "learning_rate": 3.0215584011187603e-05, "loss": 1.7988, "step": 9777 }, { "epoch": 0.75, "grad_norm": 4.637335777282715, "learning_rate": 3.019840263229885e-05, "loss": 1.8835, "step": 9778 }, { "epoch": 0.75, "grad_norm": 1.4226595163345337, "learning_rate": 3.0181225270893598e-05, "loss": 1.25, "step": 9779 }, { "epoch": 0.75, "grad_norm": 1.100574254989624, "learning_rate": 3.0164051927960492e-05, "loss": 1.336, "step": 9780 }, { "epoch": 0.75, "grad_norm": 1.7167164087295532, "learning_rate": 3.014688260448789e-05, "loss": 1.1394, "step": 9781 }, { "epoch": 0.75, "grad_norm": 1.6332380771636963, "learning_rate": 3.0129717301464054e-05, "loss": 1.1811, "step": 9782 }, { "epoch": 0.75, "grad_norm": 1.5379325151443481, "learning_rate": 3.0112556019876946e-05, "loss": 1.0368, "step": 9783 }, { "epoch": 0.75, "grad_norm": 1.5499227046966553, "learning_rate": 3.0095398760714267e-05, "loss": 1.6153, "step": 9784 }, { "epoch": 0.75, "grad_norm": 1.90078604221344, "learning_rate": 3.007824552496351e-05, "loss": 1.0978, "step": 9785 }, { "epoch": 0.75, "grad_norm": 1.2392470836639404, "learning_rate": 3.0061096313612002e-05, "loss": 1.3044, "step": 9786 }, { "epoch": 0.75, "grad_norm": 1.2560689449310303, "learning_rate": 3.0043951127646708e-05, "loss": 1.2461, "step": 9787 }, { "epoch": 0.75, "grad_norm": 1.9757612943649292, "learning_rate": 3.002680996805446e-05, "loss": 1.8849, "step": 9788 }, { "epoch": 0.75, "grad_norm": 1.4976162910461426, "learning_rate": 3.0009672835821855e-05, "loss": 1.0589, "step": 9789 }, { "epoch": 0.75, "grad_norm": 1.7442560195922852, "learning_rate": 2.999253973193522e-05, "loss": 2.0049, "step": 9790 }, { "epoch": 0.75, "grad_norm": 1.4363023042678833, "learning_rate": 2.997541065738062e-05, "loss": 0.9076, "step": 9791 }, { "epoch": 0.75, "grad_norm": 1.3163264989852905, "learning_rate": 2.9958285613144e-05, "loss": 1.3693, "step": 9792 }, { "epoch": 0.75, 
"grad_norm": 1.2783528566360474, "learning_rate": 2.9941164600210937e-05, "loss": 0.939, "step": 9793 }, { "epoch": 0.75, "grad_norm": 1.407117247581482, "learning_rate": 2.9924047619566876e-05, "loss": 1.3731, "step": 9794 }, { "epoch": 0.75, "grad_norm": 1.6288236379623413, "learning_rate": 2.990693467219704e-05, "loss": 1.1447, "step": 9795 }, { "epoch": 0.75, "grad_norm": 3.141831636428833, "learning_rate": 2.9889825759086297e-05, "loss": 1.176, "step": 9796 }, { "epoch": 0.75, "grad_norm": 1.1917403936386108, "learning_rate": 2.9872720881219408e-05, "loss": 1.0361, "step": 9797 }, { "epoch": 0.75, "grad_norm": 1.2784584760665894, "learning_rate": 2.9855620039580878e-05, "loss": 1.3983, "step": 9798 }, { "epoch": 0.75, "grad_norm": 1.3066595792770386, "learning_rate": 2.9838523235154935e-05, "loss": 1.5016, "step": 9799 }, { "epoch": 0.75, "grad_norm": 1.902472734451294, "learning_rate": 2.9821430468925572e-05, "loss": 0.9144, "step": 9800 }, { "epoch": 0.75, "grad_norm": 1.5246065855026245, "learning_rate": 2.980434174187663e-05, "loss": 1.6497, "step": 9801 }, { "epoch": 0.75, "grad_norm": 1.5093729496002197, "learning_rate": 2.9787257054991592e-05, "loss": 1.2258, "step": 9802 }, { "epoch": 0.75, "grad_norm": 1.2964344024658203, "learning_rate": 2.9770176409253837e-05, "loss": 0.6989, "step": 9803 }, { "epoch": 0.75, "grad_norm": 1.4492144584655762, "learning_rate": 2.9753099805646455e-05, "loss": 2.1852, "step": 9804 }, { "epoch": 0.75, "grad_norm": 1.1677517890930176, "learning_rate": 2.9736027245152275e-05, "loss": 0.6375, "step": 9805 }, { "epoch": 0.75, "grad_norm": 2.2268033027648926, "learning_rate": 2.971895872875392e-05, "loss": 1.578, "step": 9806 }, { "epoch": 0.75, "grad_norm": 2.0838348865509033, "learning_rate": 2.9701894257433826e-05, "loss": 1.4347, "step": 9807 }, { "epoch": 0.75, "grad_norm": 1.2848349809646606, "learning_rate": 2.968483383217413e-05, "loss": 1.6313, "step": 9808 }, { "epoch": 0.75, "grad_norm": 2.030496120452881, 
"learning_rate": 2.9667777453956715e-05, "loss": 1.5319, "step": 9809 }, { "epoch": 0.75, "grad_norm": 1.6429471969604492, "learning_rate": 2.9650725123763334e-05, "loss": 1.2439, "step": 9810 }, { "epoch": 0.75, "grad_norm": 1.1556422710418701, "learning_rate": 2.9633676842575387e-05, "loss": 1.4869, "step": 9811 }, { "epoch": 0.75, "grad_norm": 1.447844386100769, "learning_rate": 2.961663261137414e-05, "loss": 1.4398, "step": 9812 }, { "epoch": 0.75, "grad_norm": 1.4017531871795654, "learning_rate": 2.95995924311406e-05, "loss": 2.1041, "step": 9813 }, { "epoch": 0.75, "grad_norm": 1.4044979810714722, "learning_rate": 2.9582556302855478e-05, "loss": 1.6719, "step": 9814 }, { "epoch": 0.75, "grad_norm": 2.852400779724121, "learning_rate": 2.956552422749934e-05, "loss": 0.4974, "step": 9815 }, { "epoch": 0.75, "grad_norm": 1.3748071193695068, "learning_rate": 2.95484962060525e-05, "loss": 1.8201, "step": 9816 }, { "epoch": 0.75, "grad_norm": 2.2387781143188477, "learning_rate": 2.953147223949495e-05, "loss": 1.2718, "step": 9817 }, { "epoch": 0.75, "grad_norm": 1.677414894104004, "learning_rate": 2.9514452328806585e-05, "loss": 1.8692, "step": 9818 }, { "epoch": 0.75, "grad_norm": 2.3113856315612793, "learning_rate": 2.9497436474966978e-05, "loss": 1.5334, "step": 9819 }, { "epoch": 0.75, "grad_norm": 1.2834442853927612, "learning_rate": 2.9480424678955443e-05, "loss": 1.4972, "step": 9820 }, { "epoch": 0.75, "grad_norm": 1.9478759765625, "learning_rate": 2.9463416941751153e-05, "loss": 1.2034, "step": 9821 }, { "epoch": 0.75, "grad_norm": 1.6504669189453125, "learning_rate": 2.9446413264333018e-05, "loss": 1.0774, "step": 9822 }, { "epoch": 0.75, "grad_norm": 0.9570661783218384, "learning_rate": 2.942941364767964e-05, "loss": 0.9544, "step": 9823 }, { "epoch": 0.75, "grad_norm": 0.8818358182907104, "learning_rate": 2.9412418092769477e-05, "loss": 0.9027, "step": 9824 }, { "epoch": 0.75, "grad_norm": 1.8670494556427002, "learning_rate": 2.9395426600580757e-05, 
"loss": 1.3514, "step": 9825 }, { "epoch": 0.75, "grad_norm": 1.596055269241333, "learning_rate": 2.9378439172091365e-05, "loss": 1.1932, "step": 9826 }, { "epoch": 0.75, "grad_norm": 1.5253612995147705, "learning_rate": 2.9361455808279103e-05, "loss": 1.3884, "step": 9827 }, { "epoch": 0.75, "grad_norm": 2.273491621017456, "learning_rate": 2.934447651012141e-05, "loss": 0.8274, "step": 9828 }, { "epoch": 0.75, "grad_norm": 1.5546493530273438, "learning_rate": 2.9327501278595526e-05, "loss": 1.6229, "step": 9829 }, { "epoch": 0.75, "grad_norm": 1.9312814474105835, "learning_rate": 2.9310530114678502e-05, "loss": 1.5177, "step": 9830 }, { "epoch": 0.75, "grad_norm": 1.120259165763855, "learning_rate": 2.929356301934716e-05, "loss": 1.6165, "step": 9831 }, { "epoch": 0.75, "eval_loss": NaN, "eval_runtime": 290.6843, "eval_samples_per_second": 9.495, "eval_steps_per_second": 9.495, "step": 9831 }, { "epoch": 0.75, "grad_norm": 1.2288661003112793, "learning_rate": 2.927659999357798e-05, "loss": 1.0579, "step": 9832 }, { "epoch": 0.75, "grad_norm": 4.17327880859375, "learning_rate": 2.925964103834732e-05, "loss": 1.3587, "step": 9833 }, { "epoch": 0.75, "grad_norm": 1.435926914215088, "learning_rate": 2.9242686154631294e-05, "loss": 1.2284, "step": 9834 }, { "epoch": 0.75, "grad_norm": 1.6266705989837646, "learning_rate": 2.9225735343405693e-05, "loss": 1.471, "step": 9835 }, { "epoch": 0.75, "grad_norm": 2.463406801223755, "learning_rate": 2.9208788605646197e-05, "loss": 1.2031, "step": 9836 }, { "epoch": 0.75, "grad_norm": 1.2086553573608398, "learning_rate": 2.9191845942328166e-05, "loss": 1.6247, "step": 9837 }, { "epoch": 0.75, "grad_norm": 1.5805599689483643, "learning_rate": 2.9174907354426696e-05, "loss": 1.851, "step": 9838 }, { "epoch": 0.75, "grad_norm": 2.263479232788086, "learning_rate": 2.9157972842916748e-05, "loss": 1.3524, "step": 9839 }, { "epoch": 0.75, "grad_norm": 1.4666560888290405, "learning_rate": 2.9141042408773024e-05, "loss": 1.6128, "step": 
9840 }, { "epoch": 0.75, "grad_norm": 2.33943247795105, "learning_rate": 2.912411605296991e-05, "loss": 1.5719, "step": 9841 }, { "epoch": 0.75, "grad_norm": 1.1582309007644653, "learning_rate": 2.9107193776481644e-05, "loss": 1.6147, "step": 9842 }, { "epoch": 0.75, "grad_norm": 1.7381961345672607, "learning_rate": 2.909027558028222e-05, "loss": 1.2425, "step": 9843 }, { "epoch": 0.75, "grad_norm": 1.2151319980621338, "learning_rate": 2.9073361465345337e-05, "loss": 0.9961, "step": 9844 }, { "epoch": 0.75, "grad_norm": 0.847661554813385, "learning_rate": 2.905645143264455e-05, "loss": 1.1334, "step": 9845 }, { "epoch": 0.75, "grad_norm": 1.0940927267074585, "learning_rate": 2.9039545483153074e-05, "loss": 1.2354, "step": 9846 }, { "epoch": 0.75, "grad_norm": 1.0254027843475342, "learning_rate": 2.9022643617843992e-05, "loss": 1.1793, "step": 9847 }, { "epoch": 0.75, "grad_norm": 0.9861767292022705, "learning_rate": 2.9005745837690047e-05, "loss": 1.3624, "step": 9848 }, { "epoch": 0.75, "grad_norm": 1.3303829431533813, "learning_rate": 2.898885214366388e-05, "loss": 1.363, "step": 9849 }, { "epoch": 0.75, "grad_norm": 1.720288872718811, "learning_rate": 2.897196253673773e-05, "loss": 0.9718, "step": 9850 }, { "epoch": 0.75, "grad_norm": 1.9566493034362793, "learning_rate": 2.8955077017883746e-05, "loss": 1.5333, "step": 9851 }, { "epoch": 0.75, "grad_norm": 1.5166895389556885, "learning_rate": 2.8938195588073813e-05, "loss": 1.7335, "step": 9852 }, { "epoch": 0.75, "grad_norm": 1.83914053440094, "learning_rate": 2.892131824827948e-05, "loss": 1.6319, "step": 9853 }, { "epoch": 0.75, "grad_norm": 1.6300731897354126, "learning_rate": 2.890444499947218e-05, "loss": 1.2323, "step": 9854 }, { "epoch": 0.75, "grad_norm": 1.7397032976150513, "learning_rate": 2.8887575842623093e-05, "loss": 1.2679, "step": 9855 }, { "epoch": 0.75, "grad_norm": 1.3018248081207275, "learning_rate": 2.8870710778703103e-05, "loss": 1.1489, "step": 9856 }, { "epoch": 0.75, "grad_norm": 
1.380813479423523, "learning_rate": 2.8853849808682865e-05, "loss": 1.5941, "step": 9857 }, { "epoch": 0.75, "grad_norm": 0.924304723739624, "learning_rate": 2.883699293353288e-05, "loss": 0.9722, "step": 9858 }, { "epoch": 0.75, "grad_norm": 1.394213318824768, "learning_rate": 2.8820140154223295e-05, "loss": 1.584, "step": 9859 }, { "epoch": 0.75, "grad_norm": 2.5232138633728027, "learning_rate": 2.8803291471724125e-05, "loss": 2.1417, "step": 9860 }, { "epoch": 0.75, "grad_norm": 1.5179970264434814, "learning_rate": 2.8786446887005135e-05, "loss": 1.4903, "step": 9861 }, { "epoch": 0.75, "grad_norm": 1.4053640365600586, "learning_rate": 2.8769606401035765e-05, "loss": 1.2452, "step": 9862 }, { "epoch": 0.75, "grad_norm": 3.509939193725586, "learning_rate": 2.87527700147853e-05, "loss": 2.2547, "step": 9863 }, { "epoch": 0.75, "grad_norm": 1.2163530588150024, "learning_rate": 2.873593772922283e-05, "loss": 1.1511, "step": 9864 }, { "epoch": 0.75, "grad_norm": 1.1754176616668701, "learning_rate": 2.8719109545317103e-05, "loss": 1.0623, "step": 9865 }, { "epoch": 0.75, "grad_norm": 1.2032248973846436, "learning_rate": 2.870228546403664e-05, "loss": 1.7442, "step": 9866 }, { "epoch": 0.75, "grad_norm": 1.1375399827957153, "learning_rate": 2.8685465486349828e-05, "loss": 0.8257, "step": 9867 }, { "epoch": 0.75, "grad_norm": 1.652255892753601, "learning_rate": 2.8668649613224707e-05, "loss": 1.1651, "step": 9868 }, { "epoch": 0.75, "grad_norm": 1.0320560932159424, "learning_rate": 2.865183784562915e-05, "loss": 1.4998, "step": 9869 }, { "epoch": 0.75, "grad_norm": 2.343801259994507, "learning_rate": 2.8635030184530788e-05, "loss": 1.8293, "step": 9870 }, { "epoch": 0.75, "grad_norm": 1.838140845298767, "learning_rate": 2.861822663089695e-05, "loss": 0.7974, "step": 9871 }, { "epoch": 0.75, "grad_norm": 2.0044026374816895, "learning_rate": 2.8601427185694807e-05, "loss": 0.7912, "step": 9872 }, { "epoch": 0.75, "grad_norm": 1.2653120756149292, "learning_rate": 
2.8584631849891296e-05, "loss": 0.8941, "step": 9873 }, { "epoch": 0.75, "grad_norm": 1.8365421295166016, "learning_rate": 2.856784062445306e-05, "loss": 1.3482, "step": 9874 }, { "epoch": 0.75, "grad_norm": 1.2172091007232666, "learning_rate": 2.855105351034648e-05, "loss": 1.3498, "step": 9875 }, { "epoch": 0.75, "grad_norm": 1.5324382781982422, "learning_rate": 2.853427050853783e-05, "loss": 1.134, "step": 9876 }, { "epoch": 0.75, "grad_norm": 2.401850938796997, "learning_rate": 2.8517491619993008e-05, "loss": 1.6939, "step": 9877 }, { "epoch": 0.75, "grad_norm": 1.8702013492584229, "learning_rate": 2.8500716845677746e-05, "loss": 1.3817, "step": 9878 }, { "epoch": 0.75, "grad_norm": 1.5610089302062988, "learning_rate": 2.8483946186557586e-05, "loss": 1.2715, "step": 9879 }, { "epoch": 0.75, "grad_norm": 1.0320061445236206, "learning_rate": 2.8467179643597697e-05, "loss": 0.7711, "step": 9880 }, { "epoch": 0.75, "grad_norm": 2.2592060565948486, "learning_rate": 2.8450417217763115e-05, "loss": 1.1098, "step": 9881 }, { "epoch": 0.75, "grad_norm": 1.2155585289001465, "learning_rate": 2.8433658910018666e-05, "loss": 1.2303, "step": 9882 }, { "epoch": 0.75, "grad_norm": 2.2914352416992188, "learning_rate": 2.841690472132882e-05, "loss": 1.1167, "step": 9883 }, { "epoch": 0.75, "grad_norm": 1.2223668098449707, "learning_rate": 2.8400154652657917e-05, "loss": 1.0785, "step": 9884 }, { "epoch": 0.75, "grad_norm": 3.255253553390503, "learning_rate": 2.8383408704970016e-05, "loss": 1.9362, "step": 9885 }, { "epoch": 0.75, "grad_norm": 1.2100653648376465, "learning_rate": 2.8366666879228898e-05, "loss": 1.036, "step": 9886 }, { "epoch": 0.75, "grad_norm": 1.292362093925476, "learning_rate": 2.8349929176398193e-05, "loss": 0.9933, "step": 9887 }, { "epoch": 0.75, "grad_norm": 0.7715044021606445, "learning_rate": 2.8333195597441264e-05, "loss": 1.3047, "step": 9888 }, { "epoch": 0.75, "grad_norm": 1.4855303764343262, "learning_rate": 2.8316466143321176e-05, "loss": 1.874, 
"step": 9889 }, { "epoch": 0.75, "grad_norm": 1.7721507549285889, "learning_rate": 2.829974081500084e-05, "loss": 1.7164, "step": 9890 }, { "epoch": 0.75, "grad_norm": 1.4131581783294678, "learning_rate": 2.828301961344292e-05, "loss": 1.2947, "step": 9891 }, { "epoch": 0.75, "grad_norm": 0.8337750434875488, "learning_rate": 2.8266302539609745e-05, "loss": 1.1905, "step": 9892 }, { "epoch": 0.75, "grad_norm": 2.966543674468994, "learning_rate": 2.8249589594463567e-05, "loss": 2.183, "step": 9893 }, { "epoch": 0.75, "grad_norm": 4.609652042388916, "learning_rate": 2.8232880778966254e-05, "loss": 2.4295, "step": 9894 }, { "epoch": 0.76, "grad_norm": 2.729478597640991, "learning_rate": 2.8216176094079482e-05, "loss": 1.7689, "step": 9895 }, { "epoch": 0.76, "grad_norm": 1.7251992225646973, "learning_rate": 2.819947554076472e-05, "loss": 1.8075, "step": 9896 }, { "epoch": 0.76, "grad_norm": 1.5597665309906006, "learning_rate": 2.8182779119983216e-05, "loss": 0.9399, "step": 9897 }, { "epoch": 0.76, "grad_norm": 1.4155360460281372, "learning_rate": 2.816608683269589e-05, "loss": 1.159, "step": 9898 }, { "epoch": 0.76, "grad_norm": 1.2453337907791138, "learning_rate": 2.8149398679863504e-05, "loss": 1.3454, "step": 9899 }, { "epoch": 0.76, "grad_norm": 2.126202344894409, "learning_rate": 2.813271466244656e-05, "loss": 2.0308, "step": 9900 }, { "epoch": 0.76, "grad_norm": 2.489165782928467, "learning_rate": 2.8116034781405342e-05, "loss": 1.5052, "step": 9901 }, { "epoch": 0.76, "grad_norm": 1.2212772369384766, "learning_rate": 2.8099359037699846e-05, "loss": 1.2729, "step": 9902 }, { "epoch": 0.76, "grad_norm": 1.3086652755737305, "learning_rate": 2.8082687432289833e-05, "loss": 1.4177, "step": 9903 }, { "epoch": 0.76, "grad_norm": 1.414258360862732, "learning_rate": 2.8066019966134904e-05, "loss": 1.1832, "step": 9904 }, { "epoch": 0.76, "grad_norm": 0.9380161166191101, "learning_rate": 2.8049356640194314e-05, "loss": 1.0657, "step": 9905 }, { "epoch": 0.76, 
"grad_norm": 3.0105323791503906, "learning_rate": 2.8032697455427158e-05, "loss": 1.5207, "step": 9906 }, { "epoch": 0.76, "grad_norm": 1.4063243865966797, "learning_rate": 2.8016042412792297e-05, "loss": 1.3965, "step": 9907 }, { "epoch": 0.76, "grad_norm": 1.3528258800506592, "learning_rate": 2.7999391513248264e-05, "loss": 1.3436, "step": 9908 }, { "epoch": 0.76, "grad_norm": 1.5066590309143066, "learning_rate": 2.7982744757753455e-05, "loss": 1.1269, "step": 9909 }, { "epoch": 0.76, "grad_norm": 3.2443418502807617, "learning_rate": 2.7966102147265994e-05, "loss": 1.2303, "step": 9910 }, { "epoch": 0.76, "grad_norm": 2.467970609664917, "learning_rate": 2.794946368274376e-05, "loss": 1.7337, "step": 9911 }, { "epoch": 0.76, "grad_norm": 1.098098874092102, "learning_rate": 2.793282936514433e-05, "loss": 1.4284, "step": 9912 }, { "epoch": 0.76, "grad_norm": 2.3988037109375, "learning_rate": 2.7916199195425184e-05, "loss": 1.7588, "step": 9913 }, { "epoch": 0.76, "grad_norm": 2.6622314453125, "learning_rate": 2.7899573174543427e-05, "loss": 2.1561, "step": 9914 }, { "epoch": 0.76, "grad_norm": 1.3776884078979492, "learning_rate": 2.7882951303455994e-05, "loss": 1.5751, "step": 9915 }, { "epoch": 0.76, "grad_norm": 1.5144805908203125, "learning_rate": 2.786633358311962e-05, "loss": 0.9974, "step": 9916 }, { "epoch": 0.76, "grad_norm": 2.2626471519470215, "learning_rate": 2.7849720014490677e-05, "loss": 1.493, "step": 9917 }, { "epoch": 0.76, "grad_norm": 1.2404320240020752, "learning_rate": 2.7833110598525404e-05, "loss": 1.4527, "step": 9918 }, { "epoch": 0.76, "grad_norm": 1.4748828411102295, "learning_rate": 2.7816505336179798e-05, "loss": 1.0689, "step": 9919 }, { "epoch": 0.76, "grad_norm": 1.1526161432266235, "learning_rate": 2.7799904228409535e-05, "loss": 1.036, "step": 9920 }, { "epoch": 0.76, "grad_norm": 1.1426278352737427, "learning_rate": 2.7783307276170144e-05, "loss": 0.6739, "step": 9921 }, { "epoch": 0.76, "grad_norm": 1.9890029430389404, 
"learning_rate": 2.776671448041688e-05, "loss": 1.6125, "step": 9922 }, { "epoch": 0.76, "grad_norm": 1.5236905813217163, "learning_rate": 2.775012584210469e-05, "loss": 1.2191, "step": 9923 }, { "epoch": 0.76, "grad_norm": 2.760150909423828, "learning_rate": 2.7733541362188388e-05, "loss": 1.785, "step": 9924 }, { "epoch": 0.76, "grad_norm": 1.4712268114089966, "learning_rate": 2.7716961041622534e-05, "loss": 1.0487, "step": 9925 }, { "epoch": 0.76, "grad_norm": 1.4608709812164307, "learning_rate": 2.7700384881361373e-05, "loss": 1.0362, "step": 9926 }, { "epoch": 0.76, "grad_norm": 2.8219335079193115, "learning_rate": 2.768381288235897e-05, "loss": 1.6592, "step": 9927 }, { "epoch": 0.76, "grad_norm": 0.9433813095092773, "learning_rate": 2.766724504556919e-05, "loss": 0.7656, "step": 9928 }, { "epoch": 0.76, "grad_norm": 1.3744033575057983, "learning_rate": 2.765068137194553e-05, "loss": 0.84, "step": 9929 }, { "epoch": 0.76, "grad_norm": 4.350139617919922, "learning_rate": 2.7634121862441386e-05, "loss": 1.5173, "step": 9930 }, { "epoch": 0.76, "grad_norm": 1.7426376342773438, "learning_rate": 2.761756651800983e-05, "loss": 1.627, "step": 9931 }, { "epoch": 0.76, "grad_norm": 1.2263967990875244, "learning_rate": 2.7601015339603688e-05, "loss": 1.7045, "step": 9932 }, { "epoch": 0.76, "grad_norm": 1.7165838479995728, "learning_rate": 2.7584468328175596e-05, "loss": 2.1446, "step": 9933 }, { "epoch": 0.76, "grad_norm": 1.3729451894760132, "learning_rate": 2.756792548467797e-05, "loss": 1.4354, "step": 9934 }, { "epoch": 0.76, "grad_norm": 1.362881064414978, "learning_rate": 2.7551386810062873e-05, "loss": 0.9028, "step": 9935 }, { "epoch": 0.76, "grad_norm": 1.1134092807769775, "learning_rate": 2.7534852305282243e-05, "loss": 1.7642, "step": 9936 }, { "epoch": 0.76, "grad_norm": 3.0538506507873535, "learning_rate": 2.7518321971287776e-05, "loss": 1.2796, "step": 9937 }, { "epoch": 0.76, "grad_norm": 3.8953323364257812, "learning_rate": 2.7501795809030795e-05, 
"loss": 2.7809, "step": 9938 }, { "epoch": 0.76, "grad_norm": 2.56067156791687, "learning_rate": 2.7485273819462566e-05, "loss": 1.2713, "step": 9939 }, { "epoch": 0.76, "grad_norm": 1.4957901239395142, "learning_rate": 2.746875600353398e-05, "loss": 2.3525, "step": 9940 }, { "epoch": 0.76, "grad_norm": 2.0428431034088135, "learning_rate": 2.7452242362195702e-05, "loss": 1.1447, "step": 9941 }, { "epoch": 0.76, "grad_norm": 2.3031396865844727, "learning_rate": 2.7435732896398214e-05, "loss": 1.3539, "step": 9942 }, { "epoch": 0.76, "grad_norm": 0.8913432359695435, "learning_rate": 2.741922760709178e-05, "loss": 1.1425, "step": 9943 }, { "epoch": 0.76, "grad_norm": 1.17798912525177, "learning_rate": 2.7402726495226295e-05, "loss": 1.257, "step": 9944 }, { "epoch": 0.76, "grad_norm": 1.1810134649276733, "learning_rate": 2.738622956175153e-05, "loss": 1.102, "step": 9945 }, { "epoch": 0.76, "grad_norm": 1.0075933933258057, "learning_rate": 2.736973680761702e-05, "loss": 1.3604, "step": 9946 }, { "epoch": 0.76, "grad_norm": 1.3008067607879639, "learning_rate": 2.7353248233771934e-05, "loss": 1.968, "step": 9947 }, { "epoch": 0.76, "grad_norm": 1.3589468002319336, "learning_rate": 2.7336763841165324e-05, "loss": 1.6417, "step": 9948 }, { "epoch": 0.76, "grad_norm": 1.2988662719726562, "learning_rate": 2.7320283630746e-05, "loss": 0.9763, "step": 9949 }, { "epoch": 0.76, "grad_norm": 1.5401133298873901, "learning_rate": 2.730380760346245e-05, "loss": 1.1448, "step": 9950 }, { "epoch": 0.76, "grad_norm": 1.2415835857391357, "learning_rate": 2.7287335760262944e-05, "loss": 1.1501, "step": 9951 }, { "epoch": 0.76, "grad_norm": 1.528572678565979, "learning_rate": 2.727086810209559e-05, "loss": 1.0626, "step": 9952 }, { "epoch": 0.76, "grad_norm": 1.3174229860305786, "learning_rate": 2.7254404629908135e-05, "loss": 1.1754, "step": 9953 }, { "epoch": 0.76, "grad_norm": 3.732388734817505, "learning_rate": 2.7237945344648174e-05, "loss": 1.9943, "step": 9954 }, { "epoch": 0.76, 
"grad_norm": 2.069733142852783, "learning_rate": 2.722149024726307e-05, "loss": 1.6473, "step": 9955 }, { "epoch": 0.76, "grad_norm": 1.6307148933410645, "learning_rate": 2.7205039338699844e-05, "loss": 1.6439, "step": 9956 }, { "epoch": 0.76, "grad_norm": 1.5109840631484985, "learning_rate": 2.7188592619905363e-05, "loss": 1.8322, "step": 9957 }, { "epoch": 0.76, "grad_norm": 1.9224886894226074, "learning_rate": 2.7172150091826276e-05, "loss": 1.4887, "step": 9958 }, { "epoch": 0.76, "grad_norm": 1.1920084953308105, "learning_rate": 2.7155711755408897e-05, "loss": 1.2366, "step": 9959 }, { "epoch": 0.76, "grad_norm": 1.6042852401733398, "learning_rate": 2.7139277611599333e-05, "loss": 2.1553, "step": 9960 }, { "epoch": 0.76, "grad_norm": 1.1328957080841064, "learning_rate": 2.712284766134352e-05, "loss": 1.288, "step": 9961 }, { "epoch": 0.76, "grad_norm": 1.2992920875549316, "learning_rate": 2.7106421905587033e-05, "loss": 1.4172, "step": 9962 }, { "epoch": 0.76, "grad_norm": 1.8835104703903198, "learning_rate": 2.709000034527529e-05, "loss": 1.5129, "step": 9963 }, { "epoch": 0.76, "grad_norm": 1.7478357553482056, "learning_rate": 2.7073582981353496e-05, "loss": 1.3931, "step": 9964 }, { "epoch": 0.76, "grad_norm": 1.1287708282470703, "learning_rate": 2.7057169814766492e-05, "loss": 1.5497, "step": 9965 }, { "epoch": 0.76, "grad_norm": 1.2486838102340698, "learning_rate": 2.704076084645898e-05, "loss": 1.048, "step": 9966 }, { "epoch": 0.76, "grad_norm": 3.469651222229004, "learning_rate": 2.7024356077375424e-05, "loss": 1.1457, "step": 9967 }, { "epoch": 0.76, "grad_norm": 1.7994638681411743, "learning_rate": 2.700795550845998e-05, "loss": 1.732, "step": 9968 }, { "epoch": 0.76, "grad_norm": 1.374159812927246, "learning_rate": 2.6991559140656576e-05, "loss": 1.5403, "step": 9969 }, { "epoch": 0.76, "grad_norm": 1.1519238948822021, "learning_rate": 2.697516697490896e-05, "loss": 1.0838, "step": 9970 }, { "epoch": 0.76, "grad_norm": 1.2215951681137085, 
"learning_rate": 2.695877901216054e-05, "loss": 0.9611, "step": 9971 }, { "epoch": 0.76, "grad_norm": 1.4440709352493286, "learning_rate": 2.694239525335458e-05, "loss": 1.6831, "step": 9972 }, { "epoch": 0.76, "grad_norm": 1.1297895908355713, "learning_rate": 2.6926015699434072e-05, "loss": 1.1893, "step": 9973 }, { "epoch": 0.76, "grad_norm": 1.9826947450637817, "learning_rate": 2.6909640351341704e-05, "loss": 2.0175, "step": 9974 }, { "epoch": 0.76, "grad_norm": 1.5825494527816772, "learning_rate": 2.689326921002e-05, "loss": 2.1111, "step": 9975 }, { "epoch": 0.76, "grad_norm": 3.2372941970825195, "learning_rate": 2.6876902276411254e-05, "loss": 1.273, "step": 9976 }, { "epoch": 0.76, "grad_norm": 1.1515151262283325, "learning_rate": 2.6860539551457387e-05, "loss": 0.7003, "step": 9977 }, { "epoch": 0.76, "grad_norm": 1.8755147457122803, "learning_rate": 2.6844181036100257e-05, "loss": 1.4053, "step": 9978 }, { "epoch": 0.76, "grad_norm": 1.9888705015182495, "learning_rate": 2.6827826731281357e-05, "loss": 2.0375, "step": 9979 }, { "epoch": 0.76, "grad_norm": 1.8464399576187134, "learning_rate": 2.6811476637941922e-05, "loss": 1.6097, "step": 9980 }, { "epoch": 0.76, "grad_norm": 1.8737802505493164, "learning_rate": 2.6795130757023044e-05, "loss": 1.8245, "step": 9981 }, { "epoch": 0.76, "grad_norm": 1.7224849462509155, "learning_rate": 2.677878908946555e-05, "loss": 1.3079, "step": 9982 }, { "epoch": 0.76, "grad_norm": 1.9893550872802734, "learning_rate": 2.676245163620993e-05, "loss": 1.3279, "step": 9983 }, { "epoch": 0.76, "grad_norm": 1.1458516120910645, "learning_rate": 2.6746118398196528e-05, "loss": 0.9801, "step": 9984 }, { "epoch": 0.76, "grad_norm": 3.551772117614746, "learning_rate": 2.6729789376365456e-05, "loss": 1.4804, "step": 9985 }, { "epoch": 0.76, "grad_norm": 2.6198434829711914, "learning_rate": 2.6713464571656467e-05, "loss": 1.8249, "step": 9986 }, { "epoch": 0.76, "grad_norm": 1.271816611289978, "learning_rate": 2.669714398500922e-05, 
"loss": 1.2753, "step": 9987 }, { "epoch": 0.76, "grad_norm": 1.5172053575515747, "learning_rate": 2.6680827617363024e-05, "loss": 1.5725, "step": 9988 }, { "epoch": 0.76, "grad_norm": 2.2845418453216553, "learning_rate": 2.6664515469656946e-05, "loss": 1.1647, "step": 9989 }, { "epoch": 0.76, "grad_norm": 1.840773344039917, "learning_rate": 2.6648207542829883e-05, "loss": 1.3655, "step": 9990 }, { "epoch": 0.76, "grad_norm": 1.1909059286117554, "learning_rate": 2.663190383782048e-05, "loss": 1.6348, "step": 9991 }, { "epoch": 0.76, "grad_norm": 1.7970335483551025, "learning_rate": 2.6615604355567037e-05, "loss": 1.7639, "step": 9992 }, { "epoch": 0.76, "grad_norm": 2.1693103313446045, "learning_rate": 2.6599309097007728e-05, "loss": 1.4696, "step": 9993 }, { "epoch": 0.76, "grad_norm": 1.44462251663208, "learning_rate": 2.6583018063080457e-05, "loss": 1.4556, "step": 9994 }, { "epoch": 0.76, "grad_norm": 1.5109143257141113, "learning_rate": 2.656673125472282e-05, "loss": 2.3251, "step": 9995 }, { "epoch": 0.76, "grad_norm": 1.2600109577178955, "learning_rate": 2.6550448672872254e-05, "loss": 1.1255, "step": 9996 }, { "epoch": 0.76, "grad_norm": 1.5399562120437622, "learning_rate": 2.653417031846591e-05, "loss": 1.2687, "step": 9997 }, { "epoch": 0.76, "grad_norm": 2.257906198501587, "learning_rate": 2.651789619244065e-05, "loss": 1.2159, "step": 9998 }, { "epoch": 0.76, "grad_norm": 2.506640672683716, "learning_rate": 2.6501626295733197e-05, "loss": 1.527, "step": 9999 }, { "epoch": 0.76, "grad_norm": 1.5571045875549316, "learning_rate": 2.6485360629279987e-05, "loss": 1.2884, "step": 10000 }, { "epoch": 0.76, "grad_norm": 1.3000562191009521, "learning_rate": 2.6469099194017143e-05, "loss": 1.3543, "step": 10001 }, { "epoch": 0.76, "grad_norm": 1.1831305027008057, "learning_rate": 2.645284199088065e-05, "loss": 1.0982, "step": 10002 }, { "epoch": 0.76, "grad_norm": 1.022190809249878, "learning_rate": 2.6436589020806223e-05, "loss": 1.1669, "step": 10003 }, { 
"epoch": 0.76, "grad_norm": 2.1326169967651367, "learning_rate": 2.642034028472925e-05, "loss": 2.0533, "step": 10004 }, { "epoch": 0.76, "grad_norm": 1.0530588626861572, "learning_rate": 2.6404095783585002e-05, "loss": 1.1443, "step": 10005 }, { "epoch": 0.76, "grad_norm": 1.5319364070892334, "learning_rate": 2.638785551830839e-05, "loss": 1.4814, "step": 10006 }, { "epoch": 0.76, "grad_norm": 3.158700466156006, "learning_rate": 2.63716194898342e-05, "loss": 1.654, "step": 10007 }, { "epoch": 0.76, "grad_norm": 1.0778956413269043, "learning_rate": 2.6355387699096823e-05, "loss": 0.6456, "step": 10008 }, { "epoch": 0.76, "grad_norm": 1.5502012968063354, "learning_rate": 2.633916014703057e-05, "loss": 1.239, "step": 10009 }, { "epoch": 0.76, "grad_norm": 1.545049786567688, "learning_rate": 2.632293683456938e-05, "loss": 1.238, "step": 10010 }, { "epoch": 0.76, "grad_norm": 1.4095444679260254, "learning_rate": 2.6306717762647015e-05, "loss": 1.1184, "step": 10011 }, { "epoch": 0.76, "grad_norm": 1.655484676361084, "learning_rate": 2.6290502932197005e-05, "loss": 1.376, "step": 10012 }, { "epoch": 0.76, "grad_norm": 1.823370337486267, "learning_rate": 2.627429234415254e-05, "loss": 1.456, "step": 10013 }, { "epoch": 0.76, "grad_norm": 1.3673454523086548, "learning_rate": 2.625808599944668e-05, "loss": 1.3039, "step": 10014 }, { "epoch": 0.76, "grad_norm": 1.6522552967071533, "learning_rate": 2.624188389901221e-05, "loss": 1.9222, "step": 10015 }, { "epoch": 0.76, "grad_norm": 1.4653929471969604, "learning_rate": 2.622568604378163e-05, "loss": 1.4981, "step": 10016 }, { "epoch": 0.76, "grad_norm": 1.719353437423706, "learning_rate": 2.620949243468719e-05, "loss": 1.258, "step": 10017 }, { "epoch": 0.76, "grad_norm": 1.5942963361740112, "learning_rate": 2.619330307266098e-05, "loss": 0.94, "step": 10018 }, { "epoch": 0.76, "grad_norm": 2.923888921737671, "learning_rate": 2.6177117958634746e-05, "loss": 1.6271, "step": 10019 }, { "epoch": 0.76, "grad_norm": 
1.557478427886963, "learning_rate": 2.6160937093540037e-05, "loss": 1.3651, "step": 10020 }, { "epoch": 0.76, "grad_norm": 1.2625720500946045, "learning_rate": 2.6144760478308217e-05, "loss": 0.9789, "step": 10021 }, { "epoch": 0.76, "grad_norm": 1.2156974077224731, "learning_rate": 2.6128588113870257e-05, "loss": 1.6982, "step": 10022 }, { "epoch": 0.76, "grad_norm": 1.3904109001159668, "learning_rate": 2.6112420001157012e-05, "loss": 2.1541, "step": 10023 }, { "epoch": 0.76, "grad_norm": 1.4477291107177734, "learning_rate": 2.6096256141099086e-05, "loss": 1.0211, "step": 10024 }, { "epoch": 0.76, "grad_norm": 2.618258237838745, "learning_rate": 2.6080096534626753e-05, "loss": 1.7782, "step": 10025 }, { "epoch": 0.77, "grad_norm": 1.2729227542877197, "learning_rate": 2.606394118267008e-05, "loss": 1.5942, "step": 10026 }, { "epoch": 0.77, "grad_norm": 2.8218131065368652, "learning_rate": 2.6047790086158952e-05, "loss": 1.4325, "step": 10027 }, { "epoch": 0.77, "grad_norm": 1.4811742305755615, "learning_rate": 2.6031643246022896e-05, "loss": 0.9428, "step": 10028 }, { "epoch": 0.77, "grad_norm": 1.1555135250091553, "learning_rate": 2.6015500663191282e-05, "loss": 1.564, "step": 10029 }, { "epoch": 0.77, "grad_norm": 1.216416597366333, "learning_rate": 2.599936233859326e-05, "loss": 0.8422, "step": 10030 }, { "epoch": 0.77, "grad_norm": 0.8586458563804626, "learning_rate": 2.59832282731576e-05, "loss": 0.8178, "step": 10031 }, { "epoch": 0.77, "grad_norm": 1.7115850448608398, "learning_rate": 2.5967098467812945e-05, "loss": 1.2463, "step": 10032 }, { "epoch": 0.77, "grad_norm": 1.2819589376449585, "learning_rate": 2.5950972923487692e-05, "loss": 1.5719, "step": 10033 }, { "epoch": 0.77, "grad_norm": 2.3656420707702637, "learning_rate": 2.5934851641109924e-05, "loss": 2.0335, "step": 10034 }, { "epoch": 0.77, "grad_norm": 1.3471424579620361, "learning_rate": 2.5918734621607498e-05, "loss": 1.3813, "step": 10035 }, { "epoch": 0.77, "grad_norm": 1.981519341468811, 
"learning_rate": 2.590262186590805e-05, "loss": 2.1969, "step": 10036 }, { "epoch": 0.77, "grad_norm": 2.016618251800537, "learning_rate": 2.5886513374939015e-05, "loss": 1.4083, "step": 10037 }, { "epoch": 0.77, "grad_norm": 0.9999338388442993, "learning_rate": 2.587040914962745e-05, "loss": 1.3006, "step": 10038 }, { "epoch": 0.77, "grad_norm": 1.1600075960159302, "learning_rate": 2.5854309190900283e-05, "loss": 1.1925, "step": 10039 }, { "epoch": 0.77, "grad_norm": 1.6992170810699463, "learning_rate": 2.5838213499684182e-05, "loss": 1.4619, "step": 10040 }, { "epoch": 0.77, "grad_norm": 2.6046242713928223, "learning_rate": 2.5822122076905508e-05, "loss": 1.8569, "step": 10041 }, { "epoch": 0.77, "grad_norm": 1.3080694675445557, "learning_rate": 2.580603492349045e-05, "loss": 1.8013, "step": 10042 }, { "epoch": 0.77, "grad_norm": 2.2834792137145996, "learning_rate": 2.5789952040364875e-05, "loss": 1.5146, "step": 10043 }, { "epoch": 0.77, "grad_norm": 1.6601407527923584, "learning_rate": 2.5773873428454486e-05, "loss": 1.6069, "step": 10044 }, { "epoch": 0.77, "grad_norm": 1.1968848705291748, "learning_rate": 2.5757799088684654e-05, "loss": 0.6489, "step": 10045 }, { "epoch": 0.77, "grad_norm": 3.44315767288208, "learning_rate": 2.574172902198061e-05, "loss": 1.404, "step": 10046 }, { "epoch": 0.77, "grad_norm": 1.581115484237671, "learning_rate": 2.5725663229267216e-05, "loss": 1.5031, "step": 10047 }, { "epoch": 0.77, "grad_norm": 1.1346380710601807, "learning_rate": 2.5709601711469178e-05, "loss": 1.1681, "step": 10048 }, { "epoch": 0.77, "grad_norm": 1.496320128440857, "learning_rate": 2.5693544469510967e-05, "loss": 1.3456, "step": 10049 }, { "epoch": 0.77, "grad_norm": 1.3829832077026367, "learning_rate": 2.5677491504316696e-05, "loss": 1.1819, "step": 10050 }, { "epoch": 0.77, "grad_norm": 1.4363938570022583, "learning_rate": 2.5661442816810344e-05, "loss": 1.6755, "step": 10051 }, { "epoch": 0.77, "grad_norm": 1.4180032014846802, "learning_rate": 
2.5645398407915632e-05, "loss": 1.0282, "step": 10052 }, { "epoch": 0.77, "grad_norm": 1.2121096849441528, "learning_rate": 2.562935827855598e-05, "loss": 0.5391, "step": 10053 }, { "epoch": 0.77, "grad_norm": 0.7517099976539612, "learning_rate": 2.5613322429654574e-05, "loss": 0.8254, "step": 10054 }, { "epoch": 0.77, "grad_norm": 0.9539871215820312, "learning_rate": 2.5597290862134405e-05, "loss": 0.8296, "step": 10055 }, { "epoch": 0.77, "grad_norm": 1.7542980909347534, "learning_rate": 2.5581263576918125e-05, "loss": 1.6151, "step": 10056 }, { "epoch": 0.77, "grad_norm": 1.2524521350860596, "learning_rate": 2.556524057492824e-05, "loss": 0.9653, "step": 10057 }, { "epoch": 0.77, "grad_norm": 1.123622179031372, "learning_rate": 2.554922185708699e-05, "loss": 1.5911, "step": 10058 }, { "epoch": 0.77, "grad_norm": 3.9105656147003174, "learning_rate": 2.5533207424316287e-05, "loss": 1.4618, "step": 10059 }, { "epoch": 0.77, "grad_norm": 1.4106063842773438, "learning_rate": 2.5517197277537886e-05, "loss": 1.8856, "step": 10060 }, { "epoch": 0.77, "grad_norm": 1.929081916809082, "learning_rate": 2.5501191417673276e-05, "loss": 1.2835, "step": 10061 }, { "epoch": 0.77, "grad_norm": 1.3819141387939453, "learning_rate": 2.5485189845643675e-05, "loss": 1.0001, "step": 10062 }, { "epoch": 0.77, "grad_norm": 3.7523107528686523, "learning_rate": 2.5469192562370027e-05, "loss": 2.4899, "step": 10063 }, { "epoch": 0.77, "grad_norm": 1.3025927543640137, "learning_rate": 2.5453199568773123e-05, "loss": 0.9715, "step": 10064 }, { "epoch": 0.77, "grad_norm": 1.6721323728561401, "learning_rate": 2.5437210865773407e-05, "loss": 0.9991, "step": 10065 }, { "epoch": 0.77, "grad_norm": 1.3430989980697632, "learning_rate": 2.5421226454291137e-05, "loss": 1.2948, "step": 10066 }, { "epoch": 0.77, "grad_norm": 1.9456359148025513, "learning_rate": 2.5405246335246348e-05, "loss": 1.4754, "step": 10067 }, { "epoch": 0.77, "grad_norm": 2.3979105949401855, "learning_rate": 
2.5389270509558716e-05, "loss": 1.6752, "step": 10068 }, { "epoch": 0.77, "grad_norm": 6.2502760887146, "learning_rate": 2.5373298978147787e-05, "loss": 1.8995, "step": 10069 }, { "epoch": 0.77, "grad_norm": 2.879441022872925, "learning_rate": 2.535733174193282e-05, "loss": 1.9584, "step": 10070 }, { "epoch": 0.77, "grad_norm": 1.5812956094741821, "learning_rate": 2.534136880183282e-05, "loss": 1.2791, "step": 10071 }, { "epoch": 0.77, "grad_norm": 1.1740314960479736, "learning_rate": 2.5325410158766504e-05, "loss": 1.0006, "step": 10072 }, { "epoch": 0.77, "grad_norm": 1.1836100816726685, "learning_rate": 2.530945581365244e-05, "loss": 1.2581, "step": 10073 }, { "epoch": 0.77, "grad_norm": 1.627845287322998, "learning_rate": 2.5293505767408833e-05, "loss": 1.8139, "step": 10074 }, { "epoch": 0.77, "grad_norm": 1.0730055570602417, "learning_rate": 2.527756002095373e-05, "loss": 0.929, "step": 10075 }, { "epoch": 0.77, "grad_norm": 1.25823175907135, "learning_rate": 2.5261618575204938e-05, "loss": 1.779, "step": 10076 }, { "epoch": 0.77, "grad_norm": 1.0104644298553467, "learning_rate": 2.5245681431079915e-05, "loss": 1.1537, "step": 10077 }, { "epoch": 0.77, "grad_norm": 1.3165836334228516, "learning_rate": 2.522974858949596e-05, "loss": 0.8875, "step": 10078 }, { "epoch": 0.77, "grad_norm": 2.9366042613983154, "learning_rate": 2.521382005137013e-05, "loss": 2.5853, "step": 10079 }, { "epoch": 0.77, "grad_norm": 1.3664644956588745, "learning_rate": 2.5197895817619153e-05, "loss": 1.2006, "step": 10080 }, { "epoch": 0.77, "grad_norm": 1.4791758060455322, "learning_rate": 2.5181975889159615e-05, "loss": 1.2396, "step": 10081 }, { "epoch": 0.77, "grad_norm": 1.6401443481445312, "learning_rate": 2.5166060266907766e-05, "loss": 1.0148, "step": 10082 }, { "epoch": 0.77, "grad_norm": 1.341406226158142, "learning_rate": 2.5150148951779616e-05, "loss": 1.4742, "step": 10083 }, { "epoch": 0.77, "grad_norm": 1.9720563888549805, "learning_rate": 2.5134241944690984e-05, "loss": 
1.4798, "step": 10084 }, { "epoch": 0.77, "grad_norm": 1.7972917556762695, "learning_rate": 2.5118339246557433e-05, "loss": 1.068, "step": 10085 }, { "epoch": 0.77, "grad_norm": 1.8907182216644287, "learning_rate": 2.5102440858294198e-05, "loss": 0.8578, "step": 10086 }, { "epoch": 0.77, "grad_norm": 1.2755866050720215, "learning_rate": 2.5086546780816357e-05, "loss": 1.2458, "step": 10087 }, { "epoch": 0.77, "grad_norm": 1.6322942972183228, "learning_rate": 2.5070657015038734e-05, "loss": 1.1731, "step": 10088 }, { "epoch": 0.77, "grad_norm": 1.5498850345611572, "learning_rate": 2.5054771561875812e-05, "loss": 1.4291, "step": 10089 }, { "epoch": 0.77, "grad_norm": 2.436566114425659, "learning_rate": 2.5038890422241958e-05, "loss": 1.6951, "step": 10090 }, { "epoch": 0.77, "grad_norm": 1.8224105834960938, "learning_rate": 2.5023013597051183e-05, "loss": 0.6864, "step": 10091 }, { "epoch": 0.77, "grad_norm": 1.1675395965576172, "learning_rate": 2.5007141087217257e-05, "loss": 1.5155, "step": 10092 }, { "epoch": 0.77, "grad_norm": 1.7291996479034424, "learning_rate": 2.4991272893653782e-05, "loss": 0.8282, "step": 10093 }, { "epoch": 0.77, "grad_norm": 1.1120007038116455, "learning_rate": 2.4975409017274088e-05, "loss": 1.4558, "step": 10094 }, { "epoch": 0.77, "grad_norm": 1.31661856174469, "learning_rate": 2.495954945899116e-05, "loss": 1.3782, "step": 10095 }, { "epoch": 0.77, "grad_norm": 1.2825801372528076, "learning_rate": 2.494369421971785e-05, "loss": 0.6311, "step": 10096 }, { "epoch": 0.77, "grad_norm": 2.0390148162841797, "learning_rate": 2.492784330036674e-05, "loss": 1.3396, "step": 10097 }, { "epoch": 0.77, "grad_norm": 1.1273412704467773, "learning_rate": 2.491199670185008e-05, "loss": 1.2869, "step": 10098 }, { "epoch": 0.77, "grad_norm": 1.3142532110214233, "learning_rate": 2.489615442508e-05, "loss": 1.7977, "step": 10099 }, { "epoch": 0.77, "grad_norm": 1.6904337406158447, "learning_rate": 2.4880316470968256e-05, "loss": 2.577, "step": 10100 }, { 
"epoch": 0.77, "grad_norm": 1.4858710765838623, "learning_rate": 2.4864482840426463e-05, "loss": 1.3461, "step": 10101 }, { "epoch": 0.77, "grad_norm": 3.962883234024048, "learning_rate": 2.4848653534365886e-05, "loss": 1.6413, "step": 10102 }, { "epoch": 0.77, "grad_norm": 0.78583163022995, "learning_rate": 2.4832828553697652e-05, "loss": 1.9942, "step": 10103 }, { "epoch": 0.77, "grad_norm": 1.7538074254989624, "learning_rate": 2.481700789933252e-05, "loss": 1.314, "step": 10104 }, { "epoch": 0.77, "grad_norm": 1.3196853399276733, "learning_rate": 2.480119157218108e-05, "loss": 1.413, "step": 10105 }, { "epoch": 0.77, "grad_norm": 1.423962116241455, "learning_rate": 2.47853795731537e-05, "loss": 0.9063, "step": 10106 }, { "epoch": 0.77, "grad_norm": 2.0259861946105957, "learning_rate": 2.476957190316037e-05, "loss": 1.4729, "step": 10107 }, { "epoch": 0.77, "grad_norm": 1.2921241521835327, "learning_rate": 2.475376856311097e-05, "loss": 1.3765, "step": 10108 }, { "epoch": 0.77, "grad_norm": 1.5001940727233887, "learning_rate": 2.4737969553915076e-05, "loss": 1.4177, "step": 10109 }, { "epoch": 0.77, "grad_norm": 3.8482484817504883, "learning_rate": 2.4722174876481986e-05, "loss": 1.0359, "step": 10110 }, { "epoch": 0.77, "grad_norm": 1.1255379915237427, "learning_rate": 2.4706384531720763e-05, "loss": 1.083, "step": 10111 }, { "epoch": 0.77, "grad_norm": 1.4298447370529175, "learning_rate": 2.4690598520540274e-05, "loss": 0.7046, "step": 10112 }, { "epoch": 0.77, "grad_norm": 2.849731922149658, "learning_rate": 2.4674816843849045e-05, "loss": 1.9271, "step": 10113 }, { "epoch": 0.77, "grad_norm": 1.4520363807678223, "learning_rate": 2.4659039502555438e-05, "loss": 1.1908, "step": 10114 }, { "epoch": 0.77, "grad_norm": 1.6021053791046143, "learning_rate": 2.464326649756754e-05, "loss": 1.0138, "step": 10115 }, { "epoch": 0.77, "grad_norm": 1.7300106287002563, "learning_rate": 2.4627497829793122e-05, "loss": 0.8963, "step": 10116 }, { "epoch": 0.77, "grad_norm": 
1.7522040605545044, "learning_rate": 2.461173350013981e-05, "loss": 1.8231, "step": 10117 }, { "epoch": 0.77, "grad_norm": 1.518182396888733, "learning_rate": 2.4595973509514945e-05, "loss": 1.5042, "step": 10118 }, { "epoch": 0.77, "grad_norm": 3.840301513671875, "learning_rate": 2.4580217858825583e-05, "loss": 1.4807, "step": 10119 }, { "epoch": 0.77, "grad_norm": 0.9615505337715149, "learning_rate": 2.4564466548978525e-05, "loss": 1.2974, "step": 10120 }, { "epoch": 0.77, "grad_norm": 2.0386388301849365, "learning_rate": 2.4548719580880395e-05, "loss": 0.944, "step": 10121 }, { "epoch": 0.77, "grad_norm": 1.2589060068130493, "learning_rate": 2.453297695543747e-05, "loss": 1.0836, "step": 10122 }, { "epoch": 0.77, "grad_norm": 1.3363224267959595, "learning_rate": 2.4517238673555863e-05, "loss": 1.6377, "step": 10123 }, { "epoch": 0.77, "grad_norm": 1.276466727256775, "learning_rate": 2.4501504736141434e-05, "loss": 1.5671, "step": 10124 }, { "epoch": 0.77, "grad_norm": 1.1658958196640015, "learning_rate": 2.4485775144099688e-05, "loss": 1.0322, "step": 10125 }, { "epoch": 0.77, "grad_norm": 1.1362121105194092, "learning_rate": 2.447004989833599e-05, "loss": 1.2068, "step": 10126 }, { "epoch": 0.77, "grad_norm": 1.567860722541809, "learning_rate": 2.445432899975546e-05, "loss": 0.5936, "step": 10127 }, { "epoch": 0.77, "grad_norm": 1.294980525970459, "learning_rate": 2.4438612449262877e-05, "loss": 1.3026, "step": 10128 }, { "epoch": 0.77, "grad_norm": 1.6871236562728882, "learning_rate": 2.44229002477628e-05, "loss": 1.3259, "step": 10129 }, { "epoch": 0.77, "grad_norm": 1.2359237670898438, "learning_rate": 2.4407192396159627e-05, "loss": 1.257, "step": 10130 }, { "epoch": 0.77, "grad_norm": 1.233612060546875, "learning_rate": 2.439148889535735e-05, "loss": 1.5264, "step": 10131 }, { "epoch": 0.77, "grad_norm": 1.3286584615707397, "learning_rate": 2.4375789746259846e-05, "loss": 1.7036, "step": 10132 }, { "epoch": 0.77, "grad_norm": 1.5169525146484375, 
"learning_rate": 2.4360094949770716e-05, "loss": 1.363, "step": 10133 }, { "epoch": 0.77, "grad_norm": 1.9286587238311768, "learning_rate": 2.4344404506793217e-05, "loss": 1.9903, "step": 10134 }, { "epoch": 0.77, "grad_norm": 1.3013397455215454, "learning_rate": 2.432871841823047e-05, "loss": 1.2795, "step": 10135 }, { "epoch": 0.77, "grad_norm": 1.143397331237793, "learning_rate": 2.4313036684985313e-05, "loss": 1.3704, "step": 10136 }, { "epoch": 0.77, "grad_norm": 1.2893785238265991, "learning_rate": 2.4297359307960278e-05, "loss": 1.4372, "step": 10137 }, { "epoch": 0.77, "grad_norm": 1.2716968059539795, "learning_rate": 2.4281686288057725e-05, "loss": 1.4591, "step": 10138 }, { "epoch": 0.77, "grad_norm": 1.525252103805542, "learning_rate": 2.4266017626179715e-05, "loss": 1.7467, "step": 10139 }, { "epoch": 0.77, "grad_norm": 2.238668203353882, "learning_rate": 2.4250353323228036e-05, "loss": 1.658, "step": 10140 }, { "epoch": 0.77, "grad_norm": 1.6961712837219238, "learning_rate": 2.423469338010429e-05, "loss": 1.8795, "step": 10141 }, { "epoch": 0.77, "grad_norm": 1.3676762580871582, "learning_rate": 2.421903779770982e-05, "loss": 1.6129, "step": 10142 }, { "epoch": 0.77, "grad_norm": 2.0046651363372803, "learning_rate": 2.420338657694564e-05, "loss": 1.1905, "step": 10143 }, { "epoch": 0.77, "grad_norm": 1.1732561588287354, "learning_rate": 2.4187739718712598e-05, "loss": 1.5178, "step": 10144 }, { "epoch": 0.77, "grad_norm": 1.3926318883895874, "learning_rate": 2.4172097223911293e-05, "loss": 1.4241, "step": 10145 }, { "epoch": 0.77, "grad_norm": 2.4826138019561768, "learning_rate": 2.4156459093441974e-05, "loss": 1.2235, "step": 10146 }, { "epoch": 0.77, "grad_norm": 1.1173343658447266, "learning_rate": 2.4140825328204765e-05, "loss": 1.4468, "step": 10147 }, { "epoch": 0.77, "grad_norm": 1.6917020082473755, "learning_rate": 2.4125195929099452e-05, "loss": 1.29, "step": 10148 }, { "epoch": 0.77, "grad_norm": 1.040931224822998, "learning_rate": 
2.410957089702557e-05, "loss": 1.3753, "step": 10149 }, { "epoch": 0.77, "grad_norm": 2.6212220191955566, "learning_rate": 2.4093950232882456e-05, "loss": 2.2213, "step": 10150 }, { "epoch": 0.77, "grad_norm": 1.214929461479187, "learning_rate": 2.40783339375692e-05, "loss": 1.335, "step": 10151 }, { "epoch": 0.77, "grad_norm": 1.4826074838638306, "learning_rate": 2.406272201198454e-05, "loss": 1.5751, "step": 10152 }, { "epoch": 0.77, "grad_norm": 1.2642903327941895, "learning_rate": 2.4047114457027086e-05, "loss": 1.2021, "step": 10153 }, { "epoch": 0.77, "grad_norm": 1.5221199989318848, "learning_rate": 2.4031511273595154e-05, "loss": 0.83, "step": 10154 }, { "epoch": 0.77, "grad_norm": 3.020369291305542, "learning_rate": 2.401591246258673e-05, "loss": 1.3063, "step": 10155 }, { "epoch": 0.77, "grad_norm": 1.9995207786560059, "learning_rate": 2.4000318024899703e-05, "loss": 1.1259, "step": 10156 }, { "epoch": 0.78, "grad_norm": 2.4989709854125977, "learning_rate": 2.3984727961431562e-05, "loss": 1.6919, "step": 10157 }, { "epoch": 0.78, "grad_norm": 1.3907073736190796, "learning_rate": 2.3969142273079603e-05, "loss": 0.9226, "step": 10158 }, { "epoch": 0.78, "grad_norm": 1.3350013494491577, "learning_rate": 2.3953560960740884e-05, "loss": 1.4206, "step": 10159 }, { "epoch": 0.78, "grad_norm": 1.2235488891601562, "learning_rate": 2.393798402531222e-05, "loss": 0.9743, "step": 10160 }, { "epoch": 0.78, "grad_norm": 3.613374948501587, "learning_rate": 2.392241146769012e-05, "loss": 1.8305, "step": 10161 }, { "epoch": 0.78, "grad_norm": 1.571262240409851, "learning_rate": 2.3906843288770886e-05, "loss": 1.1427, "step": 10162 }, { "epoch": 0.78, "grad_norm": 1.4119518995285034, "learning_rate": 2.389127948945059e-05, "loss": 0.9809, "step": 10163 }, { "epoch": 0.78, "grad_norm": 1.3172999620437622, "learning_rate": 2.3875720070624964e-05, "loss": 1.7241, "step": 10164 }, { "epoch": 0.78, "grad_norm": 2.246455669403076, "learning_rate": 2.3860165033189587e-05, "loss": 
1.258, "step": 10165 }, { "epoch": 0.78, "grad_norm": 1.176820158958435, "learning_rate": 2.38446143780397e-05, "loss": 1.5582, "step": 10166 }, { "epoch": 0.78, "grad_norm": 2.358750581741333, "learning_rate": 2.3829068106070386e-05, "loss": 1.5562, "step": 10167 }, { "epoch": 0.78, "grad_norm": 1.9855120182037354, "learning_rate": 2.381352621817635e-05, "loss": 1.4328, "step": 10168 }, { "epoch": 0.78, "grad_norm": 1.8535188436508179, "learning_rate": 2.3797988715252194e-05, "loss": 1.2339, "step": 10169 }, { "epoch": 0.78, "grad_norm": 2.709083318710327, "learning_rate": 2.378245559819212e-05, "loss": 1.7473, "step": 10170 }, { "epoch": 0.78, "grad_norm": 2.9572458267211914, "learning_rate": 2.3766926867890184e-05, "loss": 1.5538, "step": 10171 }, { "epoch": 0.78, "grad_norm": 2.2064032554626465, "learning_rate": 2.3751402525240174e-05, "loss": 1.8655, "step": 10172 }, { "epoch": 0.78, "grad_norm": 2.0631041526794434, "learning_rate": 2.3735882571135558e-05, "loss": 0.9222, "step": 10173 }, { "epoch": 0.78, "grad_norm": 1.6758503913879395, "learning_rate": 2.3720367006469635e-05, "loss": 0.9148, "step": 10174 }, { "epoch": 0.78, "grad_norm": 2.296523332595825, "learning_rate": 2.370485583213542e-05, "loss": 1.9334, "step": 10175 }, { "epoch": 0.78, "grad_norm": 1.7607228755950928, "learning_rate": 2.3689349049025655e-05, "loss": 1.5188, "step": 10176 }, { "epoch": 0.78, "grad_norm": 0.9892498254776001, "learning_rate": 2.3673846658032818e-05, "loss": 1.1674, "step": 10177 }, { "epoch": 0.78, "grad_norm": 1.1122443675994873, "learning_rate": 2.365834866004919e-05, "loss": 1.3264, "step": 10178 }, { "epoch": 0.78, "grad_norm": 1.9698669910430908, "learning_rate": 2.3642855055966784e-05, "loss": 1.2959, "step": 10179 }, { "epoch": 0.78, "grad_norm": 1.6203653812408447, "learning_rate": 2.3627365846677306e-05, "loss": 0.969, "step": 10180 }, { "epoch": 0.78, "grad_norm": 2.123638391494751, "learning_rate": 2.361188103307227e-05, "loss": 1.2256, "step": 10181 }, { 
"epoch": 0.78, "grad_norm": 1.234573245048523, "learning_rate": 2.3596400616042946e-05, "loss": 0.8283, "step": 10182 }, { "epoch": 0.78, "grad_norm": 2.2278850078582764, "learning_rate": 2.3580924596480268e-05, "loss": 1.2426, "step": 10183 }, { "epoch": 0.78, "grad_norm": 2.924994707107544, "learning_rate": 2.3565452975275015e-05, "loss": 1.9367, "step": 10184 }, { "epoch": 0.78, "grad_norm": 1.3290011882781982, "learning_rate": 2.3549985753317648e-05, "loss": 1.5146, "step": 10185 }, { "epoch": 0.78, "grad_norm": 1.2124725580215454, "learning_rate": 2.353452293149837e-05, "loss": 1.26, "step": 10186 }, { "epoch": 0.78, "grad_norm": 1.7276571989059448, "learning_rate": 2.3519064510707177e-05, "loss": 1.5028, "step": 10187 }, { "epoch": 0.78, "grad_norm": 2.1414313316345215, "learning_rate": 2.350361049183383e-05, "loss": 1.3223, "step": 10188 }, { "epoch": 0.78, "grad_norm": 2.4662420749664307, "learning_rate": 2.3488160875767717e-05, "loss": 1.1915, "step": 10189 }, { "epoch": 0.78, "grad_norm": 1.4922484159469604, "learning_rate": 2.3472715663398102e-05, "loss": 1.5138, "step": 10190 }, { "epoch": 0.78, "grad_norm": 2.29764723777771, "learning_rate": 2.3457274855613966e-05, "loss": 2.0212, "step": 10191 }, { "epoch": 0.78, "grad_norm": 1.2088258266448975, "learning_rate": 2.3441838453303956e-05, "loss": 0.9579, "step": 10192 }, { "epoch": 0.78, "grad_norm": 1.6567411422729492, "learning_rate": 2.3426406457356588e-05, "loss": 1.4184, "step": 10193 }, { "epoch": 0.78, "grad_norm": 1.3310822248458862, "learning_rate": 2.341097886866004e-05, "loss": 1.3153, "step": 10194 }, { "epoch": 0.78, "grad_norm": 1.958313226699829, "learning_rate": 2.339555568810221e-05, "loss": 1.7202, "step": 10195 }, { "epoch": 0.78, "grad_norm": 1.5622233152389526, "learning_rate": 2.3380136916570837e-05, "loss": 1.7475, "step": 10196 }, { "epoch": 0.78, "grad_norm": 1.7719563245773315, "learning_rate": 2.336472255495338e-05, "loss": 1.2877, "step": 10197 }, { "epoch": 0.78, "grad_norm": 
1.0490378141403198, "learning_rate": 2.3349312604136976e-05, "loss": 1.1087, "step": 10198 }, { "epoch": 0.78, "grad_norm": 1.5470198392868042, "learning_rate": 2.3333907065008575e-05, "loss": 1.4349, "step": 10199 }, { "epoch": 0.78, "grad_norm": 2.119529962539673, "learning_rate": 2.331850593845488e-05, "loss": 1.5808, "step": 10200 }, { "epoch": 0.78, "grad_norm": 1.1703623533248901, "learning_rate": 2.3303109225362276e-05, "loss": 0.9927, "step": 10201 }, { "epoch": 0.78, "grad_norm": 2.307419538497925, "learning_rate": 2.3287716926616976e-05, "loss": 1.2372, "step": 10202 }, { "epoch": 0.78, "grad_norm": 2.3553659915924072, "learning_rate": 2.3272329043104836e-05, "loss": 1.7407, "step": 10203 }, { "epoch": 0.78, "grad_norm": 1.3351954221725464, "learning_rate": 2.3256945575711574e-05, "loss": 2.0252, "step": 10204 }, { "epoch": 0.78, "grad_norm": 1.5196871757507324, "learning_rate": 2.3241566525322554e-05, "loss": 1.6439, "step": 10205 }, { "epoch": 0.78, "grad_norm": 1.4724044799804688, "learning_rate": 2.322619189282298e-05, "loss": 1.2229, "step": 10206 }, { "epoch": 0.78, "grad_norm": 1.8213305473327637, "learning_rate": 2.3210821679097673e-05, "loss": 0.909, "step": 10207 }, { "epoch": 0.78, "grad_norm": 1.465022325515747, "learning_rate": 2.3195455885031335e-05, "loss": 1.8776, "step": 10208 }, { "epoch": 0.78, "grad_norm": 1.2833036184310913, "learning_rate": 2.3180094511508366e-05, "loss": 1.6384, "step": 10209 }, { "epoch": 0.78, "grad_norm": 1.0823593139648438, "learning_rate": 2.3164737559412854e-05, "loss": 1.3854, "step": 10210 }, { "epoch": 0.78, "grad_norm": 2.1833767890930176, "learning_rate": 2.3149385029628702e-05, "loss": 1.4626, "step": 10211 }, { "epoch": 0.78, "grad_norm": 1.3802599906921387, "learning_rate": 2.313403692303957e-05, "loss": 1.1911, "step": 10212 }, { "epoch": 0.78, "grad_norm": 4.499886989593506, "learning_rate": 2.31186932405288e-05, "loss": 2.2522, "step": 10213 }, { "epoch": 0.78, "grad_norm": 1.140621304512024, 
"learning_rate": 2.310335398297947e-05, "loss": 0.823, "step": 10214 }, { "epoch": 0.78, "grad_norm": 0.963206946849823, "learning_rate": 2.308801915127452e-05, "loss": 1.5972, "step": 10215 }, { "epoch": 0.78, "grad_norm": 1.7569546699523926, "learning_rate": 2.307268874629649e-05, "loss": 1.5564, "step": 10216 }, { "epoch": 0.78, "grad_norm": 2.3750438690185547, "learning_rate": 2.3057362768927748e-05, "loss": 1.6835, "step": 10217 }, { "epoch": 0.78, "grad_norm": 1.2736729383468628, "learning_rate": 2.3042041220050448e-05, "loss": 1.1152, "step": 10218 }, { "epoch": 0.78, "grad_norm": 1.8680434226989746, "learning_rate": 2.3026724100546358e-05, "loss": 1.4044, "step": 10219 }, { "epoch": 0.78, "grad_norm": 1.0678179264068604, "learning_rate": 2.30114114112971e-05, "loss": 0.8576, "step": 10220 }, { "epoch": 0.78, "grad_norm": 1.5776033401489258, "learning_rate": 2.2996103153184035e-05, "loss": 1.2404, "step": 10221 }, { "epoch": 0.78, "grad_norm": 3.4493343830108643, "learning_rate": 2.298079932708821e-05, "loss": 0.9087, "step": 10222 }, { "epoch": 0.78, "grad_norm": 1.166590929031372, "learning_rate": 2.2965499933890422e-05, "loss": 1.178, "step": 10223 }, { "epoch": 0.78, "grad_norm": 1.8739243745803833, "learning_rate": 2.295020497447129e-05, "loss": 1.4912, "step": 10224 }, { "epoch": 0.78, "grad_norm": 1.6634600162506104, "learning_rate": 2.2934914449711087e-05, "loss": 2.0422, "step": 10225 }, { "epoch": 0.78, "grad_norm": 1.5622506141662598, "learning_rate": 2.2919628360489887e-05, "loss": 1.5866, "step": 10226 }, { "epoch": 0.78, "grad_norm": 1.5866303443908691, "learning_rate": 2.290434670768752e-05, "loss": 1.3619, "step": 10227 }, { "epoch": 0.78, "grad_norm": 2.846595287322998, "learning_rate": 2.288906949218348e-05, "loss": 1.7737, "step": 10228 }, { "epoch": 0.78, "grad_norm": 1.4364891052246094, "learning_rate": 2.2873796714857088e-05, "loss": 1.1521, "step": 10229 }, { "epoch": 0.78, "grad_norm": 1.303168773651123, "learning_rate": 
2.2858528376587407e-05, "loss": 1.7019, "step": 10230 }, { "epoch": 0.78, "grad_norm": 5.600284576416016, "learning_rate": 2.2843264478253156e-05, "loss": 1.579, "step": 10231 }, { "epoch": 0.78, "grad_norm": 1.7829550504684448, "learning_rate": 2.2828005020732923e-05, "loss": 0.995, "step": 10232 }, { "epoch": 0.78, "grad_norm": 1.231004238128662, "learning_rate": 2.2812750004904947e-05, "loss": 1.1216, "step": 10233 }, { "epoch": 0.78, "grad_norm": 6.287426471710205, "learning_rate": 2.2797499431647218e-05, "loss": 1.8519, "step": 10234 }, { "epoch": 0.78, "grad_norm": 0.9413648843765259, "learning_rate": 2.278225330183751e-05, "loss": 1.0369, "step": 10235 }, { "epoch": 0.78, "grad_norm": 1.5308462381362915, "learning_rate": 2.276701161635336e-05, "loss": 0.7455, "step": 10236 }, { "epoch": 0.78, "grad_norm": 1.2821602821350098, "learning_rate": 2.2751774376071976e-05, "loss": 1.2341, "step": 10237 }, { "epoch": 0.78, "grad_norm": 1.4500802755355835, "learning_rate": 2.273654158187035e-05, "loss": 1.2394, "step": 10238 }, { "epoch": 0.78, "grad_norm": 2.6708686351776123, "learning_rate": 2.2721313234625264e-05, "loss": 0.9555, "step": 10239 }, { "epoch": 0.78, "grad_norm": 1.4238015413284302, "learning_rate": 2.2706089335213122e-05, "loss": 1.2652, "step": 10240 }, { "epoch": 0.78, "grad_norm": 2.865358591079712, "learning_rate": 2.2690869884510223e-05, "loss": 1.3364, "step": 10241 }, { "epoch": 0.78, "grad_norm": 2.147339105606079, "learning_rate": 2.2675654883392495e-05, "loss": 1.7388, "step": 10242 }, { "epoch": 0.78, "grad_norm": 2.7091846466064453, "learning_rate": 2.266044433273562e-05, "loss": 1.6247, "step": 10243 }, { "epoch": 0.78, "grad_norm": 1.3917655944824219, "learning_rate": 2.264523823341509e-05, "loss": 1.7022, "step": 10244 }, { "epoch": 0.78, "grad_norm": 1.0715548992156982, "learning_rate": 2.2630036586306123e-05, "loss": 0.9755, "step": 10245 }, { "epoch": 0.78, "grad_norm": 2.07399845123291, "learning_rate": 2.2614839392283603e-05, 
"loss": 1.3518, "step": 10246 }, { "epoch": 0.78, "grad_norm": 1.6435877084732056, "learning_rate": 2.259964665222225e-05, "loss": 1.7668, "step": 10247 }, { "epoch": 0.78, "grad_norm": 2.12947678565979, "learning_rate": 2.2584458366996532e-05, "loss": 1.5408, "step": 10248 }, { "epoch": 0.78, "grad_norm": 1.321956753730774, "learning_rate": 2.2569274537480545e-05, "loss": 1.6204, "step": 10249 }, { "epoch": 0.78, "grad_norm": 1.262129545211792, "learning_rate": 2.255409516454826e-05, "loss": 1.5474, "step": 10250 }, { "epoch": 0.78, "grad_norm": 1.1343709230422974, "learning_rate": 2.2538920249073335e-05, "loss": 1.1874, "step": 10251 }, { "epoch": 0.78, "grad_norm": 1.3488340377807617, "learning_rate": 2.2523749791929127e-05, "loss": 1.7528, "step": 10252 }, { "epoch": 0.78, "grad_norm": 1.2320499420166016, "learning_rate": 2.2508583793988813e-05, "loss": 1.3869, "step": 10253 }, { "epoch": 0.78, "grad_norm": 1.7804023027420044, "learning_rate": 2.249342225612532e-05, "loss": 1.0082, "step": 10254 }, { "epoch": 0.78, "grad_norm": 1.3252415657043457, "learning_rate": 2.247826517921121e-05, "loss": 1.2996, "step": 10255 }, { "epoch": 0.78, "grad_norm": 1.4487847089767456, "learning_rate": 2.2463112564118906e-05, "loss": 1.2793, "step": 10256 }, { "epoch": 0.78, "grad_norm": 1.0349596738815308, "learning_rate": 2.2447964411720544e-05, "loss": 1.2879, "step": 10257 }, { "epoch": 0.78, "grad_norm": 1.230075478553772, "learning_rate": 2.2432820722887937e-05, "loss": 1.1717, "step": 10258 }, { "epoch": 0.78, "grad_norm": 1.9939799308776855, "learning_rate": 2.241768149849274e-05, "loss": 1.2677, "step": 10259 }, { "epoch": 0.78, "grad_norm": 1.6414417028427124, "learning_rate": 2.240254673940625e-05, "loss": 1.6781, "step": 10260 }, { "epoch": 0.78, "grad_norm": 1.9245704412460327, "learning_rate": 2.238741644649962e-05, "loss": 1.0322, "step": 10261 }, { "epoch": 0.78, "grad_norm": 4.286152362823486, "learning_rate": 2.237229062064363e-05, "loss": 0.8659, "step": 10262 
}, { "epoch": 0.78, "grad_norm": 1.7205018997192383, "learning_rate": 2.235716926270891e-05, "loss": 1.4307, "step": 10263 }, { "epoch": 0.78, "grad_norm": 1.0017297267913818, "learning_rate": 2.2342052373565714e-05, "loss": 1.0799, "step": 10264 }, { "epoch": 0.78, "grad_norm": 2.7296621799468994, "learning_rate": 2.2326939954084157e-05, "loss": 1.1015, "step": 10265 }, { "epoch": 0.78, "grad_norm": 2.3361928462982178, "learning_rate": 2.231183200513406e-05, "loss": 1.6817, "step": 10266 }, { "epoch": 0.78, "grad_norm": 1.8246177434921265, "learning_rate": 2.229672852758492e-05, "loss": 1.393, "step": 10267 }, { "epoch": 0.78, "grad_norm": 1.9531406164169312, "learning_rate": 2.2281629522306058e-05, "loss": 1.5346, "step": 10268 }, { "epoch": 0.78, "grad_norm": 2.405564069747925, "learning_rate": 2.2266534990166534e-05, "loss": 1.7776, "step": 10269 }, { "epoch": 0.78, "grad_norm": 1.5127654075622559, "learning_rate": 2.2251444932035094e-05, "loss": 1.1339, "step": 10270 }, { "epoch": 0.78, "grad_norm": 1.4775466918945312, "learning_rate": 2.2236359348780223e-05, "loss": 1.5728, "step": 10271 }, { "epoch": 0.78, "grad_norm": 1.5138144493103027, "learning_rate": 2.222127824127026e-05, "loss": 1.5532, "step": 10272 }, { "epoch": 0.78, "grad_norm": 1.3580570220947266, "learning_rate": 2.2206201610373145e-05, "loss": 0.8739, "step": 10273 }, { "epoch": 0.78, "grad_norm": 1.9199460744857788, "learning_rate": 2.219112945695665e-05, "loss": 1.7487, "step": 10274 }, { "epoch": 0.78, "grad_norm": 1.4149489402770996, "learning_rate": 2.217606178188829e-05, "loss": 1.4773, "step": 10275 }, { "epoch": 0.78, "grad_norm": 1.6612489223480225, "learning_rate": 2.2160998586035243e-05, "loss": 0.8603, "step": 10276 }, { "epoch": 0.78, "grad_norm": 1.8144745826721191, "learning_rate": 2.214593987026451e-05, "loss": 1.2001, "step": 10277 }, { "epoch": 0.78, "grad_norm": 1.5093507766723633, "learning_rate": 2.2130885635442844e-05, "loss": 1.8788, "step": 10278 }, { "epoch": 0.78, 
"grad_norm": 2.077857732772827, "learning_rate": 2.2115835882436653e-05, "loss": 1.0996, "step": 10279 }, { "epoch": 0.78, "grad_norm": 2.0236799716949463, "learning_rate": 2.2100790612112133e-05, "loss": 1.6088, "step": 10280 }, { "epoch": 0.78, "grad_norm": 1.0442557334899902, "learning_rate": 2.2085749825335268e-05, "loss": 1.2871, "step": 10281 }, { "epoch": 0.78, "grad_norm": 1.5086783170700073, "learning_rate": 2.2070713522971688e-05, "loss": 1.668, "step": 10282 }, { "epoch": 0.78, "grad_norm": 1.381106972694397, "learning_rate": 2.2055681705886844e-05, "loss": 1.6965, "step": 10283 }, { "epoch": 0.78, "grad_norm": 1.470456600189209, "learning_rate": 2.2040654374945936e-05, "loss": 1.7846, "step": 10284 }, { "epoch": 0.78, "grad_norm": 1.2957077026367188, "learning_rate": 2.2025631531013824e-05, "loss": 1.3491, "step": 10285 }, { "epoch": 0.78, "grad_norm": 1.1791737079620361, "learning_rate": 2.2010613174955175e-05, "loss": 1.0261, "step": 10286 }, { "epoch": 0.78, "grad_norm": 1.6915568113327026, "learning_rate": 2.1995599307634408e-05, "loss": 1.2019, "step": 10287 }, { "epoch": 0.78, "grad_norm": 3.3494272232055664, "learning_rate": 2.198058992991564e-05, "loss": 1.2711, "step": 10288 }, { "epoch": 0.79, "grad_norm": 2.2380645275115967, "learning_rate": 2.1965585042662716e-05, "loss": 1.6258, "step": 10289 }, { "epoch": 0.79, "grad_norm": 1.3834365606307983, "learning_rate": 2.1950584646739304e-05, "loss": 1.3654, "step": 10290 }, { "epoch": 0.79, "grad_norm": 1.2606459856033325, "learning_rate": 2.1935588743008717e-05, "loss": 1.6522, "step": 10291 }, { "epoch": 0.79, "grad_norm": 1.1530861854553223, "learning_rate": 2.192059733233408e-05, "loss": 1.3032, "step": 10292 }, { "epoch": 0.79, "grad_norm": 2.2346930503845215, "learning_rate": 2.190561041557826e-05, "loss": 1.5077, "step": 10293 }, { "epoch": 0.79, "grad_norm": 1.4297270774841309, "learning_rate": 2.1890627993603773e-05, "loss": 1.0338, "step": 10294 }, { "epoch": 0.79, "grad_norm": 
1.3955780267715454, "learning_rate": 2.1875650067273e-05, "loss": 0.7341, "step": 10295 }, { "epoch": 0.79, "grad_norm": 1.2873024940490723, "learning_rate": 2.1860676637448007e-05, "loss": 1.5915, "step": 10296 }, { "epoch": 0.79, "grad_norm": 1.0769273042678833, "learning_rate": 2.184570770499056e-05, "loss": 1.0857, "step": 10297 }, { "epoch": 0.79, "grad_norm": 1.3265657424926758, "learning_rate": 2.1830743270762266e-05, "loss": 0.9261, "step": 10298 }, { "epoch": 0.79, "grad_norm": 1.990922212600708, "learning_rate": 2.1815783335624373e-05, "loss": 1.2053, "step": 10299 }, { "epoch": 0.79, "grad_norm": 1.5247743129730225, "learning_rate": 2.1800827900437894e-05, "loss": 1.6055, "step": 10300 }, { "epoch": 0.79, "grad_norm": 1.4926096200942993, "learning_rate": 2.1785876966063613e-05, "loss": 0.9088, "step": 10301 }, { "epoch": 0.79, "grad_norm": 1.7898603677749634, "learning_rate": 2.177093053336209e-05, "loss": 1.3379, "step": 10302 }, { "epoch": 0.79, "grad_norm": 2.0150203704833984, "learning_rate": 2.175598860319352e-05, "loss": 1.7474, "step": 10303 }, { "epoch": 0.79, "grad_norm": 1.0486539602279663, "learning_rate": 2.1741051176417915e-05, "loss": 1.1326, "step": 10304 }, { "epoch": 0.79, "grad_norm": 3.951204299926758, "learning_rate": 2.1726118253895034e-05, "loss": 1.837, "step": 10305 }, { "epoch": 0.79, "grad_norm": 2.8297805786132812, "learning_rate": 2.1711189836484314e-05, "loss": 1.4437, "step": 10306 }, { "epoch": 0.79, "grad_norm": 1.5981804132461548, "learning_rate": 2.1696265925045022e-05, "loss": 1.574, "step": 10307 }, { "epoch": 0.79, "grad_norm": 2.0319948196411133, "learning_rate": 2.1681346520436075e-05, "loss": 1.7607, "step": 10308 }, { "epoch": 0.79, "grad_norm": 3.3448288440704346, "learning_rate": 2.166643162351615e-05, "loss": 1.4207, "step": 10309 }, { "epoch": 0.79, "grad_norm": 1.5059560537338257, "learning_rate": 2.1651521235143714e-05, "loss": 1.2377, "step": 10310 }, { "epoch": 0.79, "grad_norm": 1.3950608968734741, 
"learning_rate": 2.1636615356176948e-05, "loss": 1.3161, "step": 10311 }, { "epoch": 0.79, "grad_norm": 1.422014832496643, "learning_rate": 2.162171398747379e-05, "loss": 0.9048, "step": 10312 }, { "epoch": 0.79, "grad_norm": 1.1941593885421753, "learning_rate": 2.1606817129891853e-05, "loss": 1.7764, "step": 10313 }, { "epoch": 0.79, "grad_norm": 1.1132652759552002, "learning_rate": 2.1591924784288563e-05, "loss": 0.9591, "step": 10314 }, { "epoch": 0.79, "grad_norm": 1.3200453519821167, "learning_rate": 2.157703695152109e-05, "loss": 1.2144, "step": 10315 }, { "epoch": 0.79, "grad_norm": 2.2258834838867188, "learning_rate": 2.156215363244628e-05, "loss": 1.5982, "step": 10316 }, { "epoch": 0.79, "grad_norm": 1.9152390956878662, "learning_rate": 2.1547274827920728e-05, "loss": 1.3042, "step": 10317 }, { "epoch": 0.79, "grad_norm": 1.0476592779159546, "learning_rate": 2.1532400538800857e-05, "loss": 0.6245, "step": 10318 }, { "epoch": 0.79, "grad_norm": 1.6245414018630981, "learning_rate": 2.1517530765942696e-05, "loss": 1.2137, "step": 10319 }, { "epoch": 0.79, "grad_norm": 2.447469472885132, "learning_rate": 2.1502665510202135e-05, "loss": 1.8349, "step": 10320 }, { "epoch": 0.79, "grad_norm": 1.3006415367126465, "learning_rate": 2.148780477243476e-05, "loss": 1.2003, "step": 10321 }, { "epoch": 0.79, "grad_norm": 1.2994611263275146, "learning_rate": 2.1472948553495865e-05, "loss": 1.2442, "step": 10322 }, { "epoch": 0.79, "grad_norm": 1.089862585067749, "learning_rate": 2.1458096854240506e-05, "loss": 0.942, "step": 10323 }, { "epoch": 0.79, "grad_norm": 1.1525862216949463, "learning_rate": 2.1443249675523536e-05, "loss": 1.2795, "step": 10324 }, { "epoch": 0.79, "grad_norm": 1.7350486516952515, "learning_rate": 2.142840701819946e-05, "loss": 1.1166, "step": 10325 }, { "epoch": 0.79, "grad_norm": 2.172192096710205, "learning_rate": 2.1413568883122524e-05, "loss": 1.7216, "step": 10326 }, { "epoch": 0.79, "grad_norm": 1.4415427446365356, "learning_rate": 
2.1398735271146818e-05, "loss": 1.3492, "step": 10327 }, { "epoch": 0.79, "grad_norm": 2.3800394535064697, "learning_rate": 2.1383906183126033e-05, "loss": 1.9353, "step": 10328 }, { "epoch": 0.79, "grad_norm": 6.11212158203125, "learning_rate": 2.13690816199137e-05, "loss": 2.3295, "step": 10329 }, { "epoch": 0.79, "grad_norm": 3.8719983100891113, "learning_rate": 2.135426158236309e-05, "loss": 1.649, "step": 10330 }, { "epoch": 0.79, "grad_norm": 2.118711233139038, "learning_rate": 2.1339446071327117e-05, "loss": 1.3445, "step": 10331 }, { "epoch": 0.79, "grad_norm": 1.3989181518554688, "learning_rate": 2.132463508765854e-05, "loss": 1.3127, "step": 10332 }, { "epoch": 0.79, "grad_norm": 3.8856089115142822, "learning_rate": 2.1309828632209827e-05, "loss": 1.7002, "step": 10333 }, { "epoch": 0.79, "grad_norm": 1.0008248090744019, "learning_rate": 2.129502670583313e-05, "loss": 0.7212, "step": 10334 }, { "epoch": 0.79, "grad_norm": 1.6279164552688599, "learning_rate": 2.128022930938045e-05, "loss": 1.5476, "step": 10335 }, { "epoch": 0.79, "grad_norm": 1.5829100608825684, "learning_rate": 2.1265436443703413e-05, "loss": 0.8978, "step": 10336 }, { "epoch": 0.79, "grad_norm": 1.5695823431015015, "learning_rate": 2.1250648109653413e-05, "loss": 1.8838, "step": 10337 }, { "epoch": 0.79, "grad_norm": 2.602327346801758, "learning_rate": 2.1235864308081644e-05, "loss": 2.0509, "step": 10338 }, { "epoch": 0.79, "grad_norm": 4.26462459564209, "learning_rate": 2.1221085039839017e-05, "loss": 0.962, "step": 10339 }, { "epoch": 0.79, "grad_norm": 1.4017794132232666, "learning_rate": 2.12063103057761e-05, "loss": 0.8392, "step": 10340 }, { "epoch": 0.79, "grad_norm": 1.3349038362503052, "learning_rate": 2.11915401067433e-05, "loss": 0.9764, "step": 10341 }, { "epoch": 0.79, "grad_norm": 1.2039541006088257, "learning_rate": 2.1176774443590764e-05, "loss": 1.3759, "step": 10342 }, { "epoch": 0.79, "grad_norm": 3.3355958461761475, "learning_rate": 2.1162013317168272e-05, "loss": 
1.9273, "step": 10343 }, { "epoch": 0.79, "grad_norm": 3.756261110305786, "learning_rate": 2.114725672832547e-05, "loss": 2.3434, "step": 10344 }, { "epoch": 0.79, "grad_norm": 1.51393723487854, "learning_rate": 2.1132504677911658e-05, "loss": 1.7303, "step": 10345 }, { "epoch": 0.79, "grad_norm": 1.408240556716919, "learning_rate": 2.1117757166775887e-05, "loss": 1.6912, "step": 10346 }, { "epoch": 0.79, "grad_norm": 1.5540435314178467, "learning_rate": 2.1103014195766955e-05, "loss": 1.1136, "step": 10347 }, { "epoch": 0.79, "grad_norm": 1.8081711530685425, "learning_rate": 2.108827576573347e-05, "loss": 1.8555, "step": 10348 }, { "epoch": 0.79, "grad_norm": 2.7279257774353027, "learning_rate": 2.107354187752363e-05, "loss": 1.9269, "step": 10349 }, { "epoch": 0.79, "grad_norm": 1.4006119966506958, "learning_rate": 2.1058812531985506e-05, "loss": 1.511, "step": 10350 }, { "epoch": 0.79, "grad_norm": 2.2822036743164062, "learning_rate": 2.1044087729966856e-05, "loss": 1.7433, "step": 10351 }, { "epoch": 0.79, "grad_norm": 1.5592044591903687, "learning_rate": 2.1029367472315155e-05, "loss": 1.2377, "step": 10352 }, { "epoch": 0.79, "grad_norm": 2.4107394218444824, "learning_rate": 2.1014651759877668e-05, "loss": 1.5657, "step": 10353 }, { "epoch": 0.79, "grad_norm": 2.540235757827759, "learning_rate": 2.0999940593501344e-05, "loss": 1.4018, "step": 10354 }, { "epoch": 0.79, "grad_norm": 2.1023447513580322, "learning_rate": 2.098523397403288e-05, "loss": 1.5223, "step": 10355 }, { "epoch": 0.79, "grad_norm": 2.119602918624878, "learning_rate": 2.0970531902318757e-05, "loss": 1.4662, "step": 10356 }, { "epoch": 0.79, "grad_norm": 0.8645565509796143, "learning_rate": 2.0955834379205175e-05, "loss": 1.3189, "step": 10357 }, { "epoch": 0.79, "grad_norm": 1.2576910257339478, "learning_rate": 2.094114140553801e-05, "loss": 1.262, "step": 10358 }, { "epoch": 0.79, "grad_norm": 1.0102269649505615, "learning_rate": 2.092645298216296e-05, "loss": 1.3555, "step": 10359 }, { 
"epoch": 0.79, "grad_norm": 1.29190993309021, "learning_rate": 2.091176910992545e-05, "loss": 1.7959, "step": 10360 }, { "epoch": 0.79, "grad_norm": 1.3016934394836426, "learning_rate": 2.0897089789670575e-05, "loss": 1.5402, "step": 10361 }, { "epoch": 0.79, "grad_norm": 1.5580062866210938, "learning_rate": 2.088241502224324e-05, "loss": 1.5303, "step": 10362 }, { "epoch": 0.79, "grad_norm": 2.5909135341644287, "learning_rate": 2.0867744808488077e-05, "loss": 1.7392, "step": 10363 }, { "epoch": 0.79, "grad_norm": 1.7036653757095337, "learning_rate": 2.0853079149249433e-05, "loss": 1.1359, "step": 10364 }, { "epoch": 0.79, "grad_norm": 1.1280869245529175, "learning_rate": 2.0838418045371357e-05, "loss": 1.2469, "step": 10365 }, { "epoch": 0.79, "grad_norm": 1.4335825443267822, "learning_rate": 2.0823761497697748e-05, "loss": 1.6481, "step": 10366 }, { "epoch": 0.79, "grad_norm": 1.2250354290008545, "learning_rate": 2.08091095070721e-05, "loss": 1.107, "step": 10367 }, { "epoch": 0.79, "grad_norm": 1.4458568096160889, "learning_rate": 2.079446207433777e-05, "loss": 1.6951, "step": 10368 }, { "epoch": 0.79, "grad_norm": 1.4654033184051514, "learning_rate": 2.0779819200337823e-05, "loss": 1.4708, "step": 10369 }, { "epoch": 0.79, "grad_norm": 1.3676612377166748, "learning_rate": 2.0765180885914983e-05, "loss": 1.3384, "step": 10370 }, { "epoch": 0.79, "grad_norm": 0.8623501062393188, "learning_rate": 2.075054713191179e-05, "loss": 0.8512, "step": 10371 }, { "epoch": 0.79, "grad_norm": 1.970920205116272, "learning_rate": 2.0735917939170534e-05, "loss": 1.3694, "step": 10372 }, { "epoch": 0.79, "grad_norm": 1.8514035940170288, "learning_rate": 2.0721293308533186e-05, "loss": 1.3575, "step": 10373 }, { "epoch": 0.79, "grad_norm": 2.27990984916687, "learning_rate": 2.0706673240841444e-05, "loss": 1.3844, "step": 10374 }, { "epoch": 0.79, "grad_norm": 4.66628885269165, "learning_rate": 2.069205773693683e-05, "loss": 3.2824, "step": 10375 }, { "epoch": 0.79, "grad_norm": 
1.2906907796859741, "learning_rate": 2.0677446797660503e-05, "loss": 1.058, "step": 10376 }, { "epoch": 0.79, "grad_norm": 1.6691652536392212, "learning_rate": 2.0662840423853434e-05, "loss": 0.7816, "step": 10377 }, { "epoch": 0.79, "grad_norm": 1.4184250831604004, "learning_rate": 2.0648238616356332e-05, "loss": 1.5388, "step": 10378 }, { "epoch": 0.79, "grad_norm": 1.1960686445236206, "learning_rate": 2.0633641376009537e-05, "loss": 0.9833, "step": 10379 }, { "epoch": 0.79, "grad_norm": 2.3838565349578857, "learning_rate": 2.0619048703653266e-05, "loss": 1.4447, "step": 10380 }, { "epoch": 0.79, "grad_norm": 1.3346806764602661, "learning_rate": 2.0604460600127417e-05, "loss": 1.8061, "step": 10381 }, { "epoch": 0.79, "grad_norm": 1.3746936321258545, "learning_rate": 2.05898770662716e-05, "loss": 0.5823, "step": 10382 }, { "epoch": 0.79, "grad_norm": 1.776827335357666, "learning_rate": 2.0575298102925156e-05, "loss": 1.8906, "step": 10383 }, { "epoch": 0.79, "grad_norm": 1.9814039468765259, "learning_rate": 2.0560723710927242e-05, "loss": 1.9588, "step": 10384 }, { "epoch": 0.79, "grad_norm": 1.2863662242889404, "learning_rate": 2.0546153891116626e-05, "loss": 1.6743, "step": 10385 }, { "epoch": 0.79, "grad_norm": 1.3958998918533325, "learning_rate": 2.0531588644331945e-05, "loss": 1.1272, "step": 10386 }, { "epoch": 0.79, "grad_norm": 1.4113547801971436, "learning_rate": 2.0517027971411507e-05, "loss": 1.6301, "step": 10387 }, { "epoch": 0.79, "grad_norm": 1.5296474695205688, "learning_rate": 2.050247187319333e-05, "loss": 1.1844, "step": 10388 }, { "epoch": 0.79, "grad_norm": 1.3087199926376343, "learning_rate": 2.0487920350515212e-05, "loss": 1.711, "step": 10389 }, { "epoch": 0.79, "grad_norm": 1.5563760995864868, "learning_rate": 2.0473373404214723e-05, "loss": 1.7579, "step": 10390 }, { "epoch": 0.79, "grad_norm": 1.7609108686447144, "learning_rate": 2.0458831035129054e-05, "loss": 1.0911, "step": 10391 }, { "epoch": 0.79, "grad_norm": 1.9577604532241821, 
"learning_rate": 2.044429324409526e-05, "loss": 1.6775, "step": 10392 }, { "epoch": 0.79, "grad_norm": 2.1995034217834473, "learning_rate": 2.042976003195005e-05, "loss": 1.3174, "step": 10393 }, { "epoch": 0.79, "grad_norm": 1.4005244970321655, "learning_rate": 2.0415231399529865e-05, "loss": 1.6397, "step": 10394 }, { "epoch": 0.79, "grad_norm": 1.8953138589859009, "learning_rate": 2.0400707347670943e-05, "loss": 1.5924, "step": 10395 }, { "epoch": 0.79, "grad_norm": 1.2051341533660889, "learning_rate": 2.038618787720925e-05, "loss": 1.2645, "step": 10396 }, { "epoch": 0.79, "grad_norm": 1.2028974294662476, "learning_rate": 2.0371672988980406e-05, "loss": 1.0357, "step": 10397 }, { "epoch": 0.79, "grad_norm": 1.1873831748962402, "learning_rate": 2.0357162683819864e-05, "loss": 1.2347, "step": 10398 }, { "epoch": 0.79, "grad_norm": 1.604466438293457, "learning_rate": 2.03426569625628e-05, "loss": 0.8496, "step": 10399 }, { "epoch": 0.79, "grad_norm": 1.3761967420578003, "learning_rate": 2.0328155826044036e-05, "loss": 1.6255, "step": 10400 }, { "epoch": 0.79, "grad_norm": 2.1622555255889893, "learning_rate": 2.0313659275098262e-05, "loss": 0.8485, "step": 10401 }, { "epoch": 0.79, "grad_norm": 1.5182186365127563, "learning_rate": 2.029916731055981e-05, "loss": 1.4257, "step": 10402 }, { "epoch": 0.79, "grad_norm": 1.1777008771896362, "learning_rate": 2.028467993326274e-05, "loss": 1.2923, "step": 10403 }, { "epoch": 0.79, "grad_norm": 1.4282777309417725, "learning_rate": 2.0270197144040914e-05, "loss": 1.439, "step": 10404 }, { "epoch": 0.79, "grad_norm": 2.2331464290618896, "learning_rate": 2.025571894372794e-05, "loss": 1.7567, "step": 10405 }, { "epoch": 0.79, "grad_norm": 1.1047685146331787, "learning_rate": 2.0241245333157054e-05, "loss": 0.8581, "step": 10406 }, { "epoch": 0.79, "grad_norm": 1.123815894126892, "learning_rate": 2.0226776313161323e-05, "loss": 0.8899, "step": 10407 }, { "epoch": 0.79, "grad_norm": 2.6041085720062256, "learning_rate": 
2.0212311884573552e-05, "loss": 1.3614, "step": 10408 }, { "epoch": 0.79, "grad_norm": 1.5136016607284546, "learning_rate": 2.0197852048226208e-05, "loss": 1.2967, "step": 10409 }, { "epoch": 0.79, "grad_norm": 2.8022873401641846, "learning_rate": 2.018339680495157e-05, "loss": 1.7438, "step": 10410 }, { "epoch": 0.79, "grad_norm": 3.071612596511841, "learning_rate": 2.0168946155581614e-05, "loss": 1.1324, "step": 10411 }, { "epoch": 0.79, "grad_norm": 1.5627779960632324, "learning_rate": 2.0154500100948016e-05, "loss": 1.1449, "step": 10412 }, { "epoch": 0.79, "grad_norm": 1.4558613300323486, "learning_rate": 2.0140058641882264e-05, "loss": 1.6184, "step": 10413 }, { "epoch": 0.79, "grad_norm": 1.276740312576294, "learning_rate": 2.0125621779215576e-05, "loss": 1.2338, "step": 10414 }, { "epoch": 0.79, "grad_norm": 1.736552119255066, "learning_rate": 2.0111189513778827e-05, "loss": 2.1732, "step": 10415 }, { "epoch": 0.79, "grad_norm": 1.0800219774246216, "learning_rate": 2.009676184640269e-05, "loss": 1.1265, "step": 10416 }, { "epoch": 0.79, "grad_norm": 1.3318877220153809, "learning_rate": 2.0082338777917586e-05, "loss": 1.2123, "step": 10417 }, { "epoch": 0.79, "grad_norm": 1.807565450668335, "learning_rate": 2.0067920309153608e-05, "loss": 1.8852, "step": 10418 }, { "epoch": 0.79, "grad_norm": 1.0536898374557495, "learning_rate": 2.0053506440940673e-05, "loss": 1.1601, "step": 10419 }, { "epoch": 0.8, "grad_norm": 1.3225955963134766, "learning_rate": 2.003909717410831e-05, "loss": 0.9676, "step": 10420 }, { "epoch": 0.8, "grad_norm": 1.7266212701797485, "learning_rate": 2.0024692509485933e-05, "loss": 1.5551, "step": 10421 }, { "epoch": 0.8, "grad_norm": 0.9717953205108643, "learning_rate": 2.0010292447902544e-05, "loss": 0.7897, "step": 10422 }, { "epoch": 0.8, "grad_norm": 1.1379176378250122, "learning_rate": 1.9995896990187003e-05, "loss": 0.9348, "step": 10423 }, { "epoch": 0.8, "grad_norm": 1.4650861024856567, "learning_rate": 1.9981506137167805e-05, 
"loss": 0.7655, "step": 10424 }, { "epoch": 0.8, "grad_norm": 1.4647868871688843, "learning_rate": 1.996711988967326e-05, "loss": 1.0695, "step": 10425 }, { "epoch": 0.8, "grad_norm": 1.1098452806472778, "learning_rate": 1.9952738248531376e-05, "loss": 1.3538, "step": 10426 }, { "epoch": 0.8, "grad_norm": 1.9399628639221191, "learning_rate": 1.993836121456988e-05, "loss": 1.4455, "step": 10427 }, { "epoch": 0.8, "grad_norm": 2.9839940071105957, "learning_rate": 1.9923988788616264e-05, "loss": 2.3436, "step": 10428 }, { "epoch": 0.8, "grad_norm": 1.4926270246505737, "learning_rate": 1.9909620971497766e-05, "loss": 1.2031, "step": 10429 }, { "epoch": 0.8, "grad_norm": 2.340848684310913, "learning_rate": 1.989525776404132e-05, "loss": 1.4338, "step": 10430 }, { "epoch": 0.8, "grad_norm": 3.1328084468841553, "learning_rate": 1.9880899167073575e-05, "loss": 1.8768, "step": 10431 }, { "epoch": 0.8, "grad_norm": 1.3619612455368042, "learning_rate": 1.9866545181421013e-05, "loss": 1.587, "step": 10432 }, { "epoch": 0.8, "grad_norm": 1.087610125541687, "learning_rate": 1.9852195807909735e-05, "loss": 0.7808, "step": 10433 }, { "epoch": 0.8, "grad_norm": 1.0953805446624756, "learning_rate": 1.9837851047365653e-05, "loss": 1.0244, "step": 10434 }, { "epoch": 0.8, "grad_norm": 1.3017548322677612, "learning_rate": 1.9823510900614417e-05, "loss": 1.678, "step": 10435 }, { "epoch": 0.8, "grad_norm": 2.8040530681610107, "learning_rate": 1.9809175368481336e-05, "loss": 2.0921, "step": 10436 }, { "epoch": 0.8, "grad_norm": 1.421705961227417, "learning_rate": 1.9794844451791517e-05, "loss": 1.5253, "step": 10437 }, { "epoch": 0.8, "grad_norm": 1.5307285785675049, "learning_rate": 1.978051815136983e-05, "loss": 1.0894, "step": 10438 }, { "epoch": 0.8, "grad_norm": 1.363107442855835, "learning_rate": 1.9766196468040797e-05, "loss": 1.921, "step": 10439 }, { "epoch": 0.8, "grad_norm": 1.4391319751739502, "learning_rate": 1.975187940262868e-05, "loss": 1.8491, "step": 10440 }, { "epoch": 
0.8, "grad_norm": 1.2261419296264648, "learning_rate": 1.973756695595759e-05, "loss": 1.1556, "step": 10441 }, { "epoch": 0.8, "grad_norm": 2.535504102706909, "learning_rate": 1.9723259128851214e-05, "loss": 1.5115, "step": 10442 }, { "epoch": 0.8, "grad_norm": 0.9866750240325928, "learning_rate": 1.9708955922133078e-05, "loss": 1.5083, "step": 10443 }, { "epoch": 0.8, "grad_norm": 1.5535962581634521, "learning_rate": 1.9694657336626443e-05, "loss": 1.3847, "step": 10444 }, { "epoch": 0.8, "grad_norm": 1.5742093324661255, "learning_rate": 1.968036337315423e-05, "loss": 2.1186, "step": 10445 }, { "epoch": 0.8, "grad_norm": 1.6967986822128296, "learning_rate": 1.966607403253915e-05, "loss": 1.2544, "step": 10446 }, { "epoch": 0.8, "grad_norm": 1.9431096315383911, "learning_rate": 1.9651789315603674e-05, "loss": 1.4515, "step": 10447 }, { "epoch": 0.8, "grad_norm": 2.026411533355713, "learning_rate": 1.9637509223169947e-05, "loss": 1.9019, "step": 10448 }, { "epoch": 0.8, "grad_norm": 2.195863962173462, "learning_rate": 1.9623233756059832e-05, "loss": 1.1434, "step": 10449 }, { "epoch": 0.8, "grad_norm": 1.9147151708602905, "learning_rate": 1.9608962915094996e-05, "loss": 1.1573, "step": 10450 }, { "epoch": 0.8, "grad_norm": 3.0298380851745605, "learning_rate": 1.9594696701096848e-05, "loss": 2.3098, "step": 10451 }, { "epoch": 0.8, "grad_norm": 1.3998035192489624, "learning_rate": 1.9580435114886408e-05, "loss": 1.1735, "step": 10452 }, { "epoch": 0.8, "grad_norm": 1.4374645948410034, "learning_rate": 1.9566178157284564e-05, "loss": 1.6144, "step": 10453 }, { "epoch": 0.8, "grad_norm": 1.9166722297668457, "learning_rate": 1.9551925829111905e-05, "loss": 1.0293, "step": 10454 }, { "epoch": 0.8, "grad_norm": 3.9628772735595703, "learning_rate": 1.9537678131188674e-05, "loss": 1.6382, "step": 10455 }, { "epoch": 0.8, "grad_norm": 1.680443286895752, "learning_rate": 1.952343506433497e-05, "loss": 1.1037, "step": 10456 }, { "epoch": 0.8, "grad_norm": 1.6823060512542725, 
"learning_rate": 1.950919662937052e-05, "loss": 1.1479, "step": 10457 }, { "epoch": 0.8, "grad_norm": 1.7512602806091309, "learning_rate": 1.949496282711486e-05, "loss": 0.8924, "step": 10458 }, { "epoch": 0.8, "grad_norm": 1.801991581916809, "learning_rate": 1.9480733658387175e-05, "loss": 1.0645, "step": 10459 }, { "epoch": 0.8, "grad_norm": 1.7685914039611816, "learning_rate": 1.9466509124006503e-05, "loss": 1.5399, "step": 10460 }, { "epoch": 0.8, "grad_norm": 1.5224568843841553, "learning_rate": 1.9452289224791485e-05, "loss": 0.8561, "step": 10461 }, { "epoch": 0.8, "grad_norm": 2.0429394245147705, "learning_rate": 1.94380739615606e-05, "loss": 1.3965, "step": 10462 }, { "epoch": 0.8, "grad_norm": 1.5155162811279297, "learning_rate": 1.942386333513202e-05, "loss": 1.0282, "step": 10463 }, { "epoch": 0.8, "grad_norm": 1.2746645212173462, "learning_rate": 1.9409657346323607e-05, "loss": 1.3008, "step": 10464 }, { "epoch": 0.8, "grad_norm": 1.6779309511184692, "learning_rate": 1.9395455995953036e-05, "loss": 1.4708, "step": 10465 }, { "epoch": 0.8, "grad_norm": 2.115079402923584, "learning_rate": 1.9381259284837683e-05, "loss": 1.4993, "step": 10466 }, { "epoch": 0.8, "grad_norm": 1.3845131397247314, "learning_rate": 1.9367067213794632e-05, "loss": 1.7048, "step": 10467 }, { "epoch": 0.8, "grad_norm": 2.5991973876953125, "learning_rate": 1.9352879783640698e-05, "loss": 1.4047, "step": 10468 }, { "epoch": 0.8, "grad_norm": 1.627150297164917, "learning_rate": 1.9338696995192495e-05, "loss": 1.4536, "step": 10469 }, { "epoch": 0.8, "grad_norm": 1.4077345132827759, "learning_rate": 1.9324518849266272e-05, "loss": 1.1724, "step": 10470 }, { "epoch": 0.8, "grad_norm": 1.1432071924209595, "learning_rate": 1.9310345346678083e-05, "loss": 1.2183, "step": 10471 }, { "epoch": 0.8, "grad_norm": 1.4223048686981201, "learning_rate": 1.9296176488243734e-05, "loss": 0.9769, "step": 10472 }, { "epoch": 0.8, "grad_norm": 4.269151210784912, "learning_rate": 1.928201227477866e-05, 
"loss": 1.2761, "step": 10473 }, { "epoch": 0.8, "grad_norm": 1.2210006713867188, "learning_rate": 1.926785270709813e-05, "loss": 0.8276, "step": 10474 }, { "epoch": 0.8, "grad_norm": 2.383636951446533, "learning_rate": 1.9253697786017132e-05, "loss": 1.5303, "step": 10475 }, { "epoch": 0.8, "grad_norm": 1.7434251308441162, "learning_rate": 1.923954751235033e-05, "loss": 1.8943, "step": 10476 }, { "epoch": 0.8, "grad_norm": 1.1070541143417358, "learning_rate": 1.9225401886912143e-05, "loss": 0.8813, "step": 10477 }, { "epoch": 0.8, "grad_norm": 1.2951587438583374, "learning_rate": 1.921126091051676e-05, "loss": 1.0558, "step": 10478 }, { "epoch": 0.8, "grad_norm": 1.5050772428512573, "learning_rate": 1.9197124583978045e-05, "loss": 1.3415, "step": 10479 }, { "epoch": 0.8, "grad_norm": 1.6811648607254028, "learning_rate": 1.9182992908109644e-05, "loss": 1.7614, "step": 10480 }, { "epoch": 0.8, "grad_norm": 1.5568299293518066, "learning_rate": 1.916886588372494e-05, "loss": 0.6647, "step": 10481 }, { "epoch": 0.8, "grad_norm": 1.0513867139816284, "learning_rate": 1.9154743511636976e-05, "loss": 1.3103, "step": 10482 }, { "epoch": 0.8, "grad_norm": 1.0907127857208252, "learning_rate": 1.9140625792658595e-05, "loss": 0.9693, "step": 10483 }, { "epoch": 0.8, "grad_norm": 1.355168104171753, "learning_rate": 1.9126512727602386e-05, "loss": 1.2717, "step": 10484 }, { "epoch": 0.8, "grad_norm": 5.616031169891357, "learning_rate": 1.9112404317280606e-05, "loss": 2.0288, "step": 10485 }, { "epoch": 0.8, "grad_norm": 1.5210727453231812, "learning_rate": 1.9098300562505266e-05, "loss": 1.6614, "step": 10486 }, { "epoch": 0.8, "grad_norm": 2.132000207901001, "learning_rate": 1.9084201464088148e-05, "loss": 1.4081, "step": 10487 }, { "epoch": 0.8, "grad_norm": 1.2612032890319824, "learning_rate": 1.907010702284069e-05, "loss": 1.3872, "step": 10488 }, { "epoch": 0.8, "grad_norm": 2.2097089290618896, "learning_rate": 1.9056017239574143e-05, "loss": 1.3126, "step": 10489 }, { 
"epoch": 0.8, "grad_norm": 2.1021642684936523, "learning_rate": 1.904193211509947e-05, "loss": 1.6345, "step": 10490 }, { "epoch": 0.8, "grad_norm": 2.243290424346924, "learning_rate": 1.9027851650227312e-05, "loss": 1.8531, "step": 10491 }, { "epoch": 0.8, "grad_norm": 1.3576816320419312, "learning_rate": 1.901377584576809e-05, "loss": 1.2845, "step": 10492 }, { "epoch": 0.8, "grad_norm": 1.9456322193145752, "learning_rate": 1.8999704702531984e-05, "loss": 1.8138, "step": 10493 }, { "epoch": 0.8, "grad_norm": 1.903002381324768, "learning_rate": 1.8985638221328815e-05, "loss": 1.6257, "step": 10494 }, { "epoch": 0.8, "grad_norm": 1.048574447631836, "learning_rate": 1.897157640296825e-05, "loss": 1.0629, "step": 10495 }, { "epoch": 0.8, "grad_norm": 2.5493340492248535, "learning_rate": 1.895751924825959e-05, "loss": 1.6985, "step": 10496 }, { "epoch": 0.8, "grad_norm": 1.2335846424102783, "learning_rate": 1.8943466758011885e-05, "loss": 1.5885, "step": 10497 }, { "epoch": 0.8, "grad_norm": 3.457963228225708, "learning_rate": 1.892941893303396e-05, "loss": 1.3595, "step": 10498 }, { "epoch": 0.8, "grad_norm": 2.5790932178497314, "learning_rate": 1.8915375774134368e-05, "loss": 1.3975, "step": 10499 }, { "epoch": 0.8, "grad_norm": 0.7766169905662537, "learning_rate": 1.8901337282121334e-05, "loss": 0.9792, "step": 10500 }, { "epoch": 0.8, "grad_norm": 1.6531203985214233, "learning_rate": 1.8887303457802873e-05, "loss": 1.2492, "step": 10501 }, { "epoch": 0.8, "grad_norm": 2.3794047832489014, "learning_rate": 1.8873274301986744e-05, "loss": 1.3165, "step": 10502 }, { "epoch": 0.8, "grad_norm": 1.1908174753189087, "learning_rate": 1.885924981548035e-05, "loss": 1.4694, "step": 10503 }, { "epoch": 0.8, "grad_norm": 2.3044562339782715, "learning_rate": 1.884522999909093e-05, "loss": 0.945, "step": 10504 }, { "epoch": 0.8, "grad_norm": 1.5808119773864746, "learning_rate": 1.883121485362538e-05, "loss": 1.4811, "step": 10505 }, { "epoch": 0.8, "grad_norm": 
1.4940330982208252, "learning_rate": 1.881720437989033e-05, "loss": 1.4978, "step": 10506 }, { "epoch": 0.8, "grad_norm": 1.7814897298812866, "learning_rate": 1.8803198578692182e-05, "loss": 1.2148, "step": 10507 }, { "epoch": 0.8, "grad_norm": 2.9117538928985596, "learning_rate": 1.8789197450837092e-05, "loss": 1.3002, "step": 10508 }, { "epoch": 0.8, "grad_norm": 1.5271022319793701, "learning_rate": 1.8775200997130837e-05, "loss": 1.371, "step": 10509 }, { "epoch": 0.8, "grad_norm": 0.9656934142112732, "learning_rate": 1.8761209218379016e-05, "loss": 1.1403, "step": 10510 }, { "epoch": 0.8, "grad_norm": 1.368303894996643, "learning_rate": 1.8747222115386988e-05, "loss": 1.6634, "step": 10511 }, { "epoch": 0.8, "grad_norm": 3.4399445056915283, "learning_rate": 1.8733239688959715e-05, "loss": 1.3752, "step": 10512 }, { "epoch": 0.8, "grad_norm": 4.009006977081299, "learning_rate": 1.871926193990202e-05, "loss": 0.8497, "step": 10513 }, { "epoch": 0.8, "grad_norm": 1.5654447078704834, "learning_rate": 1.8705288869018388e-05, "loss": 1.3757, "step": 10514 }, { "epoch": 0.8, "grad_norm": 1.4935816526412964, "learning_rate": 1.869132047711303e-05, "loss": 0.6624, "step": 10515 }, { "epoch": 0.8, "grad_norm": 2.4928648471832275, "learning_rate": 1.8677356764989907e-05, "loss": 1.3148, "step": 10516 }, { "epoch": 0.8, "grad_norm": 1.8575698137283325, "learning_rate": 1.8663397733452758e-05, "loss": 2.0185, "step": 10517 }, { "epoch": 0.8, "grad_norm": 1.407145619392395, "learning_rate": 1.8649443383304942e-05, "loss": 1.9806, "step": 10518 }, { "epoch": 0.8, "grad_norm": 1.4716782569885254, "learning_rate": 1.863549371534965e-05, "loss": 1.3077, "step": 10519 }, { "epoch": 0.8, "grad_norm": 1.2900980710983276, "learning_rate": 1.862154873038978e-05, "loss": 2.7909, "step": 10520 }, { "epoch": 0.8, "grad_norm": 1.9971107244491577, "learning_rate": 1.8607608429227908e-05, "loss": 1.3356, "step": 10521 }, { "epoch": 0.8, "grad_norm": 1.3102216720581055, "learning_rate": 
1.8593672812666386e-05, "loss": 0.8949, "step": 10522 }, { "epoch": 0.8, "grad_norm": 1.8804155588150024, "learning_rate": 1.857974188150733e-05, "loss": 1.0975, "step": 10523 }, { "epoch": 0.8, "grad_norm": 1.569568395614624, "learning_rate": 1.856581563655251e-05, "loss": 1.3731, "step": 10524 }, { "epoch": 0.8, "grad_norm": 1.6628590822219849, "learning_rate": 1.855189407860344e-05, "loss": 1.8931, "step": 10525 }, { "epoch": 0.8, "grad_norm": 0.7697024345397949, "learning_rate": 1.8537977208461444e-05, "loss": 0.9012, "step": 10526 }, { "epoch": 0.8, "grad_norm": 1.1476105451583862, "learning_rate": 1.852406502692745e-05, "loss": 1.1336, "step": 10527 }, { "epoch": 0.8, "grad_norm": 1.643851637840271, "learning_rate": 1.851015753480223e-05, "loss": 1.962, "step": 10528 }, { "epoch": 0.8, "grad_norm": 1.6746450662612915, "learning_rate": 1.8496254732886244e-05, "loss": 0.8964, "step": 10529 }, { "epoch": 0.8, "grad_norm": 1.579043984413147, "learning_rate": 1.8482356621979645e-05, "loss": 1.6859, "step": 10530 }, { "epoch": 0.8, "grad_norm": 1.2667186260223389, "learning_rate": 1.8468463202882358e-05, "loss": 1.406, "step": 10531 }, { "epoch": 0.8, "grad_norm": 1.4222666025161743, "learning_rate": 1.845457447639407e-05, "loss": 1.3288, "step": 10532 }, { "epoch": 0.8, "grad_norm": 1.293709397315979, "learning_rate": 1.8440690443314135e-05, "loss": 1.102, "step": 10533 }, { "epoch": 0.8, "grad_norm": 1.3437453508377075, "learning_rate": 1.842681110444161e-05, "loss": 1.0143, "step": 10534 }, { "epoch": 0.8, "grad_norm": 2.1700804233551025, "learning_rate": 1.8412936460575415e-05, "loss": 1.0841, "step": 10535 }, { "epoch": 0.8, "grad_norm": 1.1887588500976562, "learning_rate": 1.839906651251403e-05, "loss": 1.3702, "step": 10536 }, { "epoch": 0.8, "grad_norm": 1.4013190269470215, "learning_rate": 1.8385201261055796e-05, "loss": 2.0458, "step": 10537 }, { "epoch": 0.8, "grad_norm": 1.294952392578125, "learning_rate": 1.8371340706998764e-05, "loss": 1.075, "step": 
10538 }, { "epoch": 0.8, "grad_norm": 1.0857570171356201, "learning_rate": 1.8357484851140628e-05, "loss": 0.9393, "step": 10539 }, { "epoch": 0.8, "grad_norm": 1.3392319679260254, "learning_rate": 1.8343633694278895e-05, "loss": 1.45, "step": 10540 }, { "epoch": 0.8, "grad_norm": 1.6343836784362793, "learning_rate": 1.8329787237210828e-05, "loss": 1.2119, "step": 10541 }, { "epoch": 0.8, "grad_norm": 2.9406542778015137, "learning_rate": 1.831594548073332e-05, "loss": 1.3801, "step": 10542 }, { "epoch": 0.8, "grad_norm": 1.3620954751968384, "learning_rate": 1.830210842564303e-05, "loss": 1.0723, "step": 10543 }, { "epoch": 0.8, "grad_norm": 1.1630585193634033, "learning_rate": 1.82882760727364e-05, "loss": 0.6967, "step": 10544 }, { "epoch": 0.8, "grad_norm": 2.2945141792297363, "learning_rate": 1.827444842280952e-05, "loss": 1.8512, "step": 10545 }, { "epoch": 0.8, "grad_norm": 1.347915530204773, "learning_rate": 1.8260625476658278e-05, "loss": 1.6207, "step": 10546 }, { "epoch": 0.8, "grad_norm": 1.9608044624328613, "learning_rate": 1.824680723507829e-05, "loss": 1.8236, "step": 10547 }, { "epoch": 0.8, "grad_norm": 1.3048245906829834, "learning_rate": 1.823299369886482e-05, "loss": 1.7633, "step": 10548 }, { "epoch": 0.8, "grad_norm": 1.4406741857528687, "learning_rate": 1.8219184868812932e-05, "loss": 1.3124, "step": 10549 }, { "epoch": 0.8, "grad_norm": 1.0514823198318481, "learning_rate": 1.8205380745717448e-05, "loss": 1.6018, "step": 10550 }, { "epoch": 0.81, "grad_norm": 1.3587098121643066, "learning_rate": 1.819158133037282e-05, "loss": 0.7983, "step": 10551 }, { "epoch": 0.81, "grad_norm": 1.7269341945648193, "learning_rate": 1.8177786623573322e-05, "loss": 1.4684, "step": 10552 }, { "epoch": 0.81, "grad_norm": 1.734174132347107, "learning_rate": 1.81639966261129e-05, "loss": 1.3166, "step": 10553 }, { "epoch": 0.81, "grad_norm": 1.367723822593689, "learning_rate": 1.8150211338785228e-05, "loss": 1.0702, "step": 10554 }, { "epoch": 0.81, "grad_norm": 
1.8417779207229614, "learning_rate": 1.813643076238375e-05, "loss": 2.1045, "step": 10555 }, { "epoch": 0.81, "grad_norm": 2.912177085876465, "learning_rate": 1.812265489770165e-05, "loss": 0.6454, "step": 10556 }, { "epoch": 0.81, "grad_norm": 1.2882939577102661, "learning_rate": 1.8108883745531734e-05, "loss": 1.5864, "step": 10557 }, { "epoch": 0.81, "grad_norm": 1.196146845817566, "learning_rate": 1.8095117306666665e-05, "loss": 1.644, "step": 10558 }, { "epoch": 0.81, "grad_norm": 1.5546084642410278, "learning_rate": 1.8081355581898783e-05, "loss": 1.0914, "step": 10559 }, { "epoch": 0.81, "grad_norm": 1.5320578813552856, "learning_rate": 1.8067598572020127e-05, "loss": 0.8182, "step": 10560 }, { "epoch": 0.81, "grad_norm": 1.0464266538619995, "learning_rate": 1.8053846277822516e-05, "loss": 0.894, "step": 10561 }, { "epoch": 0.81, "grad_norm": 1.5164673328399658, "learning_rate": 1.8040098700097473e-05, "loss": 1.4373, "step": 10562 }, { "epoch": 0.81, "grad_norm": 5.597235202789307, "learning_rate": 1.802635583963621e-05, "loss": 2.2143, "step": 10563 }, { "epoch": 0.81, "grad_norm": 2.106121301651001, "learning_rate": 1.801261769722974e-05, "loss": 1.5708, "step": 10564 }, { "epoch": 0.81, "grad_norm": 3.882909059524536, "learning_rate": 1.799888427366879e-05, "loss": 1.8384, "step": 10565 }, { "epoch": 0.81, "grad_norm": 2.130608320236206, "learning_rate": 1.798515556974375e-05, "loss": 1.0853, "step": 10566 }, { "epoch": 0.81, "grad_norm": 1.1235772371292114, "learning_rate": 1.7971431586244815e-05, "loss": 1.4227, "step": 10567 }, { "epoch": 0.81, "grad_norm": 1.3460983037948608, "learning_rate": 1.7957712323961905e-05, "loss": 1.3381, "step": 10568 }, { "epoch": 0.81, "grad_norm": 1.7226142883300781, "learning_rate": 1.7943997783684584e-05, "loss": 1.3892, "step": 10569 }, { "epoch": 0.81, "grad_norm": 1.0579215288162231, "learning_rate": 1.7930287966202265e-05, "loss": 1.4384, "step": 10570 }, { "epoch": 0.81, "grad_norm": 1.446839451789856, 
"learning_rate": 1.791658287230399e-05, "loss": 1.2529, "step": 10571 }, { "epoch": 0.81, "grad_norm": 1.097729206085205, "learning_rate": 1.790288250277854e-05, "loss": 1.0515, "step": 10572 }, { "epoch": 0.81, "grad_norm": 1.291471242904663, "learning_rate": 1.7889186858414485e-05, "loss": 1.7089, "step": 10573 }, { "epoch": 0.81, "grad_norm": 1.630776047706604, "learning_rate": 1.787549594000012e-05, "loss": 1.4074, "step": 10574 }, { "epoch": 0.81, "grad_norm": 1.0325665473937988, "learning_rate": 1.7861809748323367e-05, "loss": 1.3994, "step": 10575 }, { "epoch": 0.81, "grad_norm": 1.7339457273483276, "learning_rate": 1.784812828417197e-05, "loss": 1.3201, "step": 10576 }, { "epoch": 0.81, "grad_norm": 1.3752775192260742, "learning_rate": 1.783445154833342e-05, "loss": 1.3461, "step": 10577 }, { "epoch": 0.81, "grad_norm": 1.7408331632614136, "learning_rate": 1.7820779541594825e-05, "loss": 0.9798, "step": 10578 }, { "epoch": 0.81, "grad_norm": 1.1654802560806274, "learning_rate": 1.7807112264743153e-05, "loss": 1.3506, "step": 10579 }, { "epoch": 0.81, "grad_norm": 1.6951652765274048, "learning_rate": 1.779344971856497e-05, "loss": 1.4411, "step": 10580 }, { "epoch": 0.81, "grad_norm": 1.4921610355377197, "learning_rate": 1.7779791903846687e-05, "loss": 1.8249, "step": 10581 }, { "epoch": 0.81, "grad_norm": 1.1347987651824951, "learning_rate": 1.7766138821374334e-05, "loss": 1.3083, "step": 10582 }, { "epoch": 0.81, "grad_norm": 1.3400063514709473, "learning_rate": 1.775249047193377e-05, "loss": 1.5189, "step": 10583 }, { "epoch": 0.81, "grad_norm": 2.1339690685272217, "learning_rate": 1.7738846856310543e-05, "loss": 1.6325, "step": 10584 }, { "epoch": 0.81, "grad_norm": 1.3848423957824707, "learning_rate": 1.772520797528988e-05, "loss": 1.1116, "step": 10585 }, { "epoch": 0.81, "grad_norm": 1.9631904363632202, "learning_rate": 1.7711573829656803e-05, "loss": 1.6429, "step": 10586 }, { "epoch": 0.81, "grad_norm": 1.6163793802261353, "learning_rate": 
1.7697944420196043e-05, "loss": 1.5559, "step": 10587 }, { "epoch": 0.81, "grad_norm": 1.6264461278915405, "learning_rate": 1.7684319747692023e-05, "loss": 1.2151, "step": 10588 }, { "epoch": 0.81, "grad_norm": 1.6230612993240356, "learning_rate": 1.767069981292897e-05, "loss": 1.2626, "step": 10589 }, { "epoch": 0.81, "grad_norm": 1.4429360628128052, "learning_rate": 1.7657084616690744e-05, "loss": 1.3652, "step": 10590 }, { "epoch": 0.81, "grad_norm": 1.108141303062439, "learning_rate": 1.7643474159760975e-05, "loss": 1.4957, "step": 10591 }, { "epoch": 0.81, "grad_norm": 1.5908162593841553, "learning_rate": 1.7629868442923036e-05, "loss": 1.7174, "step": 10592 }, { "epoch": 0.81, "grad_norm": 1.8558958768844604, "learning_rate": 1.7616267466960047e-05, "loss": 1.2024, "step": 10593 }, { "epoch": 0.81, "grad_norm": 0.9676287174224854, "learning_rate": 1.7602671232654754e-05, "loss": 0.6682, "step": 10594 }, { "epoch": 0.81, "grad_norm": 1.6302624940872192, "learning_rate": 1.758907974078975e-05, "loss": 1.3052, "step": 10595 }, { "epoch": 0.81, "grad_norm": 1.6542348861694336, "learning_rate": 1.7575492992147312e-05, "loss": 1.8269, "step": 10596 }, { "epoch": 0.81, "grad_norm": 1.087780475616455, "learning_rate": 1.75619109875094e-05, "loss": 1.4175, "step": 10597 }, { "epoch": 0.81, "grad_norm": 1.7169225215911865, "learning_rate": 1.754833372765776e-05, "loss": 1.1729, "step": 10598 }, { "epoch": 0.81, "grad_norm": 1.47219717502594, "learning_rate": 1.753476121337384e-05, "loss": 1.2037, "step": 10599 }, { "epoch": 0.81, "grad_norm": 1.9355164766311646, "learning_rate": 1.752119344543879e-05, "loss": 0.8605, "step": 10600 }, { "epoch": 0.81, "grad_norm": 1.7052122354507446, "learning_rate": 1.7507630424633524e-05, "loss": 1.18, "step": 10601 }, { "epoch": 0.81, "grad_norm": 2.228407859802246, "learning_rate": 1.7494072151738704e-05, "loss": 1.4017, "step": 10602 }, { "epoch": 0.81, "grad_norm": 2.327043294906616, "learning_rate": 1.7480518627534638e-05, 
"loss": 1.494, "step": 10603 }, { "epoch": 0.81, "grad_norm": 1.2371989488601685, "learning_rate": 1.746696985280144e-05, "loss": 1.5523, "step": 10604 }, { "epoch": 0.81, "grad_norm": 1.2561900615692139, "learning_rate": 1.7453425828318936e-05, "loss": 1.5474, "step": 10605 }, { "epoch": 0.81, "grad_norm": 1.5546436309814453, "learning_rate": 1.7439886554866612e-05, "loss": 1.2454, "step": 10606 }, { "epoch": 0.81, "grad_norm": 1.8243160247802734, "learning_rate": 1.742635203322378e-05, "loss": 1.391, "step": 10607 }, { "epoch": 0.81, "grad_norm": 1.9207240343093872, "learning_rate": 1.7412822264169414e-05, "loss": 1.5192, "step": 10608 }, { "epoch": 0.81, "grad_norm": 1.88356351852417, "learning_rate": 1.7399297248482204e-05, "loss": 0.8613, "step": 10609 }, { "epoch": 0.81, "grad_norm": 12.249421119689941, "learning_rate": 1.7385776986940604e-05, "loss": 1.5941, "step": 10610 }, { "epoch": 0.81, "grad_norm": 1.3373901844024658, "learning_rate": 1.7372261480322826e-05, "loss": 0.8726, "step": 10611 }, { "epoch": 0.81, "grad_norm": 1.7136406898498535, "learning_rate": 1.7358750729406702e-05, "loss": 1.478, "step": 10612 }, { "epoch": 0.81, "grad_norm": 3.343491315841675, "learning_rate": 1.734524473496988e-05, "loss": 1.3294, "step": 10613 }, { "epoch": 0.81, "grad_norm": 6.2946882247924805, "learning_rate": 1.7331743497789743e-05, "loss": 3.6035, "step": 10614 }, { "epoch": 0.81, "grad_norm": 0.9008533954620361, "learning_rate": 1.731824701864331e-05, "loss": 1.0157, "step": 10615 }, { "epoch": 0.81, "grad_norm": 1.1330448389053345, "learning_rate": 1.7304755298307417e-05, "loss": 1.3904, "step": 10616 }, { "epoch": 0.81, "grad_norm": 2.5791873931884766, "learning_rate": 1.729126833755855e-05, "loss": 1.5413, "step": 10617 }, { "epoch": 0.81, "grad_norm": 1.288912296295166, "learning_rate": 1.7277786137173026e-05, "loss": 1.262, "step": 10618 }, { "epoch": 0.81, "grad_norm": 1.3110469579696655, "learning_rate": 1.7264308697926744e-05, "loss": 1.601, "step": 10619 
}, { "epoch": 0.81, "grad_norm": 1.3316270112991333, "learning_rate": 1.7250836020595484e-05, "loss": 0.8992, "step": 10620 }, { "epoch": 0.81, "grad_norm": 1.0671626329421997, "learning_rate": 1.723736810595461e-05, "loss": 0.99, "step": 10621 }, { "epoch": 0.81, "grad_norm": 1.4035307168960571, "learning_rate": 1.7223904954779313e-05, "loss": 1.8346, "step": 10622 }, { "epoch": 0.81, "grad_norm": 2.783879041671753, "learning_rate": 1.7210446567844497e-05, "loss": 1.0967, "step": 10623 }, { "epoch": 0.81, "grad_norm": 1.3665146827697754, "learning_rate": 1.7196992945924718e-05, "loss": 0.8297, "step": 10624 }, { "epoch": 0.81, "grad_norm": 1.1014162302017212, "learning_rate": 1.7183544089794335e-05, "loss": 1.4827, "step": 10625 }, { "epoch": 0.81, "grad_norm": 1.658097267150879, "learning_rate": 1.717010000022743e-05, "loss": 1.5206, "step": 10626 }, { "epoch": 0.81, "grad_norm": 1.0566524267196655, "learning_rate": 1.715666067799776e-05, "loss": 1.3447, "step": 10627 }, { "epoch": 0.81, "grad_norm": 3.0478713512420654, "learning_rate": 1.714322612387882e-05, "loss": 2.2499, "step": 10628 }, { "epoch": 0.81, "grad_norm": 2.0318048000335693, "learning_rate": 1.712979633864389e-05, "loss": 1.483, "step": 10629 }, { "epoch": 0.81, "grad_norm": 2.0870373249053955, "learning_rate": 1.7116371323065883e-05, "loss": 1.2242, "step": 10630 }, { "epoch": 0.81, "grad_norm": 1.4575822353363037, "learning_rate": 1.7102951077917505e-05, "loss": 1.0255, "step": 10631 }, { "epoch": 0.81, "grad_norm": 1.0781327486038208, "learning_rate": 1.7089535603971207e-05, "loss": 1.2808, "step": 10632 }, { "epoch": 0.81, "grad_norm": 2.0359692573547363, "learning_rate": 1.7076124901999056e-05, "loss": 1.4923, "step": 10633 }, { "epoch": 0.81, "grad_norm": 1.2280170917510986, "learning_rate": 1.7062718972772963e-05, "loss": 1.1042, "step": 10634 }, { "epoch": 0.81, "grad_norm": 1.3039034605026245, "learning_rate": 1.7049317817064525e-05, "loss": 1.3229, "step": 10635 }, { "epoch": 0.81, 
"grad_norm": 1.247635841369629, "learning_rate": 1.703592143564503e-05, "loss": 1.8931, "step": 10636 }, { "epoch": 0.81, "grad_norm": 1.1951836347579956, "learning_rate": 1.7022529829285504e-05, "loss": 1.9067, "step": 10637 }, { "epoch": 0.81, "grad_norm": 2.664808988571167, "learning_rate": 1.7009142998756756e-05, "loss": 1.9583, "step": 10638 }, { "epoch": 0.81, "grad_norm": 1.0369290113449097, "learning_rate": 1.699576094482923e-05, "loss": 1.579, "step": 10639 }, { "epoch": 0.81, "grad_norm": 1.8652846813201904, "learning_rate": 1.698238366827315e-05, "loss": 1.1676, "step": 10640 }, { "epoch": 0.81, "grad_norm": 1.9896613359451294, "learning_rate": 1.6969011169858485e-05, "loss": 1.9075, "step": 10641 }, { "epoch": 0.81, "grad_norm": 1.8914210796356201, "learning_rate": 1.6955643450354863e-05, "loss": 1.042, "step": 10642 }, { "epoch": 0.81, "grad_norm": 1.5751798152923584, "learning_rate": 1.6942280510531684e-05, "loss": 0.6236, "step": 10643 }, { "epoch": 0.81, "grad_norm": 5.437828540802002, "learning_rate": 1.6928922351158095e-05, "loss": 1.8096, "step": 10644 }, { "epoch": 0.81, "grad_norm": 1.4404960870742798, "learning_rate": 1.6915568973002905e-05, "loss": 1.2638, "step": 10645 }, { "epoch": 0.81, "grad_norm": 1.9183326959609985, "learning_rate": 1.6902220376834655e-05, "loss": 1.3063, "step": 10646 }, { "epoch": 0.81, "grad_norm": 1.3257267475128174, "learning_rate": 1.6888876563421672e-05, "loss": 1.2625, "step": 10647 }, { "epoch": 0.81, "grad_norm": 4.756226539611816, "learning_rate": 1.6875537533531948e-05, "loss": 2.0181, "step": 10648 }, { "epoch": 0.81, "grad_norm": 1.2877068519592285, "learning_rate": 1.6862203287933208e-05, "loss": 0.9796, "step": 10649 }, { "epoch": 0.81, "grad_norm": 1.3576864004135132, "learning_rate": 1.6848873827392974e-05, "loss": 1.7076, "step": 10650 }, { "epoch": 0.81, "grad_norm": 1.1661673784255981, "learning_rate": 1.6835549152678365e-05, "loss": 0.7827, "step": 10651 }, { "epoch": 0.81, "grad_norm": 
1.3972938060760498, "learning_rate": 1.682222926455632e-05, "loss": 0.9999, "step": 10652 }, { "epoch": 0.81, "grad_norm": 1.3019108772277832, "learning_rate": 1.680891416379351e-05, "loss": 1.4604, "step": 10653 }, { "epoch": 0.81, "grad_norm": 1.4351027011871338, "learning_rate": 1.6795603851156226e-05, "loss": 1.6695, "step": 10654 }, { "epoch": 0.81, "grad_norm": 1.438347578048706, "learning_rate": 1.6782298327410616e-05, "loss": 0.9994, "step": 10655 }, { "epoch": 0.81, "grad_norm": 1.729248285293579, "learning_rate": 1.6768997593322466e-05, "loss": 1.8383, "step": 10656 }, { "epoch": 0.81, "grad_norm": 1.9420887231826782, "learning_rate": 1.6755701649657275e-05, "loss": 1.7383, "step": 10657 }, { "epoch": 0.81, "grad_norm": 1.7901716232299805, "learning_rate": 1.674241049718034e-05, "loss": 1.9571, "step": 10658 }, { "epoch": 0.81, "grad_norm": 2.6888482570648193, "learning_rate": 1.672912413665666e-05, "loss": 0.9978, "step": 10659 }, { "epoch": 0.81, "grad_norm": 1.0860601663589478, "learning_rate": 1.6715842568850893e-05, "loss": 1.4002, "step": 10660 }, { "epoch": 0.81, "grad_norm": 1.428499460220337, "learning_rate": 1.670256579452748e-05, "loss": 1.517, "step": 10661 }, { "epoch": 0.81, "grad_norm": 1.111033320426941, "learning_rate": 1.6689293814450636e-05, "loss": 0.9156, "step": 10662 }, { "epoch": 0.81, "grad_norm": 1.12067449092865, "learning_rate": 1.6676026629384157e-05, "loss": 1.5696, "step": 10663 }, { "epoch": 0.81, "grad_norm": 0.8304721713066101, "learning_rate": 1.66627642400917e-05, "loss": 0.9836, "step": 10664 }, { "epoch": 0.81, "grad_norm": 3.7224807739257812, "learning_rate": 1.664950664733659e-05, "loss": 0.8515, "step": 10665 }, { "epoch": 0.81, "grad_norm": 1.3651057481765747, "learning_rate": 1.663625385188182e-05, "loss": 1.3579, "step": 10666 }, { "epoch": 0.81, "grad_norm": 0.9838363528251648, "learning_rate": 1.6623005854490215e-05, "loss": 1.759, "step": 10667 }, { "epoch": 0.81, "grad_norm": 1.1790130138397217, 
"learning_rate": 1.6609762655924287e-05, "loss": 1.0547, "step": 10668 }, { "epoch": 0.81, "grad_norm": 1.986972451210022, "learning_rate": 1.659652425694621e-05, "loss": 1.6418, "step": 10669 }, { "epoch": 0.81, "grad_norm": 2.3650496006011963, "learning_rate": 1.6583290658317964e-05, "loss": 1.3319, "step": 10670 }, { "epoch": 0.81, "grad_norm": 1.1180689334869385, "learning_rate": 1.6570061860801233e-05, "loss": 1.276, "step": 10671 }, { "epoch": 0.81, "grad_norm": 1.1272974014282227, "learning_rate": 1.655683786515736e-05, "loss": 1.1101, "step": 10672 }, { "epoch": 0.81, "grad_norm": 1.2577542066574097, "learning_rate": 1.654361867214752e-05, "loss": 1.8581, "step": 10673 }, { "epoch": 0.81, "grad_norm": 2.0296459197998047, "learning_rate": 1.653040428253252e-05, "loss": 1.387, "step": 10674 }, { "epoch": 0.81, "grad_norm": 1.4078423976898193, "learning_rate": 1.65171946970729e-05, "loss": 1.0743, "step": 10675 }, { "epoch": 0.81, "grad_norm": 1.6718305349349976, "learning_rate": 1.650398991652897e-05, "loss": 0.9561, "step": 10676 }, { "epoch": 0.81, "grad_norm": 2.799330472946167, "learning_rate": 1.649078994166079e-05, "loss": 1.8654, "step": 10677 }, { "epoch": 0.81, "grad_norm": 1.9023866653442383, "learning_rate": 1.6477594773228012e-05, "loss": 1.1249, "step": 10678 }, { "epoch": 0.81, "grad_norm": 1.4767272472381592, "learning_rate": 1.6464404411990143e-05, "loss": 0.782, "step": 10679 }, { "epoch": 0.81, "grad_norm": 1.569623589515686, "learning_rate": 1.6451218858706374e-05, "loss": 1.6366, "step": 10680 }, { "epoch": 0.81, "grad_norm": 1.2105088233947754, "learning_rate": 1.6438038114135557e-05, "loss": 1.6479, "step": 10681 }, { "epoch": 0.82, "grad_norm": 1.3078593015670776, "learning_rate": 1.642486217903636e-05, "loss": 1.0558, "step": 10682 }, { "epoch": 0.82, "grad_norm": 2.2330071926116943, "learning_rate": 1.6411691054167157e-05, "loss": 1.3272, "step": 10683 }, { "epoch": 0.82, "grad_norm": 1.4091506004333496, "learning_rate": 
1.639852474028598e-05, "loss": 1.3985, "step": 10684 }, { "epoch": 0.82, "grad_norm": 1.4431731700897217, "learning_rate": 1.638536323815062e-05, "loss": 1.1385, "step": 10685 }, { "epoch": 0.82, "grad_norm": 2.825731039047241, "learning_rate": 1.637220654851863e-05, "loss": 1.6304, "step": 10686 }, { "epoch": 0.82, "grad_norm": 1.0814299583435059, "learning_rate": 1.635905467214722e-05, "loss": 1.1311, "step": 10687 }, { "epoch": 0.82, "grad_norm": 3.1789419651031494, "learning_rate": 1.6345907609793376e-05, "loss": 0.8952, "step": 10688 }, { "epoch": 0.82, "grad_norm": 1.0669970512390137, "learning_rate": 1.6332765362213797e-05, "loss": 1.4106, "step": 10689 }, { "epoch": 0.82, "grad_norm": 1.1388769149780273, "learning_rate": 1.631962793016487e-05, "loss": 1.4069, "step": 10690 }, { "epoch": 0.82, "grad_norm": 1.5192707777023315, "learning_rate": 1.6306495314402747e-05, "loss": 1.4993, "step": 10691 }, { "epoch": 0.82, "grad_norm": 1.3152351379394531, "learning_rate": 1.629336751568329e-05, "loss": 1.6124, "step": 10692 }, { "epoch": 0.82, "grad_norm": 1.1185014247894287, "learning_rate": 1.628024453476208e-05, "loss": 1.5452, "step": 10693 }, { "epoch": 0.82, "grad_norm": 2.7376277446746826, "learning_rate": 1.6267126372394393e-05, "loss": 0.908, "step": 10694 }, { "epoch": 0.82, "grad_norm": 1.4432271718978882, "learning_rate": 1.625401302933528e-05, "loss": 0.9132, "step": 10695 }, { "epoch": 0.82, "grad_norm": 1.3737701177597046, "learning_rate": 1.624090450633947e-05, "loss": 1.7229, "step": 10696 }, { "epoch": 0.82, "grad_norm": 2.1571273803710938, "learning_rate": 1.6227800804161443e-05, "loss": 1.2908, "step": 10697 }, { "epoch": 0.82, "grad_norm": 2.5513150691986084, "learning_rate": 1.621470192355541e-05, "loss": 0.8788, "step": 10698 }, { "epoch": 0.82, "grad_norm": 4.532405853271484, "learning_rate": 1.620160786527526e-05, "loss": 1.9176, "step": 10699 }, { "epoch": 0.82, "grad_norm": 1.1295700073242188, "learning_rate": 1.6188518630074635e-05, 
"loss": 1.2495, "step": 10700 }, { "epoch": 0.82, "grad_norm": 1.4726685285568237, "learning_rate": 1.617543421870693e-05, "loss": 1.814, "step": 10701 }, { "epoch": 0.82, "grad_norm": 1.2787421941757202, "learning_rate": 1.6162354631925204e-05, "loss": 1.1045, "step": 10702 }, { "epoch": 0.82, "grad_norm": 2.953380584716797, "learning_rate": 1.614927987048224e-05, "loss": 2.2017, "step": 10703 }, { "epoch": 0.82, "grad_norm": 1.536885142326355, "learning_rate": 1.613620993513061e-05, "loss": 1.6401, "step": 10704 }, { "epoch": 0.82, "grad_norm": 1.9106417894363403, "learning_rate": 1.6123144826622504e-05, "loss": 1.8227, "step": 10705 }, { "epoch": 0.82, "grad_norm": 1.305999517440796, "learning_rate": 1.6110084545709934e-05, "loss": 0.7488, "step": 10706 }, { "epoch": 0.82, "grad_norm": 1.4254580736160278, "learning_rate": 1.6097029093144623e-05, "loss": 1.4871, "step": 10707 }, { "epoch": 0.82, "grad_norm": 2.294177770614624, "learning_rate": 1.608397846967793e-05, "loss": 2.0041, "step": 10708 }, { "epoch": 0.82, "grad_norm": 0.978656530380249, "learning_rate": 1.6070932676061014e-05, "loss": 1.2848, "step": 10709 }, { "epoch": 0.82, "grad_norm": 1.7427364587783813, "learning_rate": 1.605789171304477e-05, "loss": 0.987, "step": 10710 }, { "epoch": 0.82, "grad_norm": 2.6258044242858887, "learning_rate": 1.6044855581379725e-05, "loss": 0.8616, "step": 10711 }, { "epoch": 0.82, "grad_norm": 1.3899868726730347, "learning_rate": 1.6031824281816234e-05, "loss": 1.4756, "step": 10712 }, { "epoch": 0.82, "grad_norm": 1.7647969722747803, "learning_rate": 1.6018797815104293e-05, "loss": 1.5094, "step": 10713 }, { "epoch": 0.82, "grad_norm": 1.4139844179153442, "learning_rate": 1.6005776181993627e-05, "loss": 1.6395, "step": 10714 }, { "epoch": 0.82, "grad_norm": 1.6344661712646484, "learning_rate": 1.5992759383233747e-05, "loss": 1.5886, "step": 10715 }, { "epoch": 0.82, "grad_norm": 0.8360283374786377, "learning_rate": 1.5979747419573844e-05, "loss": 0.9365, "step": 
10716 }, { "epoch": 0.82, "grad_norm": 1.32644784450531, "learning_rate": 1.5966740291762806e-05, "loss": 1.3648, "step": 10717 }, { "epoch": 0.82, "grad_norm": 1.5523372888565063, "learning_rate": 1.5953738000549267e-05, "loss": 1.1254, "step": 10718 }, { "epoch": 0.82, "grad_norm": 1.273756980895996, "learning_rate": 1.594074054668161e-05, "loss": 1.3097, "step": 10719 }, { "epoch": 0.82, "grad_norm": 1.4618839025497437, "learning_rate": 1.592774793090792e-05, "loss": 1.703, "step": 10720 }, { "epoch": 0.82, "grad_norm": 1.3484818935394287, "learning_rate": 1.5914760153975983e-05, "loss": 1.346, "step": 10721 }, { "epoch": 0.82, "grad_norm": 2.242445230484009, "learning_rate": 1.5901777216633285e-05, "loss": 1.6111, "step": 10722 }, { "epoch": 0.82, "grad_norm": 2.4510655403137207, "learning_rate": 1.588879911962713e-05, "loss": 1.5765, "step": 10723 }, { "epoch": 0.82, "grad_norm": 1.8269476890563965, "learning_rate": 1.587582586370442e-05, "loss": 1.4044, "step": 10724 }, { "epoch": 0.82, "grad_norm": 1.2740538120269775, "learning_rate": 1.5862857449611867e-05, "loss": 1.2476, "step": 10725 }, { "epoch": 0.82, "grad_norm": 1.3579723834991455, "learning_rate": 1.5849893878095912e-05, "loss": 1.3561, "step": 10726 }, { "epoch": 0.82, "grad_norm": 1.9859018325805664, "learning_rate": 1.5836935149902632e-05, "loss": 1.7097, "step": 10727 }, { "epoch": 0.82, "grad_norm": 1.6313804388046265, "learning_rate": 1.58239812657779e-05, "loss": 1.5949, "step": 10728 }, { "epoch": 0.82, "grad_norm": 1.0871180295944214, "learning_rate": 1.5811032226467305e-05, "loss": 1.7488, "step": 10729 }, { "epoch": 0.82, "grad_norm": 1.3428822755813599, "learning_rate": 1.579808803271612e-05, "loss": 1.4616, "step": 10730 }, { "epoch": 0.82, "grad_norm": 0.9985353946685791, "learning_rate": 1.5785148685269323e-05, "loss": 1.3364, "step": 10731 }, { "epoch": 0.82, "grad_norm": 3.204523801803589, "learning_rate": 1.5772214184871713e-05, "loss": 1.4781, "step": 10732 }, { "epoch": 0.82, 
"grad_norm": 1.5715514421463013, "learning_rate": 1.5759284532267694e-05, "loss": 1.4444, "step": 10733 }, { "epoch": 0.82, "grad_norm": 1.3306596279144287, "learning_rate": 1.5746359728201455e-05, "loss": 1.1593, "step": 10734 }, { "epoch": 0.82, "grad_norm": 1.3004318475723267, "learning_rate": 1.5733439773416915e-05, "loss": 0.9371, "step": 10735 }, { "epoch": 0.82, "grad_norm": 1.1740326881408691, "learning_rate": 1.5720524668657653e-05, "loss": 1.3529, "step": 10736 }, { "epoch": 0.82, "grad_norm": 1.6853166818618774, "learning_rate": 1.570761441466704e-05, "loss": 1.2442, "step": 10737 }, { "epoch": 0.82, "grad_norm": 1.069812297821045, "learning_rate": 1.5694709012188135e-05, "loss": 1.3834, "step": 10738 }, { "epoch": 0.82, "grad_norm": 1.97203528881073, "learning_rate": 1.568180846196372e-05, "loss": 1.7842, "step": 10739 }, { "epoch": 0.82, "grad_norm": 1.3371310234069824, "learning_rate": 1.5668912764736254e-05, "loss": 1.4529, "step": 10740 }, { "epoch": 0.82, "grad_norm": 1.5649775266647339, "learning_rate": 1.565602192124802e-05, "loss": 1.1444, "step": 10741 }, { "epoch": 0.82, "grad_norm": 1.5313363075256348, "learning_rate": 1.5643135932240892e-05, "loss": 1.6976, "step": 10742 }, { "epoch": 0.82, "grad_norm": 2.2594597339630127, "learning_rate": 1.563025479845658e-05, "loss": 1.3458, "step": 10743 }, { "epoch": 0.82, "grad_norm": 1.4322903156280518, "learning_rate": 1.5617378520636482e-05, "loss": 1.3754, "step": 10744 }, { "epoch": 0.82, "grad_norm": 1.4195048809051514, "learning_rate": 1.5604507099521647e-05, "loss": 1.7048, "step": 10745 }, { "epoch": 0.82, "grad_norm": 1.0995737314224243, "learning_rate": 1.5591640535852935e-05, "loss": 1.3048, "step": 10746 }, { "epoch": 0.82, "grad_norm": 2.389345407485962, "learning_rate": 1.5578778830370898e-05, "loss": 2.0901, "step": 10747 }, { "epoch": 0.82, "grad_norm": 1.6413989067077637, "learning_rate": 1.556592198381578e-05, "loss": 1.6075, "step": 10748 }, { "epoch": 0.82, "grad_norm": 
2.1019489765167236, "learning_rate": 1.5553069996927584e-05, "loss": 1.3276, "step": 10749 }, { "epoch": 0.82, "grad_norm": 0.9216324687004089, "learning_rate": 1.554022287044602e-05, "loss": 0.8128, "step": 10750 }, { "epoch": 0.82, "grad_norm": 5.688291549682617, "learning_rate": 1.5527380605110463e-05, "loss": 1.3396, "step": 10751 }, { "epoch": 0.82, "grad_norm": 1.5049402713775635, "learning_rate": 1.5514543201660103e-05, "loss": 0.53, "step": 10752 }, { "epoch": 0.82, "grad_norm": 1.7859337329864502, "learning_rate": 1.5501710660833823e-05, "loss": 1.4134, "step": 10753 }, { "epoch": 0.82, "grad_norm": 1.4574859142303467, "learning_rate": 1.548888298337017e-05, "loss": 1.2817, "step": 10754 }, { "epoch": 0.82, "grad_norm": 1.0585224628448486, "learning_rate": 1.5476060170007457e-05, "loss": 1.4741, "step": 10755 }, { "epoch": 0.82, "grad_norm": 1.546452522277832, "learning_rate": 1.5463242221483743e-05, "loss": 1.6295, "step": 10756 }, { "epoch": 0.82, "grad_norm": 1.8137558698654175, "learning_rate": 1.5450429138536738e-05, "loss": 1.7435, "step": 10757 }, { "epoch": 0.82, "grad_norm": 1.1348389387130737, "learning_rate": 1.543762092190394e-05, "loss": 1.7272, "step": 10758 }, { "epoch": 0.82, "grad_norm": 1.161002278327942, "learning_rate": 1.5424817572322525e-05, "loss": 1.1063, "step": 10759 }, { "epoch": 0.82, "grad_norm": 2.5124685764312744, "learning_rate": 1.5412019090529363e-05, "loss": 0.86, "step": 10760 }, { "epoch": 0.82, "grad_norm": 2.251739740371704, "learning_rate": 1.5399225477261116e-05, "loss": 1.8417, "step": 10761 }, { "epoch": 0.82, "grad_norm": 1.6064810752868652, "learning_rate": 1.538643673325415e-05, "loss": 1.6985, "step": 10762 }, { "epoch": 0.82, "grad_norm": 1.2342503070831299, "learning_rate": 1.537365285924447e-05, "loss": 1.3696, "step": 10763 }, { "epoch": 0.82, "grad_norm": 1.6220247745513916, "learning_rate": 1.5360873855967905e-05, "loss": 1.6907, "step": 10764 }, { "epoch": 0.82, "grad_norm": 1.71065092086792, 
"learning_rate": 1.534809972415998e-05, "loss": 1.0077, "step": 10765 }, { "epoch": 0.82, "grad_norm": 1.8237208127975464, "learning_rate": 1.5335330464555864e-05, "loss": 1.2884, "step": 10766 }, { "epoch": 0.82, "grad_norm": 1.134259581565857, "learning_rate": 1.5322566077890543e-05, "loss": 0.8441, "step": 10767 }, { "epoch": 0.82, "grad_norm": 1.6979236602783203, "learning_rate": 1.5309806564898677e-05, "loss": 1.1258, "step": 10768 }, { "epoch": 0.82, "grad_norm": 1.4938088655471802, "learning_rate": 1.529705192631462e-05, "loss": 1.1443, "step": 10769 }, { "epoch": 0.82, "grad_norm": 1.0636138916015625, "learning_rate": 1.5284302162872487e-05, "loss": 1.0803, "step": 10770 }, { "epoch": 0.82, "grad_norm": 1.7568913698196411, "learning_rate": 1.5271557275306125e-05, "loss": 1.2268, "step": 10771 }, { "epoch": 0.82, "grad_norm": 1.2655339241027832, "learning_rate": 1.5258817264349046e-05, "loss": 1.5256, "step": 10772 }, { "epoch": 0.82, "grad_norm": 1.6846867799758911, "learning_rate": 1.5246082130734518e-05, "loss": 1.1716, "step": 10773 }, { "epoch": 0.82, "grad_norm": 1.5667484998703003, "learning_rate": 1.5233351875195546e-05, "loss": 1.3671, "step": 10774 }, { "epoch": 0.82, "grad_norm": 1.7078512907028198, "learning_rate": 1.5220626498464796e-05, "loss": 1.6893, "step": 10775 }, { "epoch": 0.82, "grad_norm": 2.006253957748413, "learning_rate": 1.5207906001274719e-05, "loss": 1.281, "step": 10776 }, { "epoch": 0.82, "grad_norm": 1.760204553604126, "learning_rate": 1.5195190384357404e-05, "loss": 1.3924, "step": 10777 }, { "epoch": 0.82, "grad_norm": 1.5423095226287842, "learning_rate": 1.518247964844477e-05, "loss": 1.9056, "step": 10778 }, { "epoch": 0.82, "grad_norm": 1.955064058303833, "learning_rate": 1.5169773794268327e-05, "loss": 1.5447, "step": 10779 }, { "epoch": 0.82, "grad_norm": 2.984945058822632, "learning_rate": 1.5157072822559437e-05, "loss": 1.8778, "step": 10780 }, { "epoch": 0.82, "grad_norm": 2.1700756549835205, "learning_rate": 
1.5144376734049048e-05, "loss": 1.1853, "step": 10781 }, { "epoch": 0.82, "grad_norm": 4.97192907333374, "learning_rate": 1.5131685529467942e-05, "loss": 1.5343, "step": 10782 }, { "epoch": 0.82, "grad_norm": 2.6274373531341553, "learning_rate": 1.5118999209546559e-05, "loss": 0.8853, "step": 10783 }, { "epoch": 0.82, "grad_norm": 1.1915701627731323, "learning_rate": 1.5106317775015055e-05, "loss": 1.0721, "step": 10784 }, { "epoch": 0.82, "grad_norm": 2.0795748233795166, "learning_rate": 1.5093641226603327e-05, "loss": 1.4982, "step": 10785 }, { "epoch": 0.82, "grad_norm": 1.2205846309661865, "learning_rate": 1.5080969565041004e-05, "loss": 0.9611, "step": 10786 }, { "epoch": 0.82, "grad_norm": 1.336593508720398, "learning_rate": 1.5068302791057398e-05, "loss": 1.4952, "step": 10787 }, { "epoch": 0.82, "grad_norm": 1.9729036092758179, "learning_rate": 1.505564090538153e-05, "loss": 1.6551, "step": 10788 }, { "epoch": 0.82, "grad_norm": 1.2261754274368286, "learning_rate": 1.5042983908742215e-05, "loss": 1.3558, "step": 10789 }, { "epoch": 0.82, "grad_norm": 1.8365670442581177, "learning_rate": 1.503033180186787e-05, "loss": 1.3831, "step": 10790 }, { "epoch": 0.82, "grad_norm": 3.130758762359619, "learning_rate": 1.5017684585486747e-05, "loss": 1.6782, "step": 10791 }, { "epoch": 0.82, "grad_norm": 1.1559895277023315, "learning_rate": 1.5005042260326763e-05, "loss": 1.0104, "step": 10792 }, { "epoch": 0.82, "grad_norm": 1.389416217803955, "learning_rate": 1.4992404827115537e-05, "loss": 1.1235, "step": 10793 }, { "epoch": 0.82, "grad_norm": 1.5391032695770264, "learning_rate": 1.4979772286580428e-05, "loss": 1.4013, "step": 10794 }, { "epoch": 0.82, "grad_norm": 0.9847193360328674, "learning_rate": 1.4967144639448538e-05, "loss": 0.9912, "step": 10795 }, { "epoch": 0.82, "grad_norm": 0.8227435946464539, "learning_rate": 1.4954521886446649e-05, "loss": 1.2086, "step": 10796 }, { "epoch": 0.82, "grad_norm": 1.3243086338043213, "learning_rate": 
1.4941904028301235e-05, "loss": 1.0058, "step": 10797 }, { "epoch": 0.82, "grad_norm": 1.512024998664856, "learning_rate": 1.4929291065738583e-05, "loss": 1.0362, "step": 10798 }, { "epoch": 0.82, "grad_norm": 2.0652005672454834, "learning_rate": 1.49166829994846e-05, "loss": 0.97, "step": 10799 }, { "epoch": 0.82, "grad_norm": 1.8811448812484741, "learning_rate": 1.490407983026496e-05, "loss": 1.1219, "step": 10800 }, { "epoch": 0.82, "grad_norm": 3.4904022216796875, "learning_rate": 1.4891481558805077e-05, "loss": 2.0933, "step": 10801 }, { "epoch": 0.82, "grad_norm": 1.5032119750976562, "learning_rate": 1.4878888185830009e-05, "loss": 1.7406, "step": 10802 }, { "epoch": 0.82, "grad_norm": 1.8760062456130981, "learning_rate": 1.4866299712064602e-05, "loss": 2.0226, "step": 10803 }, { "epoch": 0.82, "grad_norm": 4.469114780426025, "learning_rate": 1.485371613823342e-05, "loss": 1.7876, "step": 10804 }, { "epoch": 0.82, "grad_norm": 1.574844479560852, "learning_rate": 1.4841137465060672e-05, "loss": 1.5048, "step": 10805 }, { "epoch": 0.82, "grad_norm": 2.351583242416382, "learning_rate": 1.4828563693270381e-05, "loss": 1.4213, "step": 10806 }, { "epoch": 0.82, "grad_norm": 1.4634404182434082, "learning_rate": 1.481599482358621e-05, "loss": 1.4708, "step": 10807 }, { "epoch": 0.82, "grad_norm": 2.2830944061279297, "learning_rate": 1.4803430856731549e-05, "loss": 1.4835, "step": 10808 }, { "epoch": 0.82, "grad_norm": 1.8096206188201904, "learning_rate": 1.479087179342955e-05, "loss": 1.68, "step": 10809 }, { "epoch": 0.82, "grad_norm": 1.5187549591064453, "learning_rate": 1.4778317634403083e-05, "loss": 1.2905, "step": 10810 }, { "epoch": 0.82, "grad_norm": 2.246626138687134, "learning_rate": 1.4765768380374678e-05, "loss": 1.0673, "step": 10811 }, { "epoch": 0.82, "grad_norm": 1.950950264930725, "learning_rate": 1.4753224032066614e-05, "loss": 1.3723, "step": 10812 }, { "epoch": 0.83, "grad_norm": 1.1996666193008423, "learning_rate": 1.4740684590200937e-05, "loss": 
1.1328, "step": 10813 }, { "epoch": 0.83, "grad_norm": 2.0157594680786133, "learning_rate": 1.4728150055499302e-05, "loss": 1.8778, "step": 10814 }, { "epoch": 0.83, "grad_norm": 1.8396861553192139, "learning_rate": 1.4715620428683208e-05, "loss": 2.1364, "step": 10815 }, { "epoch": 0.83, "grad_norm": 1.153346300125122, "learning_rate": 1.4703095710473758e-05, "loss": 1.6443, "step": 10816 }, { "epoch": 0.83, "grad_norm": 2.6385295391082764, "learning_rate": 1.469057590159183e-05, "loss": 1.4032, "step": 10817 }, { "epoch": 0.83, "grad_norm": 2.398857593536377, "learning_rate": 1.4678061002758003e-05, "loss": 1.0334, "step": 10818 }, { "epoch": 0.83, "grad_norm": 1.5233728885650635, "learning_rate": 1.4665551014692625e-05, "loss": 1.4094, "step": 10819 }, { "epoch": 0.83, "grad_norm": 1.636377215385437, "learning_rate": 1.4653045938115662e-05, "loss": 1.2742, "step": 10820 }, { "epoch": 0.83, "grad_norm": 1.3918235301971436, "learning_rate": 1.4640545773746884e-05, "loss": 1.4298, "step": 10821 }, { "epoch": 0.83, "grad_norm": 2.7929041385650635, "learning_rate": 1.4628050522305758e-05, "loss": 1.798, "step": 10822 }, { "epoch": 0.83, "grad_norm": 1.865617275238037, "learning_rate": 1.4615560184511423e-05, "loss": 1.5452, "step": 10823 }, { "epoch": 0.83, "grad_norm": 0.9347051978111267, "learning_rate": 1.4603074761082813e-05, "loss": 0.9186, "step": 10824 }, { "epoch": 0.83, "grad_norm": 1.4755864143371582, "learning_rate": 1.4590594252738522e-05, "loss": 1.25, "step": 10825 }, { "epoch": 0.83, "grad_norm": 2.1901183128356934, "learning_rate": 1.4578118660196826e-05, "loss": 2.104, "step": 10826 }, { "epoch": 0.83, "grad_norm": 1.9438204765319824, "learning_rate": 1.4565647984175811e-05, "loss": 1.7372, "step": 10827 }, { "epoch": 0.83, "grad_norm": 1.1329959630966187, "learning_rate": 1.4553182225393259e-05, "loss": 1.6284, "step": 10828 }, { "epoch": 0.83, "grad_norm": 1.102288842201233, "learning_rate": 1.4540721384566602e-05, "loss": 1.1376, "step": 10829 }, 
{ "epoch": 0.83, "grad_norm": 1.0605003833770752, "learning_rate": 1.4528265462413038e-05, "loss": 1.4045, "step": 10830 }, { "epoch": 0.83, "grad_norm": 3.0639901161193848, "learning_rate": 1.4515814459649513e-05, "loss": 1.0907, "step": 10831 }, { "epoch": 0.83, "grad_norm": 1.5103490352630615, "learning_rate": 1.450336837699261e-05, "loss": 1.075, "step": 10832 }, { "epoch": 0.83, "grad_norm": 1.0915052890777588, "learning_rate": 1.4490927215158711e-05, "loss": 1.076, "step": 10833 }, { "epoch": 0.83, "grad_norm": 2.2775206565856934, "learning_rate": 1.4478490974863846e-05, "loss": 1.449, "step": 10834 }, { "epoch": 0.83, "grad_norm": 1.6900010108947754, "learning_rate": 1.4466059656823816e-05, "loss": 1.4218, "step": 10835 }, { "epoch": 0.83, "grad_norm": 1.2755335569381714, "learning_rate": 1.4453633261754084e-05, "loss": 0.7358, "step": 10836 }, { "epoch": 0.83, "grad_norm": 1.4875119924545288, "learning_rate": 1.444121179036989e-05, "loss": 1.6293, "step": 10837 }, { "epoch": 0.83, "grad_norm": 0.9891355037689209, "learning_rate": 1.442879524338614e-05, "loss": 1.7921, "step": 10838 }, { "epoch": 0.83, "grad_norm": 1.4008588790893555, "learning_rate": 1.4416383621517493e-05, "loss": 1.0975, "step": 10839 }, { "epoch": 0.83, "grad_norm": 1.852485179901123, "learning_rate": 1.4403976925478312e-05, "loss": 1.4385, "step": 10840 }, { "epoch": 0.83, "grad_norm": 1.1687657833099365, "learning_rate": 1.439157515598265e-05, "loss": 1.7619, "step": 10841 }, { "epoch": 0.83, "grad_norm": 1.5231074094772339, "learning_rate": 1.4379178313744323e-05, "loss": 1.598, "step": 10842 }, { "epoch": 0.83, "grad_norm": 0.8131449818611145, "learning_rate": 1.436678639947685e-05, "loss": 0.9732, "step": 10843 }, { "epoch": 0.83, "grad_norm": 1.764658808708191, "learning_rate": 1.4354399413893437e-05, "loss": 1.2251, "step": 10844 }, { "epoch": 0.83, "grad_norm": 1.5484057664871216, "learning_rate": 1.4342017357707015e-05, "loss": 1.5894, "step": 10845 }, { "epoch": 0.83, 
"grad_norm": 1.2575716972351074, "learning_rate": 1.432964023163028e-05, "loss": 0.9025, "step": 10846 }, { "epoch": 0.83, "grad_norm": 1.9598517417907715, "learning_rate": 1.4317268036375552e-05, "loss": 1.1602, "step": 10847 }, { "epoch": 0.83, "grad_norm": 1.6963244676589966, "learning_rate": 1.4304900772654961e-05, "loss": 1.2828, "step": 10848 }, { "epoch": 0.83, "grad_norm": 2.022634267807007, "learning_rate": 1.4292538441180336e-05, "loss": 1.101, "step": 10849 }, { "epoch": 0.83, "grad_norm": 1.8926215171813965, "learning_rate": 1.4280181042663143e-05, "loss": 1.421, "step": 10850 }, { "epoch": 0.83, "grad_norm": 1.5733067989349365, "learning_rate": 1.4267828577814646e-05, "loss": 1.5444, "step": 10851 }, { "epoch": 0.83, "grad_norm": 1.4411731958389282, "learning_rate": 1.425548104734583e-05, "loss": 1.3693, "step": 10852 }, { "epoch": 0.83, "grad_norm": 1.2453949451446533, "learning_rate": 1.4243138451967341e-05, "loss": 1.1697, "step": 10853 }, { "epoch": 0.83, "grad_norm": 1.6441951990127563, "learning_rate": 1.4230800792389554e-05, "loss": 1.3742, "step": 10854 }, { "epoch": 0.83, "grad_norm": 1.5813237428665161, "learning_rate": 1.4218468069322578e-05, "loss": 1.2054, "step": 10855 }, { "epoch": 0.83, "grad_norm": 1.5701687335968018, "learning_rate": 1.4206140283476254e-05, "loss": 1.8338, "step": 10856 }, { "epoch": 0.83, "grad_norm": 2.5040621757507324, "learning_rate": 1.4193817435560085e-05, "loss": 1.3104, "step": 10857 }, { "epoch": 0.83, "grad_norm": 1.1449302434921265, "learning_rate": 1.4181499526283337e-05, "loss": 1.8518, "step": 10858 }, { "epoch": 0.83, "grad_norm": 1.598670244216919, "learning_rate": 1.4169186556355007e-05, "loss": 1.7872, "step": 10859 }, { "epoch": 0.83, "grad_norm": 5.948017120361328, "learning_rate": 1.415687852648372e-05, "loss": 2.6934, "step": 10860 }, { "epoch": 0.83, "grad_norm": 3.2431342601776123, "learning_rate": 1.4144575437377927e-05, "loss": 1.2337, "step": 10861 }, { "epoch": 0.83, "grad_norm": 
1.134077787399292, "learning_rate": 1.413227728974571e-05, "loss": 0.8261, "step": 10862 }, { "epoch": 0.83, "grad_norm": 1.2281101942062378, "learning_rate": 1.4119984084294891e-05, "loss": 1.4808, "step": 10863 }, { "epoch": 0.83, "grad_norm": 1.8324741125106812, "learning_rate": 1.4107695821733025e-05, "loss": 1.3699, "step": 10864 }, { "epoch": 0.83, "grad_norm": 1.677463173866272, "learning_rate": 1.4095412502767402e-05, "loss": 1.0238, "step": 10865 }, { "epoch": 0.83, "grad_norm": 1.5931808948516846, "learning_rate": 1.4083134128104947e-05, "loss": 0.4452, "step": 10866 }, { "epoch": 0.83, "grad_norm": 1.035880446434021, "learning_rate": 1.407086069845237e-05, "loss": 1.3575, "step": 10867 }, { "epoch": 0.83, "grad_norm": 1.7980093955993652, "learning_rate": 1.405859221451612e-05, "loss": 2.0603, "step": 10868 }, { "epoch": 0.83, "grad_norm": 1.3053653240203857, "learning_rate": 1.404632867700224e-05, "loss": 1.2134, "step": 10869 }, { "epoch": 0.83, "grad_norm": 1.8039202690124512, "learning_rate": 1.4034070086616647e-05, "loss": 1.3532, "step": 10870 }, { "epoch": 0.83, "grad_norm": 1.1053566932678223, "learning_rate": 1.4021816444064828e-05, "loss": 1.2219, "step": 10871 }, { "epoch": 0.83, "grad_norm": 1.77067232131958, "learning_rate": 1.40095677500521e-05, "loss": 1.7884, "step": 10872 }, { "epoch": 0.83, "grad_norm": 1.2343002557754517, "learning_rate": 1.3997324005283408e-05, "loss": 1.5068, "step": 10873 }, { "epoch": 0.83, "grad_norm": 1.5424959659576416, "learning_rate": 1.3985085210463477e-05, "loss": 0.5092, "step": 10874 }, { "epoch": 0.83, "grad_norm": 1.1998953819274902, "learning_rate": 1.3972851366296692e-05, "loss": 1.0596, "step": 10875 }, { "epoch": 0.83, "grad_norm": 1.7673335075378418, "learning_rate": 1.3960622473487194e-05, "loss": 1.1725, "step": 10876 }, { "epoch": 0.83, "grad_norm": 1.7937633991241455, "learning_rate": 1.394839853273886e-05, "loss": 1.1953, "step": 10877 }, { "epoch": 0.83, "grad_norm": 1.7192093133926392, 
"learning_rate": 1.3936179544755191e-05, "loss": 1.302, "step": 10878 }, { "epoch": 0.83, "grad_norm": 1.812632441520691, "learning_rate": 1.392396551023949e-05, "loss": 1.3349, "step": 10879 }, { "epoch": 0.83, "grad_norm": 1.8171533346176147, "learning_rate": 1.3911756429894763e-05, "loss": 1.0084, "step": 10880 }, { "epoch": 0.83, "grad_norm": 1.185773253440857, "learning_rate": 1.389955230442369e-05, "loss": 1.4272, "step": 10881 }, { "epoch": 0.83, "grad_norm": 1.1075003147125244, "learning_rate": 1.388735313452868e-05, "loss": 1.1182, "step": 10882 }, { "epoch": 0.83, "grad_norm": 1.6265920400619507, "learning_rate": 1.387515892091189e-05, "loss": 1.0788, "step": 10883 }, { "epoch": 0.83, "grad_norm": 1.9211146831512451, "learning_rate": 1.3862969664275127e-05, "loss": 1.9377, "step": 10884 }, { "epoch": 0.83, "grad_norm": 1.1368659734725952, "learning_rate": 1.3850785365319984e-05, "loss": 1.1912, "step": 10885 }, { "epoch": 0.83, "grad_norm": 1.9937068223953247, "learning_rate": 1.3838606024747746e-05, "loss": 1.5181, "step": 10886 }, { "epoch": 0.83, "grad_norm": 1.1805126667022705, "learning_rate": 1.3826431643259385e-05, "loss": 1.1108, "step": 10887 }, { "epoch": 0.83, "grad_norm": 1.6907185316085815, "learning_rate": 1.3814262221555596e-05, "loss": 1.5228, "step": 10888 }, { "epoch": 0.83, "grad_norm": 1.6918790340423584, "learning_rate": 1.3802097760336851e-05, "loss": 0.5728, "step": 10889 }, { "epoch": 0.83, "grad_norm": 0.8092750310897827, "learning_rate": 1.3789938260303237e-05, "loss": 0.9258, "step": 10890 }, { "epoch": 0.83, "grad_norm": 1.1600615978240967, "learning_rate": 1.3777783722154603e-05, "loss": 1.6174, "step": 10891 }, { "epoch": 0.83, "grad_norm": 1.2170039415359497, "learning_rate": 1.3765634146590545e-05, "loss": 1.526, "step": 10892 }, { "epoch": 0.83, "grad_norm": 1.2874335050582886, "learning_rate": 1.3753489534310304e-05, "loss": 1.2538, "step": 10893 }, { "epoch": 0.83, "grad_norm": 1.7808098793029785, "learning_rate": 
1.3741349886012877e-05, "loss": 1.5415, "step": 10894 }, { "epoch": 0.83, "grad_norm": 1.4539536237716675, "learning_rate": 1.3729215202397017e-05, "loss": 1.0584, "step": 10895 }, { "epoch": 0.83, "grad_norm": 1.5995813608169556, "learning_rate": 1.3717085484161085e-05, "loss": 1.4304, "step": 10896 }, { "epoch": 0.83, "grad_norm": 1.6935869455337524, "learning_rate": 1.3704960732003236e-05, "loss": 1.331, "step": 10897 }, { "epoch": 0.83, "grad_norm": 1.2566769123077393, "learning_rate": 1.3692840946621355e-05, "loss": 1.1395, "step": 10898 }, { "epoch": 0.83, "grad_norm": 2.1243436336517334, "learning_rate": 1.3680726128712972e-05, "loss": 1.9882, "step": 10899 }, { "epoch": 0.83, "grad_norm": 1.2349334955215454, "learning_rate": 1.3668616278975343e-05, "loss": 1.2082, "step": 10900 }, { "epoch": 0.83, "grad_norm": 3.0454301834106445, "learning_rate": 1.365651139810551e-05, "loss": 2.17, "step": 10901 }, { "epoch": 0.83, "grad_norm": 1.297400712966919, "learning_rate": 1.3644411486800124e-05, "loss": 1.4826, "step": 10902 }, { "epoch": 0.83, "grad_norm": 1.7890350818634033, "learning_rate": 1.363231654575564e-05, "loss": 1.781, "step": 10903 }, { "epoch": 0.83, "grad_norm": 1.524836778640747, "learning_rate": 1.3620226575668216e-05, "loss": 1.1435, "step": 10904 }, { "epoch": 0.83, "grad_norm": 2.082245349884033, "learning_rate": 1.3608141577233636e-05, "loss": 1.5785, "step": 10905 }, { "epoch": 0.83, "grad_norm": 1.3361985683441162, "learning_rate": 1.3596061551147488e-05, "loss": 1.3023, "step": 10906 }, { "epoch": 0.83, "grad_norm": 1.8741029500961304, "learning_rate": 1.3583986498105083e-05, "loss": 1.4492, "step": 10907 }, { "epoch": 0.83, "grad_norm": 1.3192434310913086, "learning_rate": 1.3571916418801356e-05, "loss": 1.2481, "step": 10908 }, { "epoch": 0.83, "grad_norm": 1.4755403995513916, "learning_rate": 1.355985131393106e-05, "loss": 1.1729, "step": 10909 }, { "epoch": 0.83, "grad_norm": 1.2138400077819824, "learning_rate": 1.3547791184188574e-05, 
"loss": 1.5443, "step": 10910 }, { "epoch": 0.83, "grad_norm": 3.0788142681121826, "learning_rate": 1.3535736030268021e-05, "loss": 2.2375, "step": 10911 }, { "epoch": 0.83, "grad_norm": 2.54362416267395, "learning_rate": 1.3523685852863254e-05, "loss": 1.7961, "step": 10912 }, { "epoch": 0.83, "grad_norm": 6.324601173400879, "learning_rate": 1.3511640652667867e-05, "loss": 2.1205, "step": 10913 }, { "epoch": 0.83, "grad_norm": 1.2176281213760376, "learning_rate": 1.3499600430375081e-05, "loss": 1.3828, "step": 10914 }, { "epoch": 0.83, "grad_norm": 1.0852137804031372, "learning_rate": 1.3487565186677897e-05, "loss": 0.9707, "step": 10915 }, { "epoch": 0.83, "grad_norm": 1.1630582809448242, "learning_rate": 1.3475534922269039e-05, "loss": 1.5772, "step": 10916 }, { "epoch": 0.83, "grad_norm": 2.523996591567993, "learning_rate": 1.3463509637840865e-05, "loss": 1.3143, "step": 10917 }, { "epoch": 0.83, "grad_norm": 1.9013845920562744, "learning_rate": 1.3451489334085554e-05, "loss": 1.2545, "step": 10918 }, { "epoch": 0.83, "grad_norm": 4.342968463897705, "learning_rate": 1.3439474011694919e-05, "loss": 1.1956, "step": 10919 }, { "epoch": 0.83, "grad_norm": 0.9105411767959595, "learning_rate": 1.3427463671360496e-05, "loss": 0.8854, "step": 10920 }, { "epoch": 0.83, "grad_norm": 1.222419023513794, "learning_rate": 1.3415458313773554e-05, "loss": 1.0833, "step": 10921 }, { "epoch": 0.83, "grad_norm": 2.2944846153259277, "learning_rate": 1.3403457939625107e-05, "loss": 1.9689, "step": 10922 }, { "epoch": 0.83, "grad_norm": 1.5479828119277954, "learning_rate": 1.3391462549605793e-05, "loss": 1.3968, "step": 10923 }, { "epoch": 0.83, "grad_norm": 1.6031999588012695, "learning_rate": 1.3379472144406036e-05, "loss": 1.3077, "step": 10924 }, { "epoch": 0.83, "grad_norm": 1.1088674068450928, "learning_rate": 1.3367486724715983e-05, "loss": 1.4814, "step": 10925 }, { "epoch": 0.83, "grad_norm": 1.117885947227478, "learning_rate": 1.3355506291225417e-05, "loss": 1.005, "step": 
10926 }, { "epoch": 0.83, "grad_norm": 1.3600538969039917, "learning_rate": 1.334353084462393e-05, "loss": 1.0092, "step": 10927 }, { "epoch": 0.83, "grad_norm": 1.5358349084854126, "learning_rate": 1.333156038560075e-05, "loss": 1.3071, "step": 10928 }, { "epoch": 0.83, "grad_norm": 2.125638246536255, "learning_rate": 1.3319594914844823e-05, "loss": 1.8224, "step": 10929 }, { "epoch": 0.83, "grad_norm": 1.2463551759719849, "learning_rate": 1.3307634433044846e-05, "loss": 1.4056, "step": 10930 }, { "epoch": 0.83, "grad_norm": 1.3229840993881226, "learning_rate": 1.3295678940889245e-05, "loss": 1.0816, "step": 10931 }, { "epoch": 0.83, "grad_norm": 1.4858849048614502, "learning_rate": 1.328372843906609e-05, "loss": 1.1563, "step": 10932 }, { "epoch": 0.83, "grad_norm": 1.4551469087600708, "learning_rate": 1.327178292826321e-05, "loss": 1.5521, "step": 10933 }, { "epoch": 0.83, "grad_norm": 1.5912034511566162, "learning_rate": 1.3259842409168165e-05, "loss": 1.5954, "step": 10934 }, { "epoch": 0.83, "grad_norm": 1.5899230241775513, "learning_rate": 1.3247906882468153e-05, "loss": 1.4454, "step": 10935 }, { "epoch": 0.83, "grad_norm": 1.4757505655288696, "learning_rate": 1.3235976348850165e-05, "loss": 1.2782, "step": 10936 }, { "epoch": 0.83, "grad_norm": 1.2626382112503052, "learning_rate": 1.3224050809000888e-05, "loss": 0.7059, "step": 10937 }, { "epoch": 0.83, "grad_norm": 2.1362709999084473, "learning_rate": 1.321213026360667e-05, "loss": 2.2015, "step": 10938 }, { "epoch": 0.83, "grad_norm": 0.9657152891159058, "learning_rate": 1.3200214713353599e-05, "loss": 1.1363, "step": 10939 }, { "epoch": 0.83, "grad_norm": 1.6808499097824097, "learning_rate": 1.3188304158927533e-05, "loss": 1.2253, "step": 10940 }, { "epoch": 0.83, "grad_norm": 1.1934826374053955, "learning_rate": 1.317639860101394e-05, "loss": 1.7705, "step": 10941 }, { "epoch": 0.83, "grad_norm": 1.2362444400787354, "learning_rate": 1.3164498040298067e-05, "loss": 1.3133, "step": 10942 }, { "epoch": 
0.83, "grad_norm": 2.3128719329833984, "learning_rate": 1.3152602477464904e-05, "loss": 1.5228, "step": 10943 }, { "epoch": 0.84, "grad_norm": 2.6843292713165283, "learning_rate": 1.3140711913199044e-05, "loss": 1.5559, "step": 10944 }, { "epoch": 0.84, "grad_norm": 1.7029610872268677, "learning_rate": 1.3128826348184887e-05, "loss": 1.3749, "step": 10945 }, { "epoch": 0.84, "grad_norm": 1.3927111625671387, "learning_rate": 1.3116945783106538e-05, "loss": 1.785, "step": 10946 }, { "epoch": 0.84, "grad_norm": 2.583162784576416, "learning_rate": 1.3105070218647775e-05, "loss": 1.8335, "step": 10947 }, { "epoch": 0.84, "grad_norm": 1.899811029434204, "learning_rate": 1.3093199655492071e-05, "loss": 0.6399, "step": 10948 }, { "epoch": 0.84, "grad_norm": 1.3865715265274048, "learning_rate": 1.3081334094322707e-05, "loss": 1.4822, "step": 10949 }, { "epoch": 0.84, "grad_norm": 1.2911262512207031, "learning_rate": 1.3069473535822551e-05, "loss": 0.9652, "step": 10950 }, { "epoch": 0.84, "grad_norm": 1.4036959409713745, "learning_rate": 1.3057617980674275e-05, "loss": 0.9242, "step": 10951 }, { "epoch": 0.84, "grad_norm": 1.9759557247161865, "learning_rate": 1.3045767429560273e-05, "loss": 0.8958, "step": 10952 }, { "epoch": 0.84, "grad_norm": 1.244972586631775, "learning_rate": 1.3033921883162536e-05, "loss": 1.7697, "step": 10953 }, { "epoch": 0.84, "grad_norm": 5.97389030456543, "learning_rate": 1.3022081342162894e-05, "loss": 1.5701, "step": 10954 }, { "epoch": 0.84, "grad_norm": 2.6477179527282715, "learning_rate": 1.3010245807242849e-05, "loss": 1.1784, "step": 10955 }, { "epoch": 0.84, "grad_norm": 2.916757822036743, "learning_rate": 1.2998415279083575e-05, "loss": 1.7816, "step": 10956 }, { "epoch": 0.84, "grad_norm": 1.3471310138702393, "learning_rate": 1.2986589758365986e-05, "loss": 1.6896, "step": 10957 }, { "epoch": 0.84, "grad_norm": 1.2538104057312012, "learning_rate": 1.2974769245770723e-05, "loss": 0.8194, "step": 10958 }, { "epoch": 0.84, "grad_norm": 
1.154361367225647, "learning_rate": 1.2962953741978113e-05, "loss": 1.4625, "step": 10959 }, { "epoch": 0.84, "grad_norm": 2.122776746749878, "learning_rate": 1.2951143247668197e-05, "loss": 1.63, "step": 10960 }, { "epoch": 0.84, "grad_norm": 1.445713996887207, "learning_rate": 1.2939337763520787e-05, "loss": 1.2656, "step": 10961 }, { "epoch": 0.84, "grad_norm": 1.2831058502197266, "learning_rate": 1.2927537290215286e-05, "loss": 1.2132, "step": 10962 }, { "epoch": 0.84, "grad_norm": 1.726308822631836, "learning_rate": 1.2915741828430928e-05, "loss": 1.5261, "step": 10963 }, { "epoch": 0.84, "grad_norm": 2.777461290359497, "learning_rate": 1.2903951378846612e-05, "loss": 1.9357, "step": 10964 }, { "epoch": 0.84, "grad_norm": 1.5115972757339478, "learning_rate": 1.28921659421409e-05, "loss": 1.3957, "step": 10965 }, { "epoch": 0.84, "grad_norm": 1.8247379064559937, "learning_rate": 1.2880385518992178e-05, "loss": 1.1243, "step": 10966 }, { "epoch": 0.84, "grad_norm": 1.4262783527374268, "learning_rate": 1.2868610110078428e-05, "loss": 0.9947, "step": 10967 }, { "epoch": 0.84, "grad_norm": 1.1037733554840088, "learning_rate": 1.2856839716077395e-05, "loss": 1.4902, "step": 10968 }, { "epoch": 0.84, "grad_norm": 3.7023587226867676, "learning_rate": 1.284507433766654e-05, "loss": 1.8193, "step": 10969 }, { "epoch": 0.84, "grad_norm": 2.2022366523742676, "learning_rate": 1.2833313975523053e-05, "loss": 1.2758, "step": 10970 }, { "epoch": 0.84, "grad_norm": 1.0511291027069092, "learning_rate": 1.2821558630323772e-05, "loss": 0.8657, "step": 10971 }, { "epoch": 0.84, "grad_norm": 1.2199212312698364, "learning_rate": 1.2809808302745297e-05, "loss": 1.4564, "step": 10972 }, { "epoch": 0.84, "grad_norm": 1.444042682647705, "learning_rate": 1.2798062993463956e-05, "loss": 1.4338, "step": 10973 }, { "epoch": 0.84, "grad_norm": 1.0324478149414062, "learning_rate": 1.2786322703155707e-05, "loss": 1.0778, "step": 10974 }, { "epoch": 0.84, "grad_norm": 1.6507700681686401, 
"learning_rate": 1.2774587432496321e-05, "loss": 1.7314, "step": 10975 }, { "epoch": 0.84, "grad_norm": 1.0178207159042358, "learning_rate": 1.2762857182161213e-05, "loss": 1.5834, "step": 10976 }, { "epoch": 0.84, "grad_norm": 1.233500361442566, "learning_rate": 1.2751131952825501e-05, "loss": 1.1078, "step": 10977 }, { "epoch": 0.84, "grad_norm": 2.506697654724121, "learning_rate": 1.2739411745164064e-05, "loss": 1.3295, "step": 10978 }, { "epoch": 0.84, "grad_norm": 1.3876625299453735, "learning_rate": 1.2727696559851488e-05, "loss": 1.304, "step": 10979 }, { "epoch": 0.84, "grad_norm": 1.9108871221542358, "learning_rate": 1.2715986397561997e-05, "loss": 0.8456, "step": 10980 }, { "epoch": 0.84, "grad_norm": 1.6453090906143188, "learning_rate": 1.2704281258969598e-05, "loss": 0.8465, "step": 10981 }, { "epoch": 0.84, "grad_norm": 0.8977683782577515, "learning_rate": 1.2692581144748028e-05, "loss": 0.8617, "step": 10982 }, { "epoch": 0.84, "grad_norm": 2.04718279838562, "learning_rate": 1.2680886055570651e-05, "loss": 1.0041, "step": 10983 }, { "epoch": 0.84, "grad_norm": 1.2863537073135376, "learning_rate": 1.2669195992110604e-05, "loss": 1.2196, "step": 10984 }, { "epoch": 0.84, "grad_norm": 4.125217914581299, "learning_rate": 1.265751095504073e-05, "loss": 2.0349, "step": 10985 }, { "epoch": 0.84, "grad_norm": 3.13765287399292, "learning_rate": 1.2645830945033532e-05, "loss": 1.6698, "step": 10986 }, { "epoch": 0.84, "grad_norm": 3.0804755687713623, "learning_rate": 1.2634155962761274e-05, "loss": 1.72, "step": 10987 }, { "epoch": 0.84, "grad_norm": 1.1654484272003174, "learning_rate": 1.2622486008895962e-05, "loss": 1.2977, "step": 10988 }, { "epoch": 0.84, "grad_norm": 1.9030786752700806, "learning_rate": 1.2610821084109203e-05, "loss": 1.6885, "step": 10989 }, { "epoch": 0.84, "grad_norm": 1.250300407409668, "learning_rate": 1.2599161189072427e-05, "loss": 0.9382, "step": 10990 }, { "epoch": 0.84, "grad_norm": 1.8563936948776245, "learning_rate": 
1.25875063244567e-05, "loss": 1.1645, "step": 10991 }, { "epoch": 0.84, "grad_norm": 1.478269338607788, "learning_rate": 1.2575856490932868e-05, "loss": 1.1304, "step": 10992 }, { "epoch": 0.84, "grad_norm": 1.2811099290847778, "learning_rate": 1.2564211689171423e-05, "loss": 1.5039, "step": 10993 }, { "epoch": 0.84, "grad_norm": 2.137068748474121, "learning_rate": 1.2552571919842559e-05, "loss": 2.0069, "step": 10994 }, { "epoch": 0.84, "grad_norm": 1.4013005495071411, "learning_rate": 1.2540937183616263e-05, "loss": 1.1718, "step": 10995 }, { "epoch": 0.84, "grad_norm": 1.4046839475631714, "learning_rate": 1.2529307481162134e-05, "loss": 1.123, "step": 10996 }, { "epoch": 0.84, "grad_norm": 1.2232791185379028, "learning_rate": 1.2517682813149545e-05, "loss": 1.0435, "step": 10997 }, { "epoch": 0.84, "grad_norm": 3.205697774887085, "learning_rate": 1.2506063180247596e-05, "loss": 1.5199, "step": 10998 }, { "epoch": 0.84, "grad_norm": 1.1035542488098145, "learning_rate": 1.2494448583125018e-05, "loss": 1.4943, "step": 10999 }, { "epoch": 0.84, "grad_norm": 1.5101292133331299, "learning_rate": 1.248283902245031e-05, "loss": 1.7452, "step": 11000 }, { "epoch": 0.84, "grad_norm": 1.6402497291564941, "learning_rate": 1.2471234498891692e-05, "loss": 1.3153, "step": 11001 }, { "epoch": 0.84, "grad_norm": 5.683802604675293, "learning_rate": 1.2459635013117043e-05, "loss": 2.1973, "step": 11002 }, { "epoch": 0.84, "grad_norm": 1.2708654403686523, "learning_rate": 1.2448040565794005e-05, "loss": 1.06, "step": 11003 }, { "epoch": 0.84, "grad_norm": 1.2995195388793945, "learning_rate": 1.2436451157589902e-05, "loss": 1.5956, "step": 11004 }, { "epoch": 0.84, "grad_norm": 1.382264494895935, "learning_rate": 1.2424866789171729e-05, "loss": 1.4583, "step": 11005 }, { "epoch": 0.84, "grad_norm": 3.0953590869903564, "learning_rate": 1.2413287461206268e-05, "loss": 1.1166, "step": 11006 }, { "epoch": 0.84, "grad_norm": 1.3538181781768799, "learning_rate": 1.2401713174359997e-05, 
"loss": 0.6011, "step": 11007 }, { "epoch": 0.84, "grad_norm": 3.1104848384857178, "learning_rate": 1.2390143929299024e-05, "loss": 1.6337, "step": 11008 }, { "epoch": 0.84, "grad_norm": 2.200852870941162, "learning_rate": 1.2378579726689277e-05, "loss": 1.7761, "step": 11009 }, { "epoch": 0.84, "grad_norm": 2.91957688331604, "learning_rate": 1.2367020567196341e-05, "loss": 1.8664, "step": 11010 }, { "epoch": 0.84, "grad_norm": 1.5362507104873657, "learning_rate": 1.2355466451485465e-05, "loss": 1.1528, "step": 11011 }, { "epoch": 0.84, "grad_norm": 1.702241063117981, "learning_rate": 1.2343917380221715e-05, "loss": 1.9485, "step": 11012 }, { "epoch": 0.84, "grad_norm": 1.1332261562347412, "learning_rate": 1.2332373354069782e-05, "loss": 1.5264, "step": 11013 }, { "epoch": 0.84, "grad_norm": 3.08528995513916, "learning_rate": 1.2320834373694057e-05, "loss": 1.2564, "step": 11014 }, { "epoch": 0.84, "grad_norm": 1.3152121305465698, "learning_rate": 1.2309300439758709e-05, "loss": 1.1566, "step": 11015 }, { "epoch": 0.84, "grad_norm": 1.1548001766204834, "learning_rate": 1.2297771552927606e-05, "loss": 1.7731, "step": 11016 }, { "epoch": 0.84, "grad_norm": 1.3229299783706665, "learning_rate": 1.2286247713864241e-05, "loss": 1.3174, "step": 11017 }, { "epoch": 0.84, "grad_norm": 1.980249285697937, "learning_rate": 1.2274728923231915e-05, "loss": 1.3765, "step": 11018 }, { "epoch": 0.84, "grad_norm": 1.2863895893096924, "learning_rate": 1.2263215181693621e-05, "loss": 1.5144, "step": 11019 }, { "epoch": 0.84, "grad_norm": 1.2568442821502686, "learning_rate": 1.2251706489911984e-05, "loss": 1.0319, "step": 11020 }, { "epoch": 0.84, "grad_norm": 1.1821058988571167, "learning_rate": 1.2240202848549454e-05, "loss": 1.9002, "step": 11021 }, { "epoch": 0.84, "grad_norm": 2.1123244762420654, "learning_rate": 1.2228704258268109e-05, "loss": 1.7341, "step": 11022 }, { "epoch": 0.84, "grad_norm": 2.0205113887786865, "learning_rate": 1.221721071972972e-05, "loss": 1.4543, "step": 
11023 }, { "epoch": 0.84, "grad_norm": 0.9494538307189941, "learning_rate": 1.2205722233595852e-05, "loss": 1.2354, "step": 11024 }, { "epoch": 0.84, "grad_norm": 1.689877986907959, "learning_rate": 1.2194238800527747e-05, "loss": 1.2343, "step": 11025 }, { "epoch": 0.84, "grad_norm": 1.231349229812622, "learning_rate": 1.218276042118629e-05, "loss": 1.2191, "step": 11026 }, { "epoch": 0.84, "grad_norm": 1.3947654962539673, "learning_rate": 1.2171287096232153e-05, "loss": 1.4931, "step": 11027 }, { "epoch": 0.84, "grad_norm": 1.4716302156448364, "learning_rate": 1.215981882632572e-05, "loss": 1.123, "step": 11028 }, { "epoch": 0.84, "grad_norm": 1.1203324794769287, "learning_rate": 1.2148355612127005e-05, "loss": 1.4346, "step": 11029 }, { "epoch": 0.84, "grad_norm": 1.533203125, "learning_rate": 1.2136897454295837e-05, "loss": 0.743, "step": 11030 }, { "epoch": 0.84, "grad_norm": 4.05903434753418, "learning_rate": 1.2125444353491644e-05, "loss": 1.6548, "step": 11031 }, { "epoch": 0.84, "grad_norm": 1.1031707525253296, "learning_rate": 1.2113996310373655e-05, "loss": 1.365, "step": 11032 }, { "epoch": 0.84, "grad_norm": 1.5056333541870117, "learning_rate": 1.2102553325600752e-05, "loss": 1.636, "step": 11033 }, { "epoch": 0.84, "grad_norm": 1.2167754173278809, "learning_rate": 1.2091115399831565e-05, "loss": 1.3109, "step": 11034 }, { "epoch": 0.84, "grad_norm": 1.6331123113632202, "learning_rate": 1.2079682533724379e-05, "loss": 1.7378, "step": 11035 }, { "epoch": 0.84, "grad_norm": 0.8101341128349304, "learning_rate": 1.2068254727937245e-05, "loss": 1.1813, "step": 11036 }, { "epoch": 0.84, "grad_norm": 1.9279729127883911, "learning_rate": 1.2056831983127914e-05, "loss": 1.1192, "step": 11037 }, { "epoch": 0.84, "grad_norm": 1.4241498708724976, "learning_rate": 1.2045414299953784e-05, "loss": 1.4748, "step": 11038 }, { "epoch": 0.84, "grad_norm": 1.489919662475586, "learning_rate": 1.203400167907205e-05, "loss": 1.6853, "step": 11039 }, { "epoch": 0.84, 
"grad_norm": 3.4524600505828857, "learning_rate": 1.2022594121139575e-05, "loss": 2.4304, "step": 11040 }, { "epoch": 0.84, "grad_norm": 0.9973757863044739, "learning_rate": 1.2011191626812912e-05, "loss": 1.1807, "step": 11041 }, { "epoch": 0.84, "grad_norm": 1.3369563817977905, "learning_rate": 1.1999794196748315e-05, "loss": 0.8827, "step": 11042 }, { "epoch": 0.84, "grad_norm": 1.3719933032989502, "learning_rate": 1.1988401831601836e-05, "loss": 1.4683, "step": 11043 }, { "epoch": 0.84, "grad_norm": 1.3252400159835815, "learning_rate": 1.1977014532029108e-05, "loss": 1.329, "step": 11044 }, { "epoch": 0.84, "grad_norm": 1.2891169786453247, "learning_rate": 1.1965632298685559e-05, "loss": 1.0485, "step": 11045 }, { "epoch": 0.84, "grad_norm": 1.7557357549667358, "learning_rate": 1.1954255132226322e-05, "loss": 1.7115, "step": 11046 }, { "epoch": 0.84, "grad_norm": 1.1839876174926758, "learning_rate": 1.1942883033306185e-05, "loss": 1.5033, "step": 11047 }, { "epoch": 0.84, "grad_norm": 1.646167278289795, "learning_rate": 1.1931516002579701e-05, "loss": 1.3304, "step": 11048 }, { "epoch": 0.84, "grad_norm": 4.290740966796875, "learning_rate": 1.1920154040701115e-05, "loss": 1.5694, "step": 11049 }, { "epoch": 0.84, "grad_norm": 1.6699624061584473, "learning_rate": 1.1908797148324358e-05, "loss": 1.176, "step": 11050 }, { "epoch": 0.84, "grad_norm": 1.436949610710144, "learning_rate": 1.1897445326103074e-05, "loss": 1.5715, "step": 11051 }, { "epoch": 0.84, "grad_norm": 0.9302831292152405, "learning_rate": 1.1886098574690651e-05, "loss": 1.32, "step": 11052 }, { "epoch": 0.84, "grad_norm": 3.206962823867798, "learning_rate": 1.1874756894740135e-05, "loss": 2.0493, "step": 11053 }, { "epoch": 0.84, "grad_norm": 1.1951037645339966, "learning_rate": 1.1863420286904303e-05, "loss": 1.8125, "step": 11054 }, { "epoch": 0.84, "grad_norm": 1.1706734895706177, "learning_rate": 1.1852088751835689e-05, "loss": 1.3964, "step": 11055 }, { "epoch": 0.84, "grad_norm": 
1.7838817834854126, "learning_rate": 1.1840762290186425e-05, "loss": 0.8558, "step": 11056 }, { "epoch": 0.84, "grad_norm": 1.3083254098892212, "learning_rate": 1.1829440902608446e-05, "loss": 1.2696, "step": 11057 }, { "epoch": 0.84, "grad_norm": 1.0059758424758911, "learning_rate": 1.1818124589753388e-05, "loss": 1.1785, "step": 11058 }, { "epoch": 0.84, "grad_norm": 1.6214759349822998, "learning_rate": 1.1806813352272528e-05, "loss": 1.786, "step": 11059 }, { "epoch": 0.84, "grad_norm": 1.7623833417892456, "learning_rate": 1.17955071908169e-05, "loss": 0.717, "step": 11060 }, { "epoch": 0.84, "grad_norm": 2.778543472290039, "learning_rate": 1.1784206106037266e-05, "loss": 1.8035, "step": 11061 }, { "epoch": 0.84, "grad_norm": 2.4978222846984863, "learning_rate": 1.1772910098584023e-05, "loss": 2.1286, "step": 11062 }, { "epoch": 0.84, "grad_norm": 1.6345820426940918, "learning_rate": 1.176161916910734e-05, "loss": 1.6479, "step": 11063 }, { "epoch": 0.84, "grad_norm": 1.7376724481582642, "learning_rate": 1.1750333318257113e-05, "loss": 1.3442, "step": 11064 }, { "epoch": 0.84, "grad_norm": 1.5498571395874023, "learning_rate": 1.173905254668285e-05, "loss": 1.4692, "step": 11065 }, { "epoch": 0.84, "grad_norm": 1.3210208415985107, "learning_rate": 1.1727776855033857e-05, "loss": 1.3322, "step": 11066 }, { "epoch": 0.84, "grad_norm": 2.880560874938965, "learning_rate": 1.1716506243959124e-05, "loss": 1.1565, "step": 11067 }, { "epoch": 0.84, "grad_norm": 1.466672658920288, "learning_rate": 1.1705240714107302e-05, "loss": 1.6166, "step": 11068 }, { "epoch": 0.84, "grad_norm": 1.0861172676086426, "learning_rate": 1.1693980266126836e-05, "loss": 1.1237, "step": 11069 }, { "epoch": 0.84, "grad_norm": 1.00504732131958, "learning_rate": 1.1682724900665798e-05, "loss": 1.3004, "step": 11070 }, { "epoch": 0.84, "grad_norm": 1.4051094055175781, "learning_rate": 1.167147461837198e-05, "loss": 0.8987, "step": 11071 }, { "epoch": 0.84, "grad_norm": 1.453400731086731, 
"learning_rate": 1.166022941989292e-05, "loss": 1.217, "step": 11072 }, { "epoch": 0.84, "grad_norm": 1.142496109008789, "learning_rate": 1.1648989305875879e-05, "loss": 1.5339, "step": 11073 }, { "epoch": 0.84, "grad_norm": 1.9354833364486694, "learning_rate": 1.1637754276967739e-05, "loss": 0.7776, "step": 11074 }, { "epoch": 0.85, "grad_norm": 3.356468915939331, "learning_rate": 1.1626524333815148e-05, "loss": 1.9733, "step": 11075 }, { "epoch": 0.85, "grad_norm": 1.6635960340499878, "learning_rate": 1.16152994770645e-05, "loss": 1.9216, "step": 11076 }, { "epoch": 0.85, "grad_norm": 1.5463203191757202, "learning_rate": 1.1604079707361792e-05, "loss": 1.8996, "step": 11077 }, { "epoch": 0.85, "grad_norm": 1.3793840408325195, "learning_rate": 1.1592865025352827e-05, "loss": 1.1554, "step": 11078 }, { "epoch": 0.85, "grad_norm": 3.629734516143799, "learning_rate": 1.1581655431683069e-05, "loss": 1.5164, "step": 11079 }, { "epoch": 0.85, "grad_norm": 2.4000799655914307, "learning_rate": 1.1570450926997655e-05, "loss": 1.4884, "step": 11080 }, { "epoch": 0.85, "grad_norm": 1.1579852104187012, "learning_rate": 1.1559251511941493e-05, "loss": 1.1082, "step": 11081 }, { "epoch": 0.85, "grad_norm": 1.430679440498352, "learning_rate": 1.1548057187159212e-05, "loss": 1.3708, "step": 11082 }, { "epoch": 0.85, "grad_norm": 1.4480284452438354, "learning_rate": 1.1536867953295039e-05, "loss": 1.7737, "step": 11083 }, { "epoch": 0.85, "grad_norm": 2.2172603607177734, "learning_rate": 1.1525683810993026e-05, "loss": 1.4196, "step": 11084 }, { "epoch": 0.85, "grad_norm": 4.794674396514893, "learning_rate": 1.1514504760896893e-05, "loss": 1.2595, "step": 11085 }, { "epoch": 0.85, "grad_norm": 1.5997512340545654, "learning_rate": 1.1503330803650025e-05, "loss": 1.1936, "step": 11086 }, { "epoch": 0.85, "grad_norm": 2.6803407669067383, "learning_rate": 1.1492161939895585e-05, "loss": 2.1906, "step": 11087 }, { "epoch": 0.85, "grad_norm": 1.7790659666061401, "learning_rate": 
1.1480998170276369e-05, "loss": 1.1813, "step": 11088 }, { "epoch": 0.85, "grad_norm": 2.1543169021606445, "learning_rate": 1.146983949543492e-05, "loss": 1.358, "step": 11089 }, { "epoch": 0.85, "grad_norm": 1.2200206518173218, "learning_rate": 1.1458685916013501e-05, "loss": 1.1345, "step": 11090 }, { "epoch": 0.85, "grad_norm": 1.3000820875167847, "learning_rate": 1.1447537432654065e-05, "loss": 1.3567, "step": 11091 }, { "epoch": 0.85, "grad_norm": 2.200531244277954, "learning_rate": 1.1436394045998244e-05, "loss": 1.4539, "step": 11092 }, { "epoch": 0.85, "grad_norm": 2.0411128997802734, "learning_rate": 1.1425255756687436e-05, "loss": 1.5732, "step": 11093 }, { "epoch": 0.85, "grad_norm": 1.22051203250885, "learning_rate": 1.1414122565362717e-05, "loss": 1.326, "step": 11094 }, { "epoch": 0.85, "grad_norm": 1.9964574575424194, "learning_rate": 1.140299447266483e-05, "loss": 1.6912, "step": 11095 }, { "epoch": 0.85, "grad_norm": 1.8731365203857422, "learning_rate": 1.1391871479234273e-05, "loss": 1.8776, "step": 11096 }, { "epoch": 0.85, "grad_norm": 2.451831102371216, "learning_rate": 1.138075358571128e-05, "loss": 1.8624, "step": 11097 }, { "epoch": 0.85, "grad_norm": 2.3743293285369873, "learning_rate": 1.1369640792735714e-05, "loss": 2.3711, "step": 11098 }, { "epoch": 0.85, "grad_norm": 1.7660372257232666, "learning_rate": 1.1358533100947156e-05, "loss": 1.7321, "step": 11099 }, { "epoch": 0.85, "grad_norm": 1.194772720336914, "learning_rate": 1.134743051098497e-05, "loss": 1.3878, "step": 11100 }, { "epoch": 0.85, "grad_norm": 3.214531660079956, "learning_rate": 1.1336333023488133e-05, "loss": 1.9827, "step": 11101 }, { "epoch": 0.85, "grad_norm": 1.4987345933914185, "learning_rate": 1.132524063909538e-05, "loss": 0.9247, "step": 11102 }, { "epoch": 0.85, "grad_norm": 1.2680301666259766, "learning_rate": 1.1314153358445167e-05, "loss": 0.994, "step": 11103 }, { "epoch": 0.85, "grad_norm": 1.0525468587875366, "learning_rate": 1.1303071182175585e-05, 
"loss": 1.1494, "step": 11104 }, { "epoch": 0.85, "grad_norm": 1.3567513227462769, "learning_rate": 1.1291994110924509e-05, "loss": 1.9971, "step": 11105 }, { "epoch": 0.85, "grad_norm": 1.0388792753219604, "learning_rate": 1.1280922145329498e-05, "loss": 0.9964, "step": 11106 }, { "epoch": 0.85, "grad_norm": 4.548672199249268, "learning_rate": 1.1269855286027797e-05, "loss": 2.5037, "step": 11107 }, { "epoch": 0.85, "grad_norm": 1.3250938653945923, "learning_rate": 1.1258793533656331e-05, "loss": 1.01, "step": 11108 }, { "epoch": 0.85, "grad_norm": 1.2133513689041138, "learning_rate": 1.1247736888851824e-05, "loss": 0.8748, "step": 11109 }, { "epoch": 0.85, "grad_norm": 1.2848719358444214, "learning_rate": 1.1236685352250597e-05, "loss": 1.3267, "step": 11110 }, { "epoch": 0.85, "grad_norm": 1.5353459119796753, "learning_rate": 1.1225638924488757e-05, "loss": 1.398, "step": 11111 }, { "epoch": 0.85, "grad_norm": 1.5598502159118652, "learning_rate": 1.1214597606202115e-05, "loss": 1.698, "step": 11112 }, { "epoch": 0.85, "grad_norm": 1.4733326435089111, "learning_rate": 1.1203561398026109e-05, "loss": 1.7086, "step": 11113 }, { "epoch": 0.85, "grad_norm": 1.8994604349136353, "learning_rate": 1.119253030059596e-05, "loss": 2.4238, "step": 11114 }, { "epoch": 0.85, "grad_norm": 1.3116079568862915, "learning_rate": 1.1181504314546599e-05, "loss": 0.8901, "step": 11115 }, { "epoch": 0.85, "grad_norm": 1.4775704145431519, "learning_rate": 1.1170483440512614e-05, "loss": 1.4413, "step": 11116 }, { "epoch": 0.85, "grad_norm": 1.202831506729126, "learning_rate": 1.1159467679128288e-05, "loss": 0.9693, "step": 11117 }, { "epoch": 0.85, "grad_norm": 1.7112773656845093, "learning_rate": 1.1148457031027692e-05, "loss": 1.5279, "step": 11118 }, { "epoch": 0.85, "grad_norm": 1.221389889717102, "learning_rate": 1.1137451496844498e-05, "loss": 1.1848, "step": 11119 }, { "epoch": 0.85, "grad_norm": 1.4100466966629028, "learning_rate": 1.1126451077212185e-05, "loss": 1.4012, "step": 
11120 }, { "epoch": 0.85, "grad_norm": 1.6115833520889282, "learning_rate": 1.1115455772763883e-05, "loss": 1.3389, "step": 11121 }, { "epoch": 0.85, "grad_norm": 4.146784782409668, "learning_rate": 1.1104465584132417e-05, "loss": 1.9255, "step": 11122 }, { "epoch": 0.85, "grad_norm": 1.22720468044281, "learning_rate": 1.1093480511950339e-05, "loss": 1.1423, "step": 11123 }, { "epoch": 0.85, "grad_norm": 1.540900468826294, "learning_rate": 1.1082500556849918e-05, "loss": 1.3222, "step": 11124 }, { "epoch": 0.85, "grad_norm": 1.4846135377883911, "learning_rate": 1.1071525719463095e-05, "loss": 1.2118, "step": 11125 }, { "epoch": 0.85, "grad_norm": 1.4209498167037964, "learning_rate": 1.1060556000421563e-05, "loss": 1.3197, "step": 11126 }, { "epoch": 0.85, "grad_norm": 1.5193170309066772, "learning_rate": 1.1049591400356674e-05, "loss": 1.1502, "step": 11127 }, { "epoch": 0.85, "grad_norm": 1.3904693126678467, "learning_rate": 1.103863191989949e-05, "loss": 1.5644, "step": 11128 }, { "epoch": 0.85, "grad_norm": 1.8855533599853516, "learning_rate": 1.1027677559680794e-05, "loss": 1.1872, "step": 11129 }, { "epoch": 0.85, "grad_norm": 1.718925952911377, "learning_rate": 1.1016728320331093e-05, "loss": 1.2841, "step": 11130 }, { "epoch": 0.85, "grad_norm": 1.639920949935913, "learning_rate": 1.1005784202480585e-05, "loss": 1.4098, "step": 11131 }, { "epoch": 0.85, "grad_norm": 1.3447173833847046, "learning_rate": 1.0994845206759141e-05, "loss": 1.6442, "step": 11132 }, { "epoch": 0.85, "grad_norm": 1.846598505973816, "learning_rate": 1.0983911333796359e-05, "loss": 1.9214, "step": 11133 }, { "epoch": 0.85, "grad_norm": 1.5236320495605469, "learning_rate": 1.0972982584221592e-05, "loss": 1.6231, "step": 11134 }, { "epoch": 0.85, "grad_norm": 1.8295462131500244, "learning_rate": 1.0962058958663824e-05, "loss": 1.6799, "step": 11135 }, { "epoch": 0.85, "grad_norm": 1.160500407218933, "learning_rate": 1.0951140457751741e-05, "loss": 1.3219, "step": 11136 }, { "epoch": 
0.85, "grad_norm": 1.5279130935668945, "learning_rate": 1.0940227082113818e-05, "loss": 1.8087, "step": 11137 }, { "epoch": 0.85, "grad_norm": 1.5821839570999146, "learning_rate": 1.0929318832378144e-05, "loss": 1.2345, "step": 11138 }, { "epoch": 0.85, "grad_norm": 1.4450492858886719, "learning_rate": 1.091841570917257e-05, "loss": 1.7618, "step": 11139 }, { "epoch": 0.85, "grad_norm": 3.0070364475250244, "learning_rate": 1.0907517713124638e-05, "loss": 1.3614, "step": 11140 }, { "epoch": 0.85, "grad_norm": 1.229358196258545, "learning_rate": 1.0896624844861569e-05, "loss": 1.0811, "step": 11141 }, { "epoch": 0.85, "grad_norm": 1.4622479677200317, "learning_rate": 1.088573710501033e-05, "loss": 1.1455, "step": 11142 }, { "epoch": 0.85, "grad_norm": 1.0209051370620728, "learning_rate": 1.0874854494197572e-05, "loss": 0.9684, "step": 11143 }, { "epoch": 0.85, "grad_norm": 1.7959492206573486, "learning_rate": 1.0863977013049664e-05, "loss": 1.7563, "step": 11144 }, { "epoch": 0.85, "grad_norm": 1.5462875366210938, "learning_rate": 1.0853104662192615e-05, "loss": 3.0253, "step": 11145 }, { "epoch": 0.85, "grad_norm": 1.2178839445114136, "learning_rate": 1.0842237442252257e-05, "loss": 1.4854, "step": 11146 }, { "epoch": 0.85, "grad_norm": 1.3035776615142822, "learning_rate": 1.0831375353854002e-05, "loss": 1.7272, "step": 11147 }, { "epoch": 0.85, "grad_norm": 1.108523964881897, "learning_rate": 1.0820518397623058e-05, "loss": 1.5615, "step": 11148 }, { "epoch": 0.85, "grad_norm": 1.0348529815673828, "learning_rate": 1.0809666574184329e-05, "loss": 1.4169, "step": 11149 }, { "epoch": 0.85, "grad_norm": 1.6318230628967285, "learning_rate": 1.0798819884162348e-05, "loss": 1.7143, "step": 11150 }, { "epoch": 0.85, "grad_norm": 1.3458337783813477, "learning_rate": 1.0787978328181425e-05, "loss": 1.5392, "step": 11151 }, { "epoch": 0.85, "grad_norm": 2.0620529651641846, "learning_rate": 1.0777141906865584e-05, "loss": 1.0293, "step": 11152 }, { "epoch": 0.85, "grad_norm": 
1.6334625482559204, "learning_rate": 1.0766310620838494e-05, "loss": 1.557, "step": 11153 }, { "epoch": 0.85, "grad_norm": 1.9927805662155151, "learning_rate": 1.0755484470723543e-05, "loss": 0.4839, "step": 11154 }, { "epoch": 0.85, "grad_norm": 1.9443413019180298, "learning_rate": 1.0744663457143878e-05, "loss": 1.4133, "step": 11155 }, { "epoch": 0.85, "grad_norm": 1.4863831996917725, "learning_rate": 1.0733847580722267e-05, "loss": 0.7538, "step": 11156 }, { "epoch": 0.85, "grad_norm": 1.512190580368042, "learning_rate": 1.0723036842081246e-05, "loss": 1.6229, "step": 11157 }, { "epoch": 0.85, "grad_norm": 1.0043195486068726, "learning_rate": 1.0712231241843062e-05, "loss": 1.3594, "step": 11158 }, { "epoch": 0.85, "grad_norm": 1.6535857915878296, "learning_rate": 1.0701430780629585e-05, "loss": 1.3753, "step": 11159 }, { "epoch": 0.85, "grad_norm": 1.2520582675933838, "learning_rate": 1.0690635459062482e-05, "loss": 1.4449, "step": 11160 }, { "epoch": 0.85, "grad_norm": 1.3722844123840332, "learning_rate": 1.067984527776309e-05, "loss": 1.3728, "step": 11161 }, { "epoch": 0.85, "grad_norm": 1.1915167570114136, "learning_rate": 1.0669060237352425e-05, "loss": 1.6988, "step": 11162 }, { "epoch": 0.85, "grad_norm": 3.3113248348236084, "learning_rate": 1.0658280338451243e-05, "loss": 1.4971, "step": 11163 }, { "epoch": 0.85, "grad_norm": 1.227130651473999, "learning_rate": 1.0647505581679984e-05, "loss": 1.8307, "step": 11164 }, { "epoch": 0.85, "grad_norm": 2.93298602104187, "learning_rate": 1.0636735967658784e-05, "loss": 1.4867, "step": 11165 }, { "epoch": 0.85, "grad_norm": 0.8890818357467651, "learning_rate": 1.0625971497007504e-05, "loss": 1.5104, "step": 11166 }, { "epoch": 0.85, "grad_norm": 1.1583712100982666, "learning_rate": 1.0615212170345723e-05, "loss": 1.5158, "step": 11167 }, { "epoch": 0.85, "grad_norm": 1.7214837074279785, "learning_rate": 1.0604457988292671e-05, "loss": 1.2923, "step": 11168 }, { "epoch": 0.85, "grad_norm": 1.3966013193130493, 
"learning_rate": 1.0593708951467319e-05, "loss": 1.011, "step": 11169 }, { "epoch": 0.85, "grad_norm": 1.619653344154358, "learning_rate": 1.0582965060488359e-05, "loss": 2.0643, "step": 11170 }, { "epoch": 0.85, "grad_norm": 1.377287745475769, "learning_rate": 1.0572226315974131e-05, "loss": 1.4359, "step": 11171 }, { "epoch": 0.85, "grad_norm": 1.4416030645370483, "learning_rate": 1.0561492718542754e-05, "loss": 0.9684, "step": 11172 }, { "epoch": 0.85, "grad_norm": 1.572688341140747, "learning_rate": 1.0550764268811986e-05, "loss": 1.1785, "step": 11173 }, { "epoch": 0.85, "grad_norm": 1.7064118385314941, "learning_rate": 1.0540040967399279e-05, "loss": 0.9456, "step": 11174 }, { "epoch": 0.85, "grad_norm": 6.384535789489746, "learning_rate": 1.052932281492185e-05, "loss": 2.2474, "step": 11175 }, { "epoch": 0.85, "grad_norm": 1.11014986038208, "learning_rate": 1.0518609811996616e-05, "loss": 1.0022, "step": 11176 }, { "epoch": 0.85, "grad_norm": 1.5083670616149902, "learning_rate": 1.050790195924012e-05, "loss": 1.3372, "step": 11177 }, { "epoch": 0.85, "grad_norm": 1.179936408996582, "learning_rate": 1.0497199257268686e-05, "loss": 1.0479, "step": 11178 }, { "epoch": 0.85, "grad_norm": 2.052049160003662, "learning_rate": 1.0486501706698338e-05, "loss": 1.1219, "step": 11179 }, { "epoch": 0.85, "grad_norm": 2.2898106575012207, "learning_rate": 1.0475809308144747e-05, "loss": 1.2915, "step": 11180 }, { "epoch": 0.85, "grad_norm": 0.9215800166130066, "learning_rate": 1.0465122062223343e-05, "loss": 1.0184, "step": 11181 }, { "epoch": 0.85, "grad_norm": 2.421630382537842, "learning_rate": 1.0454439969549234e-05, "loss": 1.6055, "step": 11182 }, { "epoch": 0.85, "grad_norm": 2.2036681175231934, "learning_rate": 1.0443763030737208e-05, "loss": 1.0477, "step": 11183 }, { "epoch": 0.85, "grad_norm": 1.5433613061904907, "learning_rate": 1.0433091246401815e-05, "loss": 1.9269, "step": 11184 }, { "epoch": 0.85, "grad_norm": 1.2346553802490234, "learning_rate": 
1.042242461715729e-05, "loss": 0.994, "step": 11185 }, { "epoch": 0.85, "grad_norm": 2.3804075717926025, "learning_rate": 1.0411763143617526e-05, "loss": 1.665, "step": 11186 }, { "epoch": 0.85, "grad_norm": 2.5358285903930664, "learning_rate": 1.0401106826396156e-05, "loss": 1.3029, "step": 11187 }, { "epoch": 0.85, "grad_norm": 1.016827940940857, "learning_rate": 1.0390455666106547e-05, "loss": 1.5137, "step": 11188 }, { "epoch": 0.85, "grad_norm": 0.8587220311164856, "learning_rate": 1.0379809663361694e-05, "loss": 1.5097, "step": 11189 }, { "epoch": 0.85, "grad_norm": 1.307289481163025, "learning_rate": 1.0369168818774366e-05, "loss": 1.8979, "step": 11190 }, { "epoch": 0.85, "grad_norm": 5.594599723815918, "learning_rate": 1.035853313295697e-05, "loss": 1.7545, "step": 11191 }, { "epoch": 0.85, "grad_norm": 1.3252912759780884, "learning_rate": 1.0347902606521686e-05, "loss": 1.6931, "step": 11192 }, { "epoch": 0.85, "grad_norm": 1.5595146417617798, "learning_rate": 1.0337277240080346e-05, "loss": 1.8602, "step": 11193 }, { "epoch": 0.85, "grad_norm": 1.5950504541397095, "learning_rate": 1.0326657034244503e-05, "loss": 1.4831, "step": 11194 }, { "epoch": 0.85, "grad_norm": 2.9005088806152344, "learning_rate": 1.0316041989625402e-05, "loss": 1.6008, "step": 11195 }, { "epoch": 0.85, "grad_norm": 1.8113853931427002, "learning_rate": 1.0305432106834001e-05, "loss": 1.1901, "step": 11196 }, { "epoch": 0.85, "grad_norm": 1.8072021007537842, "learning_rate": 1.0294827386480987e-05, "loss": 1.3012, "step": 11197 }, { "epoch": 0.85, "grad_norm": 1.3784562349319458, "learning_rate": 1.0284227829176696e-05, "loss": 1.3312, "step": 11198 }, { "epoch": 0.85, "grad_norm": 2.058736562728882, "learning_rate": 1.0273633435531183e-05, "loss": 1.5, "step": 11199 }, { "epoch": 0.85, "grad_norm": 1.6980007886886597, "learning_rate": 1.026304420615426e-05, "loss": 1.2235, "step": 11200 }, { "epoch": 0.85, "grad_norm": 1.4954923391342163, "learning_rate": 1.0252460141655374e-05, 
"loss": 1.3627, "step": 11201 }, { "epoch": 0.85, "grad_norm": 1.3344435691833496, "learning_rate": 1.024188124264367e-05, "loss": 1.3957, "step": 11202 }, { "epoch": 0.85, "grad_norm": 1.2140685319900513, "learning_rate": 1.0231307509728072e-05, "loss": 1.663, "step": 11203 }, { "epoch": 0.85, "grad_norm": 2.086958885192871, "learning_rate": 1.0220738943517128e-05, "loss": 1.2142, "step": 11204 }, { "epoch": 0.85, "grad_norm": 1.2977463006973267, "learning_rate": 1.0210175544619116e-05, "loss": 0.9717, "step": 11205 }, { "epoch": 0.86, "grad_norm": 1.1836010217666626, "learning_rate": 1.0199617313642063e-05, "loss": 1.2074, "step": 11206 }, { "epoch": 0.86, "grad_norm": 0.984197735786438, "learning_rate": 1.0189064251193603e-05, "loss": 1.1872, "step": 11207 }, { "epoch": 0.86, "grad_norm": 2.403122901916504, "learning_rate": 1.0178516357881152e-05, "loss": 1.1662, "step": 11208 }, { "epoch": 0.86, "grad_norm": 2.1358280181884766, "learning_rate": 1.0167973634311823e-05, "loss": 1.3453, "step": 11209 }, { "epoch": 0.86, "grad_norm": 1.118740439414978, "learning_rate": 1.0157436081092375e-05, "loss": 0.8998, "step": 11210 }, { "epoch": 0.86, "grad_norm": 1.8738133907318115, "learning_rate": 1.0146903698829313e-05, "loss": 1.3147, "step": 11211 }, { "epoch": 0.86, "grad_norm": 1.026065468788147, "learning_rate": 1.0136376488128851e-05, "loss": 1.1589, "step": 11212 }, { "epoch": 0.86, "grad_norm": 1.0455389022827148, "learning_rate": 1.012585444959685e-05, "loss": 1.4299, "step": 11213 }, { "epoch": 0.86, "grad_norm": 1.8033894300460815, "learning_rate": 1.0115337583838957e-05, "loss": 1.2651, "step": 11214 }, { "epoch": 0.86, "grad_norm": 1.6160680055618286, "learning_rate": 1.010482589146048e-05, "loss": 1.5298, "step": 11215 }, { "epoch": 0.86, "grad_norm": 2.334845781326294, "learning_rate": 1.0094319373066386e-05, "loss": 1.4521, "step": 11216 }, { "epoch": 0.86, "grad_norm": 1.4792017936706543, "learning_rate": 1.0083818029261416e-05, "loss": 1.0509, "step": 
11217 }, { "epoch": 0.86, "grad_norm": 1.183870792388916, "learning_rate": 1.0073321860649998e-05, "loss": 1.2341, "step": 11218 }, { "epoch": 0.86, "grad_norm": 1.306630253791809, "learning_rate": 1.0062830867836226e-05, "loss": 1.2574, "step": 11219 }, { "epoch": 0.86, "grad_norm": 2.1347270011901855, "learning_rate": 1.0052345051423906e-05, "loss": 1.7737, "step": 11220 }, { "epoch": 0.86, "grad_norm": 1.2056066989898682, "learning_rate": 1.0041864412016578e-05, "loss": 0.9569, "step": 11221 }, { "epoch": 0.86, "grad_norm": 1.1850378513336182, "learning_rate": 1.0031388950217434e-05, "loss": 1.6604, "step": 11222 }, { "epoch": 0.86, "grad_norm": 1.4508379697799683, "learning_rate": 1.002091866662943e-05, "loss": 1.3307, "step": 11223 }, { "epoch": 0.86, "grad_norm": 4.929997444152832, "learning_rate": 1.0010453561855182e-05, "loss": 1.738, "step": 11224 }, { "epoch": 0.86, "grad_norm": 1.5637656450271606, "learning_rate": 9.999993636497008e-06, "loss": 1.5754, "step": 11225 }, { "epoch": 0.86, "grad_norm": 1.379596471786499, "learning_rate": 9.989538891156935e-06, "loss": 1.6736, "step": 11226 }, { "epoch": 0.86, "grad_norm": 1.495634913444519, "learning_rate": 9.979089326436719e-06, "loss": 1.776, "step": 11227 }, { "epoch": 0.86, "grad_norm": 1.3513728380203247, "learning_rate": 9.968644942937755e-06, "loss": 0.862, "step": 11228 }, { "epoch": 0.86, "grad_norm": 1.8678019046783447, "learning_rate": 9.958205741261217e-06, "loss": 1.4665, "step": 11229 }, { "epoch": 0.86, "grad_norm": 1.0673168897628784, "learning_rate": 9.947771722007915e-06, "loss": 1.3079, "step": 11230 }, { "epoch": 0.86, "grad_norm": 1.2504394054412842, "learning_rate": 9.937342885778378e-06, "loss": 1.3796, "step": 11231 }, { "epoch": 0.86, "grad_norm": 2.347726821899414, "learning_rate": 9.92691923317286e-06, "loss": 1.2361, "step": 11232 }, { "epoch": 0.86, "grad_norm": 2.3782925605773926, "learning_rate": 9.916500764791326e-06, "loss": 1.7487, "step": 11233 }, { "epoch": 0.86, 
"grad_norm": 1.4610612392425537, "learning_rate": 9.906087481233373e-06, "loss": 1.5717, "step": 11234 }, { "epoch": 0.86, "grad_norm": 1.3660283088684082, "learning_rate": 9.895679383098367e-06, "loss": 1.5186, "step": 11235 }, { "epoch": 0.86, "grad_norm": 1.697239637374878, "learning_rate": 9.885276470985372e-06, "loss": 1.0897, "step": 11236 }, { "epoch": 0.86, "grad_norm": 1.4376235008239746, "learning_rate": 9.8748787454931e-06, "loss": 1.0136, "step": 11237 }, { "epoch": 0.86, "grad_norm": 2.699928045272827, "learning_rate": 9.864486207220036e-06, "loss": 1.714, "step": 11238 }, { "epoch": 0.86, "grad_norm": 1.4856858253479004, "learning_rate": 9.854098856764316e-06, "loss": 1.4525, "step": 11239 }, { "epoch": 0.86, "grad_norm": 1.0729399919509888, "learning_rate": 9.84371669472376e-06, "loss": 0.8976, "step": 11240 }, { "epoch": 0.86, "grad_norm": 1.5387407541275024, "learning_rate": 9.833339721695955e-06, "loss": 1.0845, "step": 11241 }, { "epoch": 0.86, "grad_norm": 0.9967383146286011, "learning_rate": 9.822967938278171e-06, "loss": 0.7358, "step": 11242 }, { "epoch": 0.86, "grad_norm": 1.0161709785461426, "learning_rate": 9.812601345067319e-06, "loss": 1.0031, "step": 11243 }, { "epoch": 0.86, "grad_norm": 0.9773069024085999, "learning_rate": 9.802239942660074e-06, "loss": 0.5726, "step": 11244 }, { "epoch": 0.86, "grad_norm": 1.7935625314712524, "learning_rate": 9.791883731652828e-06, "loss": 0.9211, "step": 11245 }, { "epoch": 0.86, "grad_norm": 1.41087806224823, "learning_rate": 9.781532712641594e-06, "loss": 0.8981, "step": 11246 }, { "epoch": 0.86, "grad_norm": 2.0970287322998047, "learning_rate": 9.77118688622216e-06, "loss": 1.8212, "step": 11247 }, { "epoch": 0.86, "grad_norm": 2.328401803970337, "learning_rate": 9.760846252989997e-06, "loss": 0.9022, "step": 11248 }, { "epoch": 0.86, "grad_norm": 1.646218180656433, "learning_rate": 9.750510813540214e-06, "loss": 1.7293, "step": 11249 }, { "epoch": 0.86, "grad_norm": 2.892874002456665, 
"learning_rate": 9.740180568467717e-06, "loss": 0.7134, "step": 11250 }, { "epoch": 0.86, "grad_norm": 2.9563686847686768, "learning_rate": 9.72985551836708e-06, "loss": 1.532, "step": 11251 }, { "epoch": 0.86, "grad_norm": 1.341181755065918, "learning_rate": 9.719535663832534e-06, "loss": 1.21, "step": 11252 }, { "epoch": 0.86, "grad_norm": 1.6695400476455688, "learning_rate": 9.70922100545807e-06, "loss": 1.1769, "step": 11253 }, { "epoch": 0.86, "grad_norm": 2.394589900970459, "learning_rate": 9.698911543837375e-06, "loss": 1.2296, "step": 11254 }, { "epoch": 0.86, "grad_norm": 1.154628038406372, "learning_rate": 9.688607279563766e-06, "loss": 1.6664, "step": 11255 }, { "epoch": 0.86, "grad_norm": 1.354310154914856, "learning_rate": 9.678308213230347e-06, "loss": 1.4335, "step": 11256 }, { "epoch": 0.86, "grad_norm": 1.1801514625549316, "learning_rate": 9.668014345429898e-06, "loss": 1.1775, "step": 11257 }, { "epoch": 0.86, "grad_norm": 0.822495698928833, "learning_rate": 9.65772567675488e-06, "loss": 0.9629, "step": 11258 }, { "epoch": 0.86, "grad_norm": 1.52295982837677, "learning_rate": 9.64744220779743e-06, "loss": 1.0609, "step": 11259 }, { "epoch": 0.86, "grad_norm": 3.588303327560425, "learning_rate": 9.637163939149485e-06, "loss": 1.6965, "step": 11260 }, { "epoch": 0.86, "grad_norm": 1.7920247316360474, "learning_rate": 9.626890871402561e-06, "loss": 1.2195, "step": 11261 }, { "epoch": 0.86, "grad_norm": 2.057985782623291, "learning_rate": 9.616623005147951e-06, "loss": 1.3758, "step": 11262 }, { "epoch": 0.86, "grad_norm": 1.3025884628295898, "learning_rate": 9.60636034097665e-06, "loss": 1.6826, "step": 11263 }, { "epoch": 0.86, "grad_norm": 1.151187539100647, "learning_rate": 9.596102879479307e-06, "loss": 1.3468, "step": 11264 }, { "epoch": 0.86, "grad_norm": 1.2474846839904785, "learning_rate": 9.585850621246306e-06, "loss": 1.3196, "step": 11265 }, { "epoch": 0.86, "grad_norm": 1.6934690475463867, "learning_rate": 9.575603566867742e-06, "loss": 
1.6032, "step": 11266 }, { "epoch": 0.86, "grad_norm": 0.9577284455299377, "learning_rate": 9.565361716933374e-06, "loss": 1.1507, "step": 11267 }, { "epoch": 0.86, "grad_norm": 1.1378583908081055, "learning_rate": 9.555125072032667e-06, "loss": 1.1429, "step": 11268 }, { "epoch": 0.86, "grad_norm": 1.6053154468536377, "learning_rate": 9.544893632754814e-06, "loss": 1.1986, "step": 11269 }, { "epoch": 0.86, "grad_norm": 1.612874984741211, "learning_rate": 9.534667399688702e-06, "loss": 1.5108, "step": 11270 }, { "epoch": 0.86, "grad_norm": 1.021183967590332, "learning_rate": 9.52444637342289e-06, "loss": 1.5396, "step": 11271 }, { "epoch": 0.86, "grad_norm": 1.3350608348846436, "learning_rate": 9.514230554545667e-06, "loss": 1.4123, "step": 11272 }, { "epoch": 0.86, "grad_norm": 1.224621057510376, "learning_rate": 9.504019943645027e-06, "loss": 1.301, "step": 11273 }, { "epoch": 0.86, "grad_norm": 1.7558919191360474, "learning_rate": 9.493814541308621e-06, "loss": 2.5788, "step": 11274 }, { "epoch": 0.86, "grad_norm": 1.5081548690795898, "learning_rate": 9.48361434812386e-06, "loss": 1.4128, "step": 11275 }, { "epoch": 0.86, "grad_norm": 1.1862260103225708, "learning_rate": 9.473419364677794e-06, "loss": 1.4415, "step": 11276 }, { "epoch": 0.86, "grad_norm": 1.7683758735656738, "learning_rate": 9.46322959155721e-06, "loss": 1.2641, "step": 11277 }, { "epoch": 0.86, "grad_norm": 1.7337599992752075, "learning_rate": 9.453045029348584e-06, "loss": 1.4424, "step": 11278 }, { "epoch": 0.86, "grad_norm": 1.4343984127044678, "learning_rate": 9.442865678638125e-06, "loss": 0.6167, "step": 11279 }, { "epoch": 0.86, "grad_norm": 1.4879419803619385, "learning_rate": 9.432691540011674e-06, "loss": 1.0329, "step": 11280 }, { "epoch": 0.86, "grad_norm": 1.1002638339996338, "learning_rate": 9.42252261405483e-06, "loss": 1.1129, "step": 11281 }, { "epoch": 0.86, "grad_norm": 2.1527864933013916, "learning_rate": 9.41235890135289e-06, "loss": 1.3449, "step": 11282 }, { "epoch": 
0.86, "grad_norm": 1.4155384302139282, "learning_rate": 9.402200402490813e-06, "loss": 1.044, "step": 11283 }, { "epoch": 0.86, "grad_norm": 1.2087464332580566, "learning_rate": 9.392047118053282e-06, "loss": 1.7572, "step": 11284 }, { "epoch": 0.86, "grad_norm": 1.9637470245361328, "learning_rate": 9.381899048624676e-06, "loss": 1.4517, "step": 11285 }, { "epoch": 0.86, "grad_norm": 1.3528469800949097, "learning_rate": 9.371756194789082e-06, "loss": 1.1147, "step": 11286 }, { "epoch": 0.86, "grad_norm": 1.438973069190979, "learning_rate": 9.36161855713027e-06, "loss": 1.4392, "step": 11287 }, { "epoch": 0.86, "grad_norm": 0.9859246015548706, "learning_rate": 9.351486136231735e-06, "loss": 0.9532, "step": 11288 }, { "epoch": 0.86, "grad_norm": 1.699727177619934, "learning_rate": 9.341358932676625e-06, "loss": 0.8717, "step": 11289 }, { "epoch": 0.86, "grad_norm": 1.5218967199325562, "learning_rate": 9.33123694704784e-06, "loss": 1.1493, "step": 11290 }, { "epoch": 0.86, "grad_norm": 1.5675277709960938, "learning_rate": 9.321120179927978e-06, "loss": 1.1848, "step": 11291 }, { "epoch": 0.86, "grad_norm": 1.4212626218795776, "learning_rate": 9.311008631899287e-06, "loss": 0.9906, "step": 11292 }, { "epoch": 0.86, "grad_norm": 1.7913482189178467, "learning_rate": 9.300902303543747e-06, "loss": 1.2041, "step": 11293 }, { "epoch": 0.86, "grad_norm": 1.3891055583953857, "learning_rate": 9.290801195443054e-06, "loss": 1.2297, "step": 11294 }, { "epoch": 0.86, "grad_norm": 1.5536783933639526, "learning_rate": 9.280705308178583e-06, "loss": 0.8915, "step": 11295 }, { "epoch": 0.86, "grad_norm": 1.1766886711120605, "learning_rate": 9.270614642331376e-06, "loss": 1.5004, "step": 11296 }, { "epoch": 0.86, "grad_norm": 2.2535970211029053, "learning_rate": 9.260529198482259e-06, "loss": 1.3283, "step": 11297 }, { "epoch": 0.86, "grad_norm": 1.366347312927246, "learning_rate": 9.250448977211657e-06, "loss": 1.3252, "step": 11298 }, { "epoch": 0.86, "grad_norm": 
0.9499399065971375, "learning_rate": 9.240373979099782e-06, "loss": 1.0927, "step": 11299 }, { "epoch": 0.86, "grad_norm": 1.9857615232467651, "learning_rate": 9.230304204726503e-06, "loss": 0.4802, "step": 11300 }, { "epoch": 0.86, "grad_norm": 1.347132682800293, "learning_rate": 9.22023965467137e-06, "loss": 1.0259, "step": 11301 }, { "epoch": 0.86, "grad_norm": 1.3723132610321045, "learning_rate": 9.210180329513674e-06, "loss": 1.2965, "step": 11302 }, { "epoch": 0.86, "grad_norm": 0.9850864410400391, "learning_rate": 9.200126229832407e-06, "loss": 1.2491, "step": 11303 }, { "epoch": 0.86, "grad_norm": 0.9851043224334717, "learning_rate": 9.190077356206217e-06, "loss": 0.8559, "step": 11304 }, { "epoch": 0.86, "grad_norm": 1.6680388450622559, "learning_rate": 9.180033709213454e-06, "loss": 1.3537, "step": 11305 }, { "epoch": 0.86, "grad_norm": 1.6106868982315063, "learning_rate": 9.169995289432232e-06, "loss": 1.5514, "step": 11306 }, { "epoch": 0.86, "grad_norm": 1.4216479063034058, "learning_rate": 9.159962097440278e-06, "loss": 1.2142, "step": 11307 }, { "epoch": 0.86, "grad_norm": 1.6848632097244263, "learning_rate": 9.149934133815075e-06, "loss": 1.7191, "step": 11308 }, { "epoch": 0.86, "grad_norm": 1.2150026559829712, "learning_rate": 9.139911399133816e-06, "loss": 1.3465, "step": 11309 }, { "epoch": 0.86, "grad_norm": 1.6175371408462524, "learning_rate": 9.12989389397333e-06, "loss": 0.894, "step": 11310 }, { "epoch": 0.86, "grad_norm": 2.4167137145996094, "learning_rate": 9.119881618910198e-06, "loss": 1.3414, "step": 11311 }, { "epoch": 0.86, "grad_norm": 1.6557685136795044, "learning_rate": 9.109874574520694e-06, "loss": 1.2231, "step": 11312 }, { "epoch": 0.86, "grad_norm": 1.4203133583068848, "learning_rate": 9.099872761380778e-06, "loss": 1.5895, "step": 11313 }, { "epoch": 0.86, "grad_norm": 1.2934448719024658, "learning_rate": 9.089876180066093e-06, "loss": 0.9971, "step": 11314 }, { "epoch": 0.86, "grad_norm": 3.239237070083618, "learning_rate": 
9.079884831152019e-06, "loss": 1.5093, "step": 11315 }, { "epoch": 0.86, "grad_norm": 1.634143590927124, "learning_rate": 9.069898715213588e-06, "loss": 1.2058, "step": 11316 }, { "epoch": 0.86, "grad_norm": 1.2893505096435547, "learning_rate": 9.059917832825582e-06, "loss": 1.0595, "step": 11317 }, { "epoch": 0.86, "grad_norm": 1.7148302793502808, "learning_rate": 9.049942184562477e-06, "loss": 1.677, "step": 11318 }, { "epoch": 0.86, "grad_norm": 1.1580979824066162, "learning_rate": 9.03997177099838e-06, "loss": 1.5933, "step": 11319 }, { "epoch": 0.86, "grad_norm": 1.1876474618911743, "learning_rate": 9.030006592707174e-06, "loss": 0.8606, "step": 11320 }, { "epoch": 0.86, "grad_norm": 1.3839830160140991, "learning_rate": 9.020046650262438e-06, "loss": 1.1951, "step": 11321 }, { "epoch": 0.86, "grad_norm": 1.147110939025879, "learning_rate": 9.010091944237365e-06, "loss": 1.152, "step": 11322 }, { "epoch": 0.86, "grad_norm": 1.8834657669067383, "learning_rate": 9.000142475204964e-06, "loss": 1.1449, "step": 11323 }, { "epoch": 0.86, "grad_norm": 3.2917075157165527, "learning_rate": 8.990198243737858e-06, "loss": 1.6123, "step": 11324 }, { "epoch": 0.86, "grad_norm": 1.126856803894043, "learning_rate": 8.980259250408363e-06, "loss": 1.1582, "step": 11325 }, { "epoch": 0.86, "grad_norm": 1.1191649436950684, "learning_rate": 8.970325495788567e-06, "loss": 1.0998, "step": 11326 }, { "epoch": 0.86, "grad_norm": 1.4198739528656006, "learning_rate": 8.960396980450225e-06, "loss": 1.6745, "step": 11327 }, { "epoch": 0.86, "grad_norm": 1.996631383895874, "learning_rate": 8.950473704964735e-06, "loss": 1.8464, "step": 11328 }, { "epoch": 0.86, "grad_norm": 1.571574330329895, "learning_rate": 8.940555669903261e-06, "loss": 1.0503, "step": 11329 }, { "epoch": 0.86, "grad_norm": 1.5332257747650146, "learning_rate": 8.93064287583667e-06, "loss": 1.3451, "step": 11330 }, { "epoch": 0.86, "grad_norm": 2.630937337875366, "learning_rate": 8.920735323335449e-06, "loss": 1.5429, 
"step": 11331 }, { "epoch": 0.86, "grad_norm": 1.3401991128921509, "learning_rate": 8.910833012969877e-06, "loss": 1.4815, "step": 11332 }, { "epoch": 0.86, "grad_norm": 1.6075142621994019, "learning_rate": 8.900935945309884e-06, "loss": 1.2666, "step": 11333 }, { "epoch": 0.86, "grad_norm": 2.0201380252838135, "learning_rate": 8.891044120925063e-06, "loss": 1.4356, "step": 11334 }, { "epoch": 0.86, "grad_norm": 1.221336841583252, "learning_rate": 8.881157540384777e-06, "loss": 1.4034, "step": 11335 }, { "epoch": 0.86, "grad_norm": 2.419989585876465, "learning_rate": 8.871276204258072e-06, "loss": 0.7307, "step": 11336 }, { "epoch": 0.87, "grad_norm": 1.1770858764648438, "learning_rate": 8.861400113113638e-06, "loss": 1.3435, "step": 11337 }, { "epoch": 0.87, "grad_norm": 4.244934558868408, "learning_rate": 8.851529267519921e-06, "loss": 2.2549, "step": 11338 }, { "epoch": 0.87, "grad_norm": 2.079965591430664, "learning_rate": 8.841663668045052e-06, "loss": 0.9538, "step": 11339 }, { "epoch": 0.87, "grad_norm": 2.9220998287200928, "learning_rate": 8.831803315256837e-06, "loss": 1.7216, "step": 11340 }, { "epoch": 0.87, "grad_norm": 1.3526792526245117, "learning_rate": 8.821948209722819e-06, "loss": 1.0276, "step": 11341 }, { "epoch": 0.87, "grad_norm": 2.4623169898986816, "learning_rate": 8.8120983520102e-06, "loss": 1.3689, "step": 11342 }, { "epoch": 0.87, "grad_norm": 1.798775553703308, "learning_rate": 8.802253742685873e-06, "loss": 1.3801, "step": 11343 }, { "epoch": 0.87, "grad_norm": 1.7162734270095825, "learning_rate": 8.792414382316483e-06, "loss": 1.2484, "step": 11344 }, { "epoch": 0.87, "grad_norm": 1.4879746437072754, "learning_rate": 8.782580271468344e-06, "loss": 1.5899, "step": 11345 }, { "epoch": 0.87, "grad_norm": 1.6503695249557495, "learning_rate": 8.772751410707448e-06, "loss": 1.1951, "step": 11346 }, { "epoch": 0.87, "grad_norm": 1.3923535346984863, "learning_rate": 8.762927800599497e-06, "loss": 0.7023, "step": 11347 }, { "epoch": 0.87, 
"grad_norm": 1.7251274585723877, "learning_rate": 8.753109441709939e-06, "loss": 1.1088, "step": 11348 }, { "epoch": 0.87, "grad_norm": 1.998656988143921, "learning_rate": 8.743296334603824e-06, "loss": 1.4787, "step": 11349 }, { "epoch": 0.87, "grad_norm": 1.797285556793213, "learning_rate": 8.733488479845997e-06, "loss": 1.0043, "step": 11350 }, { "epoch": 0.87, "grad_norm": 1.239742398262024, "learning_rate": 8.723685878000909e-06, "loss": 1.654, "step": 11351 }, { "epoch": 0.87, "grad_norm": 0.9481651782989502, "learning_rate": 8.713888529632807e-06, "loss": 1.1281, "step": 11352 }, { "epoch": 0.87, "grad_norm": 1.2357892990112305, "learning_rate": 8.704096435305542e-06, "loss": 1.3099, "step": 11353 }, { "epoch": 0.87, "grad_norm": 1.4661637544631958, "learning_rate": 8.69430959558275e-06, "loss": 1.5175, "step": 11354 }, { "epoch": 0.87, "grad_norm": 1.8896851539611816, "learning_rate": 8.684528011027659e-06, "loss": 0.6215, "step": 11355 }, { "epoch": 0.87, "grad_norm": 1.4490880966186523, "learning_rate": 8.674751682203308e-06, "loss": 1.8053, "step": 11356 }, { "epoch": 0.87, "grad_norm": 1.1933424472808838, "learning_rate": 8.664980609672368e-06, "loss": 1.3533, "step": 11357 }, { "epoch": 0.87, "grad_norm": 1.2595633268356323, "learning_rate": 8.655214793997202e-06, "loss": 1.2939, "step": 11358 }, { "epoch": 0.87, "grad_norm": 1.47130286693573, "learning_rate": 8.645454235739903e-06, "loss": 1.3005, "step": 11359 }, { "epoch": 0.87, "grad_norm": 1.5933018922805786, "learning_rate": 8.635698935462266e-06, "loss": 1.2894, "step": 11360 }, { "epoch": 0.87, "grad_norm": 1.7590023279190063, "learning_rate": 8.625948893725743e-06, "loss": 2.0328, "step": 11361 }, { "epoch": 0.87, "grad_norm": 1.2279915809631348, "learning_rate": 8.616204111091486e-06, "loss": 0.9685, "step": 11362 }, { "epoch": 0.87, "grad_norm": 1.412453532218933, "learning_rate": 8.606464588120411e-06, "loss": 1.3423, "step": 11363 }, { "epoch": 0.87, "grad_norm": 1.5807042121887207, 
"learning_rate": 8.596730325373037e-06, "loss": 0.9388, "step": 11364 }, { "epoch": 0.87, "grad_norm": 2.3943030834198, "learning_rate": 8.587001323409638e-06, "loss": 1.4688, "step": 11365 }, { "epoch": 0.87, "grad_norm": 1.333552360534668, "learning_rate": 8.57727758279021e-06, "loss": 1.568, "step": 11366 }, { "epoch": 0.87, "grad_norm": 1.0708600282669067, "learning_rate": 8.567559104074363e-06, "loss": 0.6463, "step": 11367 }, { "epoch": 0.87, "grad_norm": 1.8516366481781006, "learning_rate": 8.55784588782147e-06, "loss": 1.5361, "step": 11368 }, { "epoch": 0.87, "grad_norm": 1.1458697319030762, "learning_rate": 8.548137934590606e-06, "loss": 0.8655, "step": 11369 }, { "epoch": 0.87, "grad_norm": 3.545872688293457, "learning_rate": 8.538435244940502e-06, "loss": 1.3735, "step": 11370 }, { "epoch": 0.87, "grad_norm": 1.0819144248962402, "learning_rate": 8.528737819429578e-06, "loss": 0.838, "step": 11371 }, { "epoch": 0.87, "grad_norm": 1.3298711776733398, "learning_rate": 8.51904565861601e-06, "loss": 1.5919, "step": 11372 }, { "epoch": 0.87, "grad_norm": 1.0539852380752563, "learning_rate": 8.509358763057607e-06, "loss": 1.3735, "step": 11373 }, { "epoch": 0.87, "grad_norm": 1.7283730506896973, "learning_rate": 8.499677133311934e-06, "loss": 1.6649, "step": 11374 }, { "epoch": 0.87, "grad_norm": 1.6566798686981201, "learning_rate": 8.490000769936224e-06, "loss": 1.5551, "step": 11375 }, { "epoch": 0.87, "grad_norm": 1.099918007850647, "learning_rate": 8.480329673487386e-06, "loss": 1.1567, "step": 11376 }, { "epoch": 0.87, "grad_norm": 2.44154691696167, "learning_rate": 8.470663844522052e-06, "loss": 1.7726, "step": 11377 }, { "epoch": 0.87, "grad_norm": 1.3225589990615845, "learning_rate": 8.461003283596569e-06, "loss": 1.4594, "step": 11378 }, { "epoch": 0.87, "grad_norm": 1.66371488571167, "learning_rate": 8.451347991266956e-06, "loss": 1.4208, "step": 11379 }, { "epoch": 0.87, "grad_norm": 4.299140930175781, "learning_rate": 8.441697968088891e-06, "loss": 
1.2094, "step": 11380 }, { "epoch": 0.87, "grad_norm": 1.7395639419555664, "learning_rate": 8.43205321461783e-06, "loss": 0.935, "step": 11381 }, { "epoch": 0.87, "grad_norm": 1.6151671409606934, "learning_rate": 8.422413731408852e-06, "loss": 1.3666, "step": 11382 }, { "epoch": 0.87, "grad_norm": 1.4072884321212769, "learning_rate": 8.412779519016778e-06, "loss": 1.6634, "step": 11383 }, { "epoch": 0.87, "grad_norm": 1.1652556657791138, "learning_rate": 8.403150577996145e-06, "loss": 1.0, "step": 11384 }, { "epoch": 0.87, "grad_norm": 1.2161002159118652, "learning_rate": 8.393526908901096e-06, "loss": 1.4583, "step": 11385 }, { "epoch": 0.87, "grad_norm": 1.6599394083023071, "learning_rate": 8.383908512285555e-06, "loss": 0.905, "step": 11386 }, { "epoch": 0.87, "grad_norm": 3.6668787002563477, "learning_rate": 8.374295388703135e-06, "loss": 2.9259, "step": 11387 }, { "epoch": 0.87, "grad_norm": 1.8380393981933594, "learning_rate": 8.364687538707094e-06, "loss": 1.676, "step": 11388 }, { "epoch": 0.87, "grad_norm": 0.9322431683540344, "learning_rate": 8.355084962850458e-06, "loss": 1.1521, "step": 11389 }, { "epoch": 0.87, "grad_norm": 1.9419509172439575, "learning_rate": 8.34548766168588e-06, "loss": 1.6207, "step": 11390 }, { "epoch": 0.87, "grad_norm": 1.312372088432312, "learning_rate": 8.335895635765734e-06, "loss": 1.4272, "step": 11391 }, { "epoch": 0.87, "grad_norm": 1.264775037765503, "learning_rate": 8.326308885642109e-06, "loss": 1.0693, "step": 11392 }, { "epoch": 0.87, "grad_norm": 2.9735007286071777, "learning_rate": 8.316727411866787e-06, "loss": 1.3669, "step": 11393 }, { "epoch": 0.87, "grad_norm": 1.2412924766540527, "learning_rate": 8.307151214991216e-06, "loss": 0.8974, "step": 11394 }, { "epoch": 0.87, "grad_norm": 0.9789376854896545, "learning_rate": 8.297580295566575e-06, "loss": 1.2963, "step": 11395 }, { "epoch": 0.87, "grad_norm": 1.088975191116333, "learning_rate": 8.288014654143739e-06, "loss": 1.3271, "step": 11396 }, { "epoch": 0.87, 
"grad_norm": 1.296501874923706, "learning_rate": 8.27845429127323e-06, "loss": 1.091, "step": 11397 }, { "epoch": 0.87, "grad_norm": 1.3154981136322021, "learning_rate": 8.268899207505342e-06, "loss": 0.5051, "step": 11398 }, { "epoch": 0.87, "grad_norm": 1.6665810346603394, "learning_rate": 8.259349403390005e-06, "loss": 0.9419, "step": 11399 }, { "epoch": 0.87, "grad_norm": 1.761000633239746, "learning_rate": 8.249804879476841e-06, "loss": 1.0574, "step": 11400 }, { "epoch": 0.87, "grad_norm": 1.3485088348388672, "learning_rate": 8.240265636315225e-06, "loss": 0.8975, "step": 11401 }, { "epoch": 0.87, "grad_norm": 1.514742136001587, "learning_rate": 8.230731674454184e-06, "loss": 1.0436, "step": 11402 }, { "epoch": 0.87, "grad_norm": 1.9547617435455322, "learning_rate": 8.221202994442468e-06, "loss": 1.4652, "step": 11403 }, { "epoch": 0.87, "grad_norm": 1.5779775381088257, "learning_rate": 8.21167959682848e-06, "loss": 1.1789, "step": 11404 }, { "epoch": 0.87, "grad_norm": 1.8565713167190552, "learning_rate": 8.202161482160353e-06, "loss": 1.8731, "step": 11405 }, { "epoch": 0.87, "grad_norm": 1.3069250583648682, "learning_rate": 8.192648650985945e-06, "loss": 0.903, "step": 11406 }, { "epoch": 0.87, "grad_norm": 2.03341007232666, "learning_rate": 8.183141103852743e-06, "loss": 1.8334, "step": 11407 }, { "epoch": 0.87, "grad_norm": 1.494521141052246, "learning_rate": 8.173638841307951e-06, "loss": 1.5429, "step": 11408 }, { "epoch": 0.87, "grad_norm": 2.0553054809570312, "learning_rate": 8.1641418638985e-06, "loss": 1.656, "step": 11409 }, { "epoch": 0.87, "grad_norm": 4.415904998779297, "learning_rate": 8.154650172170975e-06, "loss": 2.3554, "step": 11410 }, { "epoch": 0.87, "grad_norm": 1.5957995653152466, "learning_rate": 8.145163766671692e-06, "loss": 1.2842, "step": 11411 }, { "epoch": 0.87, "grad_norm": 1.1718388795852661, "learning_rate": 8.135682647946664e-06, "loss": 1.4268, "step": 11412 }, { "epoch": 0.87, "grad_norm": 0.9914869666099548, 
"learning_rate": 8.126206816541548e-06, "loss": 1.0738, "step": 11413 }, { "epoch": 0.87, "grad_norm": 1.191955327987671, "learning_rate": 8.116736273001746e-06, "loss": 0.7928, "step": 11414 }, { "epoch": 0.87, "grad_norm": 1.974930763244629, "learning_rate": 8.107271017872365e-06, "loss": 1.2519, "step": 11415 }, { "epoch": 0.87, "grad_norm": 1.7495445013046265, "learning_rate": 8.097811051698157e-06, "loss": 1.9744, "step": 11416 }, { "epoch": 0.87, "grad_norm": 1.646277904510498, "learning_rate": 8.08835637502362e-06, "loss": 1.7816, "step": 11417 }, { "epoch": 0.87, "grad_norm": 1.8280822038650513, "learning_rate": 8.07890698839291e-06, "loss": 0.7538, "step": 11418 }, { "epoch": 0.87, "grad_norm": 1.5091500282287598, "learning_rate": 8.069462892349888e-06, "loss": 1.0145, "step": 11419 }, { "epoch": 0.87, "grad_norm": 1.3451290130615234, "learning_rate": 8.060024087438111e-06, "loss": 0.9714, "step": 11420 }, { "epoch": 0.87, "grad_norm": 1.198638677597046, "learning_rate": 8.050590574200878e-06, "loss": 0.8292, "step": 11421 }, { "epoch": 0.87, "grad_norm": 1.0637363195419312, "learning_rate": 8.041162353181087e-06, "loss": 1.5147, "step": 11422 }, { "epoch": 0.87, "grad_norm": 1.4779309034347534, "learning_rate": 8.031739424921415e-06, "loss": 1.234, "step": 11423 }, { "epoch": 0.87, "grad_norm": 1.2750170230865479, "learning_rate": 8.022321789964226e-06, "loss": 1.6614, "step": 11424 }, { "epoch": 0.87, "grad_norm": 1.8051636219024658, "learning_rate": 8.012909448851514e-06, "loss": 1.8339, "step": 11425 }, { "epoch": 0.87, "grad_norm": 1.5534143447875977, "learning_rate": 8.003502402125062e-06, "loss": 1.4347, "step": 11426 }, { "epoch": 0.87, "grad_norm": 1.4660441875457764, "learning_rate": 7.994100650326264e-06, "loss": 1.2848, "step": 11427 }, { "epoch": 0.87, "grad_norm": 2.548640489578247, "learning_rate": 7.98470419399624e-06, "loss": 0.7879, "step": 11428 }, { "epoch": 0.87, "grad_norm": 3.853878974914551, "learning_rate": 7.975313033675825e-06, 
"loss": 1.4344, "step": 11429 }, { "epoch": 0.87, "grad_norm": 1.722832441329956, "learning_rate": 7.965927169905551e-06, "loss": 1.1428, "step": 11430 }, { "epoch": 0.87, "grad_norm": 1.9206280708312988, "learning_rate": 7.956546603225601e-06, "loss": 1.1456, "step": 11431 }, { "epoch": 0.87, "grad_norm": 1.1706148386001587, "learning_rate": 7.947171334175885e-06, "loss": 0.9672, "step": 11432 }, { "epoch": 0.87, "grad_norm": 1.893800973892212, "learning_rate": 7.937801363296027e-06, "loss": 1.7114, "step": 11433 }, { "epoch": 0.87, "grad_norm": 1.11155104637146, "learning_rate": 7.928436691125285e-06, "loss": 1.2924, "step": 11434 }, { "epoch": 0.87, "grad_norm": 1.7133723497390747, "learning_rate": 7.919077318202694e-06, "loss": 1.4302, "step": 11435 }, { "epoch": 0.87, "grad_norm": 2.989414691925049, "learning_rate": 7.909723245066914e-06, "loss": 1.7818, "step": 11436 }, { "epoch": 0.87, "grad_norm": 2.9600377082824707, "learning_rate": 7.900374472256312e-06, "loss": 0.8964, "step": 11437 }, { "epoch": 0.87, "grad_norm": 3.062858819961548, "learning_rate": 7.89103100030898e-06, "loss": 0.8977, "step": 11438 }, { "epoch": 0.87, "grad_norm": 1.630944013595581, "learning_rate": 7.881692829762699e-06, "loss": 1.4722, "step": 11439 }, { "epoch": 0.87, "grad_norm": 2.583573818206787, "learning_rate": 7.872359961154906e-06, "loss": 1.4482, "step": 11440 }, { "epoch": 0.87, "grad_norm": 1.3371776342391968, "learning_rate": 7.863032395022785e-06, "loss": 1.1871, "step": 11441 }, { "epoch": 0.87, "grad_norm": 3.3611574172973633, "learning_rate": 7.853710131903203e-06, "loss": 2.1618, "step": 11442 }, { "epoch": 0.87, "grad_norm": 1.4075788259506226, "learning_rate": 7.844393172332665e-06, "loss": 1.2372, "step": 11443 }, { "epoch": 0.87, "grad_norm": 1.2692043781280518, "learning_rate": 7.835081516847465e-06, "loss": 1.0433, "step": 11444 }, { "epoch": 0.87, "grad_norm": 1.714787483215332, "learning_rate": 7.825775165983506e-06, "loss": 1.7142, "step": 11445 }, { 
"epoch": 0.87, "grad_norm": 3.057572841644287, "learning_rate": 7.81647412027644e-06, "loss": 1.5547, "step": 11446 }, { "epoch": 0.87, "grad_norm": 4.989019870758057, "learning_rate": 7.807178380261581e-06, "loss": 1.7933, "step": 11447 }, { "epoch": 0.87, "grad_norm": 2.1419284343719482, "learning_rate": 7.797887946473992e-06, "loss": 1.7845, "step": 11448 }, { "epoch": 0.87, "grad_norm": 1.0176655054092407, "learning_rate": 7.788602819448332e-06, "loss": 0.826, "step": 11449 }, { "epoch": 0.87, "grad_norm": 2.3907690048217773, "learning_rate": 7.77932299971904e-06, "loss": 2.0968, "step": 11450 }, { "epoch": 0.87, "grad_norm": 1.1981067657470703, "learning_rate": 7.770048487820247e-06, "loss": 0.8842, "step": 11451 }, { "epoch": 0.87, "grad_norm": 1.0615894794464111, "learning_rate": 7.760779284285724e-06, "loss": 1.2431, "step": 11452 }, { "epoch": 0.87, "grad_norm": 2.527581214904785, "learning_rate": 7.751515389648967e-06, "loss": 2.07, "step": 11453 }, { "epoch": 0.87, "grad_norm": 1.6880604028701782, "learning_rate": 7.742256804443194e-06, "loss": 1.4824, "step": 11454 }, { "epoch": 0.87, "grad_norm": 1.4382952451705933, "learning_rate": 7.733003529201278e-06, "loss": 1.5064, "step": 11455 }, { "epoch": 0.87, "grad_norm": 0.9872978925704956, "learning_rate": 7.72375556445577e-06, "loss": 1.1816, "step": 11456 }, { "epoch": 0.87, "grad_norm": 2.3697104454040527, "learning_rate": 7.71451291073898e-06, "loss": 1.1746, "step": 11457 }, { "epoch": 0.87, "grad_norm": 2.0885822772979736, "learning_rate": 7.705275568582848e-06, "loss": 1.2357, "step": 11458 }, { "epoch": 0.87, "grad_norm": 1.442903757095337, "learning_rate": 7.696043538519048e-06, "loss": 0.5871, "step": 11459 }, { "epoch": 0.87, "grad_norm": 1.737117052078247, "learning_rate": 7.686816821078958e-06, "loss": 1.212, "step": 11460 }, { "epoch": 0.87, "grad_norm": 2.6887524127960205, "learning_rate": 7.677595416793592e-06, "loss": 2.1183, "step": 11461 }, { "epoch": 0.87, "grad_norm": 
1.6902728080749512, "learning_rate": 7.668379326193708e-06, "loss": 1.3066, "step": 11462 }, { "epoch": 0.87, "grad_norm": 0.9384908080101013, "learning_rate": 7.659168549809769e-06, "loss": 0.9671, "step": 11463 }, { "epoch": 0.87, "grad_norm": 1.4074074029922485, "learning_rate": 7.649963088171885e-06, "loss": 1.0485, "step": 11464 }, { "epoch": 0.87, "grad_norm": 1.0576330423355103, "learning_rate": 7.640762941809865e-06, "loss": 0.8903, "step": 11465 }, { "epoch": 0.87, "grad_norm": 1.2267768383026123, "learning_rate": 7.631568111253274e-06, "loss": 1.9349, "step": 11466 }, { "epoch": 0.87, "grad_norm": 1.3963807821273804, "learning_rate": 7.622378597031288e-06, "loss": 1.2038, "step": 11467 }, { "epoch": 0.88, "grad_norm": 1.3388687372207642, "learning_rate": 7.6131943996728295e-06, "loss": 1.273, "step": 11468 }, { "epoch": 0.88, "grad_norm": 1.6106637716293335, "learning_rate": 7.604015519706531e-06, "loss": 1.4302, "step": 11469 }, { "epoch": 0.88, "grad_norm": 1.3919963836669922, "learning_rate": 7.594841957660637e-06, "loss": 1.0533, "step": 11470 }, { "epoch": 0.88, "grad_norm": 1.1131023168563843, "learning_rate": 7.5856737140631685e-06, "loss": 0.7827, "step": 11471 }, { "epoch": 0.88, "grad_norm": 2.3836653232574463, "learning_rate": 7.576510789441837e-06, "loss": 0.9258, "step": 11472 }, { "epoch": 0.88, "grad_norm": 2.421785831451416, "learning_rate": 7.5673531843239776e-06, "loss": 1.9116, "step": 11473 }, { "epoch": 0.88, "grad_norm": 1.361716866493225, "learning_rate": 7.558200899236667e-06, "loss": 1.423, "step": 11474 }, { "epoch": 0.88, "grad_norm": 1.0104641914367676, "learning_rate": 7.549053934706707e-06, "loss": 0.8745, "step": 11475 }, { "epoch": 0.88, "grad_norm": 1.275701642036438, "learning_rate": 7.5399122912605095e-06, "loss": 1.82, "step": 11476 }, { "epoch": 0.88, "grad_norm": 1.4576821327209473, "learning_rate": 7.530775969424253e-06, "loss": 1.1541, "step": 11477 }, { "epoch": 0.88, "grad_norm": 1.459133267402649, 
"learning_rate": 7.521644969723807e-06, "loss": 1.34, "step": 11478 }, { "epoch": 0.88, "grad_norm": 2.3460910320281982, "learning_rate": 7.512519292684672e-06, "loss": 1.6746, "step": 11479 }, { "epoch": 0.88, "grad_norm": 1.6718180179595947, "learning_rate": 7.503398938832107e-06, "loss": 1.6777, "step": 11480 }, { "epoch": 0.88, "grad_norm": 1.938582420349121, "learning_rate": 7.494283908691047e-06, "loss": 0.8405, "step": 11481 }, { "epoch": 0.88, "grad_norm": 1.1499115228652954, "learning_rate": 7.485174202786094e-06, "loss": 1.2952, "step": 11482 }, { "epoch": 0.88, "grad_norm": 1.7489709854125977, "learning_rate": 7.476069821641585e-06, "loss": 2.0236, "step": 11483 }, { "epoch": 0.88, "grad_norm": 2.6141247749328613, "learning_rate": 7.466970765781522e-06, "loss": 1.7869, "step": 11484 }, { "epoch": 0.88, "grad_norm": 2.4625420570373535, "learning_rate": 7.457877035729588e-06, "loss": 2.5179, "step": 11485 }, { "epoch": 0.88, "grad_norm": 1.5639742612838745, "learning_rate": 7.448788632009196e-06, "loss": 1.3231, "step": 11486 }, { "epoch": 0.88, "grad_norm": 1.109018325805664, "learning_rate": 7.439705555143461e-06, "loss": 1.3593, "step": 11487 }, { "epoch": 0.88, "grad_norm": 3.3380420207977295, "learning_rate": 7.43062780565511e-06, "loss": 1.2944, "step": 11488 }, { "epoch": 0.88, "grad_norm": 1.8737033605575562, "learning_rate": 7.421555384066669e-06, "loss": 1.7292, "step": 11489 }, { "epoch": 0.88, "grad_norm": 0.9430937170982361, "learning_rate": 7.412488290900288e-06, "loss": 0.8097, "step": 11490 }, { "epoch": 0.88, "grad_norm": 1.1699618101119995, "learning_rate": 7.403426526677825e-06, "loss": 0.6299, "step": 11491 }, { "epoch": 0.88, "grad_norm": 1.5825567245483398, "learning_rate": 7.394370091920866e-06, "loss": 0.9664, "step": 11492 }, { "epoch": 0.88, "grad_norm": 1.9395006895065308, "learning_rate": 7.385318987150635e-06, "loss": 1.6658, "step": 11493 }, { "epoch": 0.88, "grad_norm": 1.8384957313537598, "learning_rate": 
7.376273212888063e-06, "loss": 0.7967, "step": 11494 }, { "epoch": 0.88, "grad_norm": 2.7958009243011475, "learning_rate": 7.367232769653798e-06, "loss": 2.2448, "step": 11495 }, { "epoch": 0.88, "grad_norm": 2.037684202194214, "learning_rate": 7.358197657968202e-06, "loss": 1.246, "step": 11496 }, { "epoch": 0.88, "grad_norm": 1.3612713813781738, "learning_rate": 7.349167878351249e-06, "loss": 0.7559, "step": 11497 }, { "epoch": 0.88, "grad_norm": 1.1621544361114502, "learning_rate": 7.340143431322666e-06, "loss": 1.2644, "step": 11498 }, { "epoch": 0.88, "grad_norm": 1.5111174583435059, "learning_rate": 7.331124317401905e-06, "loss": 1.8853, "step": 11499 }, { "epoch": 0.88, "grad_norm": 1.0994333028793335, "learning_rate": 7.322110537108007e-06, "loss": 1.2917, "step": 11500 }, { "epoch": 0.88, "grad_norm": 1.536128282546997, "learning_rate": 7.313102090959811e-06, "loss": 1.9546, "step": 11501 }, { "epoch": 0.88, "grad_norm": 1.6447386741638184, "learning_rate": 7.304098979475793e-06, "loss": 0.6933, "step": 11502 }, { "epoch": 0.88, "grad_norm": 1.2709547281265259, "learning_rate": 7.295101203174104e-06, "loss": 1.7008, "step": 11503 }, { "epoch": 0.88, "grad_norm": 1.6537871360778809, "learning_rate": 7.286108762572652e-06, "loss": 1.4844, "step": 11504 }, { "epoch": 0.88, "grad_norm": 1.1985481977462769, "learning_rate": 7.277121658189001e-06, "loss": 1.2467, "step": 11505 }, { "epoch": 0.88, "grad_norm": 1.0663758516311646, "learning_rate": 7.268139890540404e-06, "loss": 1.4803, "step": 11506 }, { "epoch": 0.88, "grad_norm": 2.291062831878662, "learning_rate": 7.259163460143803e-06, "loss": 1.9699, "step": 11507 }, { "epoch": 0.88, "grad_norm": 1.412243127822876, "learning_rate": 7.250192367515873e-06, "loss": 1.4121, "step": 11508 }, { "epoch": 0.88, "grad_norm": 2.1743903160095215, "learning_rate": 7.241226613172913e-06, "loss": 1.5007, "step": 11509 }, { "epoch": 0.88, "grad_norm": 2.6629557609558105, "learning_rate": 7.232266197630988e-06, "loss": 
1.0647, "step": 11510 }, { "epoch": 0.88, "grad_norm": 1.0728121995925903, "learning_rate": 7.223311121405796e-06, "loss": 2.0515, "step": 11511 }, { "epoch": 0.88, "grad_norm": 1.0551931858062744, "learning_rate": 7.21436138501278e-06, "loss": 1.4455, "step": 11512 }, { "epoch": 0.88, "grad_norm": 2.634626626968384, "learning_rate": 7.2054169889670066e-06, "loss": 1.4629, "step": 11513 }, { "epoch": 0.88, "grad_norm": 0.9541669487953186, "learning_rate": 7.19647793378333e-06, "loss": 1.4292, "step": 11514 }, { "epoch": 0.88, "grad_norm": 3.8262925148010254, "learning_rate": 7.187544219976205e-06, "loss": 1.5294, "step": 11515 }, { "epoch": 0.88, "grad_norm": 1.2574694156646729, "learning_rate": 7.178615848059822e-06, "loss": 1.8003, "step": 11516 }, { "epoch": 0.88, "grad_norm": 1.5429755449295044, "learning_rate": 7.169692818548091e-06, "loss": 0.8489, "step": 11517 }, { "epoch": 0.88, "grad_norm": 1.240042805671692, "learning_rate": 7.160775131954545e-06, "loss": 1.256, "step": 11518 }, { "epoch": 0.88, "grad_norm": 1.642519474029541, "learning_rate": 7.151862788792463e-06, "loss": 0.8916, "step": 11519 }, { "epoch": 0.88, "grad_norm": 1.334916353225708, "learning_rate": 7.142955789574823e-06, "loss": 0.8334, "step": 11520 }, { "epoch": 0.88, "grad_norm": 1.871625304222107, "learning_rate": 7.13405413481425e-06, "loss": 1.202, "step": 11521 }, { "epoch": 0.88, "grad_norm": 1.652580976486206, "learning_rate": 7.125157825023076e-06, "loss": 1.4497, "step": 11522 }, { "epoch": 0.88, "grad_norm": 1.639936923980713, "learning_rate": 7.116266860713372e-06, "loss": 1.1501, "step": 11523 }, { "epoch": 0.88, "grad_norm": 1.1755789518356323, "learning_rate": 7.107381242396816e-06, "loss": 1.382, "step": 11524 }, { "epoch": 0.88, "grad_norm": 1.465941309928894, "learning_rate": 7.098500970584854e-06, "loss": 1.9314, "step": 11525 }, { "epoch": 0.88, "grad_norm": 1.116210699081421, "learning_rate": 7.089626045788611e-06, "loss": 1.3326, "step": 11526 }, { "epoch": 0.88, 
"grad_norm": 3.2487521171569824, "learning_rate": 7.080756468518857e-06, "loss": 1.4877, "step": 11527 }, { "epoch": 0.88, "grad_norm": 1.9001878499984741, "learning_rate": 7.071892239286104e-06, "loss": 1.9939, "step": 11528 }, { "epoch": 0.88, "grad_norm": 1.4735924005508423, "learning_rate": 7.0630333586005574e-06, "loss": 1.4831, "step": 11529 }, { "epoch": 0.88, "grad_norm": 2.4878859519958496, "learning_rate": 7.054179826972074e-06, "loss": 1.0605, "step": 11530 }, { "epoch": 0.88, "grad_norm": 1.7854050397872925, "learning_rate": 7.045331644910213e-06, "loss": 0.8817, "step": 11531 }, { "epoch": 0.88, "grad_norm": 2.575711965560913, "learning_rate": 7.03648881292428e-06, "loss": 1.1191, "step": 11532 }, { "epoch": 0.88, "grad_norm": 1.0327423810958862, "learning_rate": 7.027651331523178e-06, "loss": 0.9301, "step": 11533 }, { "epoch": 0.88, "grad_norm": 2.102653980255127, "learning_rate": 7.018819201215587e-06, "loss": 1.6383, "step": 11534 }, { "epoch": 0.88, "grad_norm": 1.6992461681365967, "learning_rate": 7.009992422509859e-06, "loss": 0.7245, "step": 11535 }, { "epoch": 0.88, "grad_norm": 1.6166589260101318, "learning_rate": 7.001170995913997e-06, "loss": 1.4622, "step": 11536 }, { "epoch": 0.88, "grad_norm": 1.578696846961975, "learning_rate": 6.992354921935729e-06, "loss": 1.149, "step": 11537 }, { "epoch": 0.88, "grad_norm": 1.6332184076309204, "learning_rate": 6.983544201082503e-06, "loss": 1.1712, "step": 11538 }, { "epoch": 0.88, "grad_norm": 3.167489767074585, "learning_rate": 6.974738833861383e-06, "loss": 1.5018, "step": 11539 }, { "epoch": 0.88, "grad_norm": 1.2918857336044312, "learning_rate": 6.965938820779205e-06, "loss": 1.3111, "step": 11540 }, { "epoch": 0.88, "grad_norm": 1.128834843635559, "learning_rate": 6.957144162342421e-06, "loss": 1.5711, "step": 11541 }, { "epoch": 0.88, "grad_norm": 3.547729015350342, "learning_rate": 6.94835485905726e-06, "loss": 2.0243, "step": 11542 }, { "epoch": 0.88, "grad_norm": 2.5131750106811523, 
"learning_rate": 6.9395709114295605e-06, "loss": 0.8447, "step": 11543 }, { "epoch": 0.88, "grad_norm": 1.7541307210922241, "learning_rate": 6.930792319964896e-06, "loss": 1.3307, "step": 11544 }, { "epoch": 0.88, "grad_norm": 1.1639032363891602, "learning_rate": 6.9220190851685516e-06, "loss": 1.1814, "step": 11545 }, { "epoch": 0.88, "grad_norm": 1.75320565700531, "learning_rate": 6.913251207545446e-06, "loss": 1.7192, "step": 11546 }, { "epoch": 0.88, "grad_norm": 1.915467619895935, "learning_rate": 6.904488687600219e-06, "loss": 1.5946, "step": 11547 }, { "epoch": 0.88, "grad_norm": 1.6242035627365112, "learning_rate": 6.895731525837245e-06, "loss": 1.4088, "step": 11548 }, { "epoch": 0.88, "grad_norm": 1.1831045150756836, "learning_rate": 6.886979722760511e-06, "loss": 1.8827, "step": 11549 }, { "epoch": 0.88, "grad_norm": 2.1762144565582275, "learning_rate": 6.878233278873736e-06, "loss": 1.3488, "step": 11550 }, { "epoch": 0.88, "grad_norm": 1.6045308113098145, "learning_rate": 6.869492194680349e-06, "loss": 1.4052, "step": 11551 }, { "epoch": 0.88, "grad_norm": 1.490285873413086, "learning_rate": 6.860756470683427e-06, "loss": 1.3563, "step": 11552 }, { "epoch": 0.88, "grad_norm": 1.1766895055770874, "learning_rate": 6.852026107385756e-06, "loss": 1.2373, "step": 11553 }, { "epoch": 0.88, "grad_norm": 1.1480953693389893, "learning_rate": 6.843301105289868e-06, "loss": 1.2771, "step": 11554 }, { "epoch": 0.88, "grad_norm": 2.3125977516174316, "learning_rate": 6.834581464897871e-06, "loss": 1.6756, "step": 11555 }, { "epoch": 0.88, "grad_norm": 1.6061989068984985, "learning_rate": 6.825867186711677e-06, "loss": 1.34, "step": 11556 }, { "epoch": 0.88, "grad_norm": 1.6939948797225952, "learning_rate": 6.817158271232826e-06, "loss": 1.976, "step": 11557 }, { "epoch": 0.88, "grad_norm": 1.8648735284805298, "learning_rate": 6.808454718962587e-06, "loss": 1.0942, "step": 11558 }, { "epoch": 0.88, "grad_norm": 2.106923818588257, "learning_rate": 
6.799756530401846e-06, "loss": 1.4382, "step": 11559 }, { "epoch": 0.88, "grad_norm": 1.5752835273742676, "learning_rate": 6.7910637060512924e-06, "loss": 1.5812, "step": 11560 }, { "epoch": 0.88, "grad_norm": 1.486608862876892, "learning_rate": 6.782376246411215e-06, "loss": 1.278, "step": 11561 }, { "epoch": 0.88, "grad_norm": 1.3537358045578003, "learning_rate": 6.7736941519816355e-06, "loss": 1.0565, "step": 11562 }, { "epoch": 0.88, "grad_norm": 1.6816540956497192, "learning_rate": 6.765017423262266e-06, "loss": 0.8397, "step": 11563 }, { "epoch": 0.88, "grad_norm": 1.5117186307907104, "learning_rate": 6.756346060752483e-06, "loss": 1.6238, "step": 11564 }, { "epoch": 0.88, "grad_norm": 2.205686330795288, "learning_rate": 6.747680064951389e-06, "loss": 2.0189, "step": 11565 }, { "epoch": 0.88, "grad_norm": 1.3501261472702026, "learning_rate": 6.739019436357774e-06, "loss": 1.2721, "step": 11566 }, { "epoch": 0.88, "grad_norm": 2.060837745666504, "learning_rate": 6.730364175470083e-06, "loss": 1.5718, "step": 11567 }, { "epoch": 0.88, "grad_norm": 3.5281379222869873, "learning_rate": 6.721714282786484e-06, "loss": 1.6915, "step": 11568 }, { "epoch": 0.88, "grad_norm": 1.2533116340637207, "learning_rate": 6.713069758804835e-06, "loss": 1.5421, "step": 11569 }, { "epoch": 0.88, "grad_norm": 1.5572158098220825, "learning_rate": 6.704430604022649e-06, "loss": 1.2418, "step": 11570 }, { "epoch": 0.88, "grad_norm": 2.2358596324920654, "learning_rate": 6.695796818937194e-06, "loss": 1.3589, "step": 11571 }, { "epoch": 0.88, "grad_norm": 1.5452736616134644, "learning_rate": 6.687168404045396e-06, "loss": 1.5514, "step": 11572 }, { "epoch": 0.88, "grad_norm": 1.5280841588974, "learning_rate": 6.678545359843835e-06, "loss": 1.2903, "step": 11573 }, { "epoch": 0.88, "grad_norm": 1.0689246654510498, "learning_rate": 6.669927686828836e-06, "loss": 1.2045, "step": 11574 }, { "epoch": 0.88, "grad_norm": 1.076742172241211, "learning_rate": 6.661315385496425e-06, "loss": 
1.2159, "step": 11575 }, { "epoch": 0.88, "grad_norm": 1.7282257080078125, "learning_rate": 6.652708456342238e-06, "loss": 1.7068, "step": 11576 }, { "epoch": 0.88, "grad_norm": 1.5011351108551025, "learning_rate": 6.644106899861691e-06, "loss": 1.1266, "step": 11577 }, { "epoch": 0.88, "grad_norm": 1.3773447275161743, "learning_rate": 6.635510716549842e-06, "loss": 1.4603, "step": 11578 }, { "epoch": 0.88, "grad_norm": 0.8925493955612183, "learning_rate": 6.6269199069014294e-06, "loss": 0.4096, "step": 11579 }, { "epoch": 0.88, "grad_norm": 1.2408679723739624, "learning_rate": 6.618334471410925e-06, "loss": 1.1574, "step": 11580 }, { "epoch": 0.88, "grad_norm": 2.5829641819000244, "learning_rate": 6.609754410572478e-06, "loss": 1.4571, "step": 11581 }, { "epoch": 0.88, "grad_norm": 4.505234718322754, "learning_rate": 6.601179724879902e-06, "loss": 2.311, "step": 11582 }, { "epoch": 0.88, "grad_norm": 1.2277369499206543, "learning_rate": 6.592610414826716e-06, "loss": 1.2653, "step": 11583 }, { "epoch": 0.88, "grad_norm": 1.816718339920044, "learning_rate": 6.5840464809061694e-06, "loss": 1.5016, "step": 11584 }, { "epoch": 0.88, "grad_norm": 2.3021860122680664, "learning_rate": 6.575487923611124e-06, "loss": 1.1492, "step": 11585 }, { "epoch": 0.88, "grad_norm": 0.9899399876594543, "learning_rate": 6.566934743434205e-06, "loss": 1.4088, "step": 11586 }, { "epoch": 0.88, "grad_norm": 1.3030810356140137, "learning_rate": 6.558386940867678e-06, "loss": 1.3584, "step": 11587 }, { "epoch": 0.88, "grad_norm": 1.8308531045913696, "learning_rate": 6.5498445164035134e-06, "loss": 1.5096, "step": 11588 }, { "epoch": 0.88, "grad_norm": 2.513155698776245, "learning_rate": 6.5413074705333865e-06, "loss": 0.8343, "step": 11589 }, { "epoch": 0.88, "grad_norm": 1.3713418245315552, "learning_rate": 6.5327758037486585e-06, "loss": 1.4553, "step": 11590 }, { "epoch": 0.88, "grad_norm": 1.2539106607437134, "learning_rate": 6.52424951654037e-06, "loss": 1.3884, "step": 11591 }, { 
"epoch": 0.88, "grad_norm": 1.3716055154800415, "learning_rate": 6.5157286093992524e-06, "loss": 0.878, "step": 11592 }, { "epoch": 0.88, "grad_norm": 1.8293652534484863, "learning_rate": 6.507213082815744e-06, "loss": 0.9692, "step": 11593 }, { "epoch": 0.88, "grad_norm": 2.1708500385284424, "learning_rate": 6.498702937279955e-06, "loss": 1.8657, "step": 11594 }, { "epoch": 0.88, "grad_norm": 1.4141100645065308, "learning_rate": 6.490198173281703e-06, "loss": 1.7941, "step": 11595 }, { "epoch": 0.88, "grad_norm": 1.7390925884246826, "learning_rate": 6.481698791310486e-06, "loss": 1.1458, "step": 11596 }, { "epoch": 0.88, "grad_norm": 1.3791553974151611, "learning_rate": 6.47320479185547e-06, "loss": 0.9715, "step": 11597 }, { "epoch": 0.88, "grad_norm": 1.3110244274139404, "learning_rate": 6.464716175405538e-06, "loss": 1.4729, "step": 11598 }, { "epoch": 0.89, "grad_norm": 1.571094036102295, "learning_rate": 6.456232942449292e-06, "loss": 1.5034, "step": 11599 }, { "epoch": 0.89, "grad_norm": 1.5928751230239868, "learning_rate": 6.44775509347495e-06, "loss": 1.1769, "step": 11600 }, { "epoch": 0.89, "grad_norm": 2.1046431064605713, "learning_rate": 6.439282628970477e-06, "loss": 1.3728, "step": 11601 }, { "epoch": 0.89, "grad_norm": 0.9397501945495605, "learning_rate": 6.430815549423541e-06, "loss": 1.0278, "step": 11602 }, { "epoch": 0.89, "grad_norm": 2.306955337524414, "learning_rate": 6.4223538553214165e-06, "loss": 1.0495, "step": 11603 }, { "epoch": 0.89, "grad_norm": 2.097689390182495, "learning_rate": 6.413897547151171e-06, "loss": 0.9575, "step": 11604 }, { "epoch": 0.89, "grad_norm": 1.2324554920196533, "learning_rate": 6.405446625399481e-06, "loss": 1.5464, "step": 11605 }, { "epoch": 0.89, "grad_norm": 1.4326848983764648, "learning_rate": 6.397001090552768e-06, "loss": 1.2059, "step": 11606 }, { "epoch": 0.89, "grad_norm": 2.2650368213653564, "learning_rate": 6.388560943097099e-06, "loss": 1.2154, "step": 11607 }, { "epoch": 0.89, "grad_norm": 
2.305103063583374, "learning_rate": 6.380126183518276e-06, "loss": 1.6515, "step": 11608 }, { "epoch": 0.89, "grad_norm": 1.1134941577911377, "learning_rate": 6.371696812301742e-06, "loss": 1.1523, "step": 11609 }, { "epoch": 0.89, "grad_norm": 1.896700143814087, "learning_rate": 6.363272829932676e-06, "loss": 1.4849, "step": 11610 }, { "epoch": 0.89, "grad_norm": 1.6049408912658691, "learning_rate": 6.354854236895935e-06, "loss": 1.5612, "step": 11611 }, { "epoch": 0.89, "grad_norm": 1.109584093093872, "learning_rate": 6.34644103367602e-06, "loss": 1.1261, "step": 11612 }, { "epoch": 0.89, "grad_norm": 1.6564821004867554, "learning_rate": 6.3380332207572e-06, "loss": 1.7663, "step": 11613 }, { "epoch": 0.89, "grad_norm": 2.567824125289917, "learning_rate": 6.329630798623376e-06, "loss": 1.7218, "step": 11614 }, { "epoch": 0.89, "grad_norm": 1.2939749956130981, "learning_rate": 6.321233767758161e-06, "loss": 1.4087, "step": 11615 }, { "epoch": 0.89, "grad_norm": 1.602126121520996, "learning_rate": 6.3128421286448355e-06, "loss": 1.8921, "step": 11616 }, { "epoch": 0.89, "grad_norm": 1.2420912981033325, "learning_rate": 6.304455881766402e-06, "loss": 1.1081, "step": 11617 }, { "epoch": 0.89, "grad_norm": 1.1995457410812378, "learning_rate": 6.29607502760553e-06, "loss": 1.0845, "step": 11618 }, { "epoch": 0.89, "grad_norm": 1.85209321975708, "learning_rate": 6.287699566644589e-06, "loss": 1.3964, "step": 11619 }, { "epoch": 0.89, "grad_norm": 1.234320878982544, "learning_rate": 6.2793294993656494e-06, "loss": 1.7433, "step": 11620 }, { "epoch": 0.89, "grad_norm": 1.9296481609344482, "learning_rate": 6.270964826250425e-06, "loss": 0.9215, "step": 11621 }, { "epoch": 0.89, "grad_norm": 2.172135591506958, "learning_rate": 6.2626055477803646e-06, "loss": 1.9284, "step": 11622 }, { "epoch": 0.89, "grad_norm": 4.757959365844727, "learning_rate": 6.254251664436617e-06, "loss": 1.3728, "step": 11623 }, { "epoch": 0.89, "grad_norm": 2.2720372676849365, "learning_rate": 
6.245903176699974e-06, "loss": 0.6892, "step": 11624 }, { "epoch": 0.89, "grad_norm": 1.3663562536239624, "learning_rate": 6.2375600850509306e-06, "loss": 1.2836, "step": 11625 }, { "epoch": 0.89, "grad_norm": 1.2590737342834473, "learning_rate": 6.229222389969702e-06, "loss": 1.4199, "step": 11626 }, { "epoch": 0.89, "grad_norm": 1.295493245124817, "learning_rate": 6.220890091936138e-06, "loss": 1.4249, "step": 11627 }, { "epoch": 0.89, "grad_norm": 1.598854660987854, "learning_rate": 6.212563191429843e-06, "loss": 1.2907, "step": 11628 }, { "epoch": 0.89, "grad_norm": 1.5874215364456177, "learning_rate": 6.204241688930068e-06, "loss": 1.3059, "step": 11629 }, { "epoch": 0.89, "grad_norm": 1.571553349494934, "learning_rate": 6.195925584915752e-06, "loss": 1.8192, "step": 11630 }, { "epoch": 0.89, "grad_norm": 2.5549347400665283, "learning_rate": 6.187614879865544e-06, "loss": 1.0124, "step": 11631 }, { "epoch": 0.89, "grad_norm": 2.695796251296997, "learning_rate": 6.179309574257797e-06, "loss": 0.8106, "step": 11632 }, { "epoch": 0.89, "grad_norm": 1.3246699571609497, "learning_rate": 6.171009668570493e-06, "loss": 1.1542, "step": 11633 }, { "epoch": 0.89, "grad_norm": 2.1731371879577637, "learning_rate": 6.162715163281341e-06, "loss": 1.6483, "step": 11634 }, { "epoch": 0.89, "grad_norm": 1.9345983266830444, "learning_rate": 6.1544260588677575e-06, "loss": 1.1766, "step": 11635 }, { "epoch": 0.89, "grad_norm": 0.8934870362281799, "learning_rate": 6.1461423558068056e-06, "loss": 0.8622, "step": 11636 }, { "epoch": 0.89, "grad_norm": 1.2364078760147095, "learning_rate": 6.137864054575282e-06, "loss": 1.5419, "step": 11637 }, { "epoch": 0.89, "grad_norm": 1.1213237047195435, "learning_rate": 6.12959115564965e-06, "loss": 1.1355, "step": 11638 }, { "epoch": 0.89, "grad_norm": 3.0112102031707764, "learning_rate": 6.12132365950604e-06, "loss": 1.6241, "step": 11639 }, { "epoch": 0.89, "grad_norm": 1.4654520750045776, "learning_rate": 6.113061566620304e-06, "loss": 
1.709, "step": 11640 }, { "epoch": 0.89, "grad_norm": 1.5213714838027954, "learning_rate": 6.104804877467995e-06, "loss": 0.8843, "step": 11641 }, { "epoch": 0.89, "grad_norm": 1.221885085105896, "learning_rate": 6.09655359252429e-06, "loss": 1.6846, "step": 11642 }, { "epoch": 0.89, "grad_norm": 1.2421653270721436, "learning_rate": 6.088307712264152e-06, "loss": 1.6795, "step": 11643 }, { "epoch": 0.89, "grad_norm": 1.44615638256073, "learning_rate": 6.080067237162146e-06, "loss": 1.4615, "step": 11644 }, { "epoch": 0.89, "grad_norm": 1.535754680633545, "learning_rate": 6.071832167692548e-06, "loss": 0.8501, "step": 11645 }, { "epoch": 0.89, "grad_norm": 2.13311767578125, "learning_rate": 6.063602504329346e-06, "loss": 1.223, "step": 11646 }, { "epoch": 0.89, "grad_norm": 2.6854493618011475, "learning_rate": 6.055378247546218e-06, "loss": 1.9785, "step": 11647 }, { "epoch": 0.89, "grad_norm": 1.7298871278762817, "learning_rate": 6.0471593978164935e-06, "loss": 1.7376, "step": 11648 }, { "epoch": 0.89, "grad_norm": 1.565118670463562, "learning_rate": 6.038945955613229e-06, "loss": 0.7089, "step": 11649 }, { "epoch": 0.89, "grad_norm": 4.929035663604736, "learning_rate": 6.030737921409169e-06, "loss": 1.7469, "step": 11650 }, { "epoch": 0.89, "grad_norm": 2.6469485759735107, "learning_rate": 6.0225352956767016e-06, "loss": 1.3978, "step": 11651 }, { "epoch": 0.89, "grad_norm": 1.1340997219085693, "learning_rate": 6.01433807888796e-06, "loss": 0.8885, "step": 11652 }, { "epoch": 0.89, "grad_norm": 1.4377360343933105, "learning_rate": 6.006146271514734e-06, "loss": 1.5086, "step": 11653 }, { "epoch": 0.89, "grad_norm": 2.285959005355835, "learning_rate": 5.997959874028503e-06, "loss": 1.4216, "step": 11654 }, { "epoch": 0.89, "grad_norm": 1.327207088470459, "learning_rate": 5.989778886900432e-06, "loss": 1.2936, "step": 11655 }, { "epoch": 0.89, "grad_norm": 1.3480584621429443, "learning_rate": 5.981603310601414e-06, "loss": 0.8997, "step": 11656 }, { "epoch": 0.89, 
"grad_norm": 1.7521162033081055, "learning_rate": 5.973433145601981e-06, "loss": 1.8024, "step": 11657 }, { "epoch": 0.89, "grad_norm": 1.1461433172225952, "learning_rate": 5.965268392372358e-06, "loss": 1.1921, "step": 11658 }, { "epoch": 0.89, "grad_norm": 1.814693570137024, "learning_rate": 5.957109051382514e-06, "loss": 1.2912, "step": 11659 }, { "epoch": 0.89, "grad_norm": 2.0437211990356445, "learning_rate": 5.9489551231020294e-06, "loss": 2.0606, "step": 11660 }, { "epoch": 0.89, "grad_norm": 1.4711058139801025, "learning_rate": 5.940806608000249e-06, "loss": 1.1573, "step": 11661 }, { "epoch": 0.89, "grad_norm": 1.5714665651321411, "learning_rate": 5.9326635065461215e-06, "loss": 1.1246, "step": 11662 }, { "epoch": 0.89, "grad_norm": 2.4574379920959473, "learning_rate": 5.9245258192083484e-06, "loss": 1.3706, "step": 11663 }, { "epoch": 0.89, "grad_norm": 2.0660905838012695, "learning_rate": 5.916393546455301e-06, "loss": 1.9589, "step": 11664 }, { "epoch": 0.89, "grad_norm": 1.352428674697876, "learning_rate": 5.908266688755049e-06, "loss": 1.5531, "step": 11665 }, { "epoch": 0.89, "grad_norm": 1.89158296585083, "learning_rate": 5.900145246575306e-06, "loss": 1.4621, "step": 11666 }, { "epoch": 0.89, "grad_norm": 3.3063807487487793, "learning_rate": 5.8920292203835435e-06, "loss": 2.2538, "step": 11667 }, { "epoch": 0.89, "grad_norm": 1.3257955312728882, "learning_rate": 5.883918610646877e-06, "loss": 1.3441, "step": 11668 }, { "epoch": 0.89, "grad_norm": 1.2946608066558838, "learning_rate": 5.87581341783211e-06, "loss": 1.6617, "step": 11669 }, { "epoch": 0.89, "grad_norm": 2.285673141479492, "learning_rate": 5.867713642405747e-06, "loss": 1.433, "step": 11670 }, { "epoch": 0.89, "grad_norm": 0.9320412278175354, "learning_rate": 5.859619284833983e-06, "loss": 0.9449, "step": 11671 }, { "epoch": 0.89, "grad_norm": 1.22914719581604, "learning_rate": 5.851530345582701e-06, "loss": 0.5948, "step": 11672 }, { "epoch": 0.89, "grad_norm": 1.8957620859146118, 
"learning_rate": 5.843446825117438e-06, "loss": 1.5045, "step": 11673 }, { "epoch": 0.89, "grad_norm": 1.6380846500396729, "learning_rate": 5.835368723903456e-06, "loss": 1.5087, "step": 11674 }, { "epoch": 0.89, "grad_norm": 1.1614817380905151, "learning_rate": 5.827296042405717e-06, "loss": 1.4178, "step": 11675 }, { "epoch": 0.89, "grad_norm": 3.826815605163574, "learning_rate": 5.819228781088826e-06, "loss": 2.0391, "step": 11676 }, { "epoch": 0.89, "grad_norm": 1.2427715063095093, "learning_rate": 5.811166940417112e-06, "loss": 1.0237, "step": 11677 }, { "epoch": 0.89, "grad_norm": 1.3025093078613281, "learning_rate": 5.803110520854593e-06, "loss": 1.2175, "step": 11678 }, { "epoch": 0.89, "grad_norm": 1.423158884048462, "learning_rate": 5.795059522864932e-06, "loss": 1.2283, "step": 11679 }, { "epoch": 0.89, "grad_norm": 1.215226173400879, "learning_rate": 5.787013946911546e-06, "loss": 1.1826, "step": 11680 }, { "epoch": 0.89, "grad_norm": 1.5931793451309204, "learning_rate": 5.778973793457487e-06, "loss": 1.5489, "step": 11681 }, { "epoch": 0.89, "grad_norm": 1.2177101373672485, "learning_rate": 5.770939062965497e-06, "loss": 0.826, "step": 11682 }, { "epoch": 0.89, "grad_norm": 0.8276817798614502, "learning_rate": 5.762909755898027e-06, "loss": 0.5874, "step": 11683 }, { "epoch": 0.89, "grad_norm": 1.070226788520813, "learning_rate": 5.75488587271723e-06, "loss": 1.5652, "step": 11684 }, { "epoch": 0.89, "grad_norm": 1.2945197820663452, "learning_rate": 5.746867413884904e-06, "loss": 0.517, "step": 11685 }, { "epoch": 0.89, "grad_norm": 2.6330807209014893, "learning_rate": 5.738854379862568e-06, "loss": 1.5376, "step": 11686 }, { "epoch": 0.89, "grad_norm": 2.9253337383270264, "learning_rate": 5.73084677111142e-06, "loss": 1.2629, "step": 11687 }, { "epoch": 0.89, "grad_norm": 1.2730026245117188, "learning_rate": 5.722844588092335e-06, "loss": 1.3844, "step": 11688 }, { "epoch": 0.89, "grad_norm": 1.8973125219345093, "learning_rate": 5.714847831265913e-06, 
"loss": 1.1729, "step": 11689 }, { "epoch": 0.89, "grad_norm": 1.3230628967285156, "learning_rate": 5.706856501092383e-06, "loss": 1.2378, "step": 11690 }, { "epoch": 0.89, "grad_norm": 1.0292167663574219, "learning_rate": 5.698870598031692e-06, "loss": 1.0245, "step": 11691 }, { "epoch": 0.89, "grad_norm": 4.843549728393555, "learning_rate": 5.690890122543469e-06, "loss": 1.4812, "step": 11692 }, { "epoch": 0.89, "grad_norm": 1.1164121627807617, "learning_rate": 5.682915075087081e-06, "loss": 0.4104, "step": 11693 }, { "epoch": 0.89, "grad_norm": 1.6303961277008057, "learning_rate": 5.674945456121483e-06, "loss": 1.236, "step": 11694 }, { "epoch": 0.89, "grad_norm": 2.0940587520599365, "learning_rate": 5.666981266105398e-06, "loss": 1.3909, "step": 11695 }, { "epoch": 0.89, "grad_norm": 1.5433921813964844, "learning_rate": 5.659022505497225e-06, "loss": 1.5289, "step": 11696 }, { "epoch": 0.89, "grad_norm": 1.864622950553894, "learning_rate": 5.651069174755008e-06, "loss": 1.2118, "step": 11697 }, { "epoch": 0.89, "grad_norm": 1.5552587509155273, "learning_rate": 5.643121274336527e-06, "loss": 1.4204, "step": 11698 }, { "epoch": 0.89, "grad_norm": 1.4480096101760864, "learning_rate": 5.635178804699215e-06, "loss": 1.6347, "step": 11699 }, { "epoch": 0.89, "grad_norm": 1.6600571870803833, "learning_rate": 5.6272417663002285e-06, "loss": 1.2298, "step": 11700 }, { "epoch": 0.89, "grad_norm": 2.044560670852661, "learning_rate": 5.6193101595963585e-06, "loss": 1.9691, "step": 11701 }, { "epoch": 0.89, "grad_norm": 2.1148312091827393, "learning_rate": 5.611383985044139e-06, "loss": 1.1871, "step": 11702 }, { "epoch": 0.89, "grad_norm": 2.1608405113220215, "learning_rate": 5.6034632430997606e-06, "loss": 1.2485, "step": 11703 }, { "epoch": 0.89, "grad_norm": 2.2218143939971924, "learning_rate": 5.595547934219092e-06, "loss": 1.0639, "step": 11704 }, { "epoch": 0.89, "grad_norm": 1.7268611192703247, "learning_rate": 5.587638058857736e-06, "loss": 1.2954, "step": 11705 }, 
{ "epoch": 0.89, "grad_norm": 1.3887404203414917, "learning_rate": 5.579733617470928e-06, "loss": 0.8745, "step": 11706 }, { "epoch": 0.89, "grad_norm": 2.1724486351013184, "learning_rate": 5.571834610513615e-06, "loss": 1.2383, "step": 11707 }, { "epoch": 0.89, "grad_norm": 1.3880791664123535, "learning_rate": 5.5639410384404455e-06, "loss": 1.2781, "step": 11708 }, { "epoch": 0.89, "grad_norm": 1.2498260736465454, "learning_rate": 5.556052901705733e-06, "loss": 1.6557, "step": 11709 }, { "epoch": 0.89, "grad_norm": 1.7790684700012207, "learning_rate": 5.54817020076347e-06, "loss": 1.364, "step": 11710 }, { "epoch": 0.89, "grad_norm": 2.7568225860595703, "learning_rate": 5.540292936067381e-06, "loss": 1.6548, "step": 11711 }, { "epoch": 0.89, "grad_norm": 1.88572359085083, "learning_rate": 5.5324211080708175e-06, "loss": 1.1913, "step": 11712 }, { "epoch": 0.89, "grad_norm": 1.2370221614837646, "learning_rate": 5.5245547172268485e-06, "loss": 0.9821, "step": 11713 }, { "epoch": 0.89, "grad_norm": 1.2228353023529053, "learning_rate": 5.516693763988268e-06, "loss": 1.3256, "step": 11714 }, { "epoch": 0.89, "grad_norm": 1.745373249053955, "learning_rate": 5.508838248807479e-06, "loss": 1.9097, "step": 11715 }, { "epoch": 0.89, "grad_norm": 1.671952724456787, "learning_rate": 5.500988172136623e-06, "loss": 1.3876, "step": 11716 }, { "epoch": 0.89, "grad_norm": 2.4352269172668457, "learning_rate": 5.493143534427547e-06, "loss": 1.6241, "step": 11717 }, { "epoch": 0.89, "grad_norm": 1.5634208917617798, "learning_rate": 5.485304336131714e-06, "loss": 1.7088, "step": 11718 }, { "epoch": 0.89, "grad_norm": 1.5177351236343384, "learning_rate": 5.477470577700328e-06, "loss": 1.2998, "step": 11719 }, { "epoch": 0.89, "grad_norm": 1.1100343465805054, "learning_rate": 5.469642259584273e-06, "loss": 0.8447, "step": 11720 }, { "epoch": 0.89, "grad_norm": 1.6591683626174927, "learning_rate": 5.4618193822341014e-06, "loss": 1.2092, "step": 11721 }, { "epoch": 0.89, "grad_norm": 
1.3689857721328735, "learning_rate": 5.454001946100062e-06, "loss": 1.4465, "step": 11722 }, { "epoch": 0.89, "grad_norm": 1.6109373569488525, "learning_rate": 5.44618995163213e-06, "loss": 1.3599, "step": 11723 }, { "epoch": 0.89, "grad_norm": 2.5515785217285156, "learning_rate": 5.438383399279889e-06, "loss": 1.0406, "step": 11724 }, { "epoch": 0.89, "grad_norm": 1.2227758169174194, "learning_rate": 5.430582289492659e-06, "loss": 1.0574, "step": 11725 }, { "epoch": 0.89, "grad_norm": 2.0365896224975586, "learning_rate": 5.422786622719467e-06, "loss": 1.3868, "step": 11726 }, { "epoch": 0.89, "grad_norm": 2.2522244453430176, "learning_rate": 5.414996399408978e-06, "loss": 2.0392, "step": 11727 }, { "epoch": 0.89, "grad_norm": 3.7090039253234863, "learning_rate": 5.407211620009544e-06, "loss": 1.7881, "step": 11728 }, { "epoch": 0.89, "grad_norm": 1.708256721496582, "learning_rate": 5.399432284969263e-06, "loss": 1.4239, "step": 11729 }, { "epoch": 0.9, "grad_norm": 1.6432552337646484, "learning_rate": 5.391658394735855e-06, "loss": 1.3392, "step": 11730 }, { "epoch": 0.9, "grad_norm": 3.289367198944092, "learning_rate": 5.383889949756749e-06, "loss": 1.4904, "step": 11731 }, { "epoch": 0.9, "grad_norm": 2.0972869396209717, "learning_rate": 5.3761269504790876e-06, "loss": 1.1536, "step": 11732 }, { "epoch": 0.9, "grad_norm": 1.0607779026031494, "learning_rate": 5.368369397349648e-06, "loss": 1.2995, "step": 11733 }, { "epoch": 0.9, "grad_norm": 1.6596087217330933, "learning_rate": 5.3606172908149395e-06, "loss": 1.8865, "step": 11734 }, { "epoch": 0.9, "grad_norm": 1.4381451606750488, "learning_rate": 5.352870631321139e-06, "loss": 1.1329, "step": 11735 }, { "epoch": 0.9, "grad_norm": 1.215254545211792, "learning_rate": 5.34512941931411e-06, "loss": 1.3798, "step": 11736 }, { "epoch": 0.9, "grad_norm": 1.5931705236434937, "learning_rate": 5.337393655239409e-06, "loss": 1.7603, "step": 11737 }, { "epoch": 0.9, "grad_norm": 1.4007107019424438, "learning_rate": 
5.329663339542268e-06, "loss": 1.2986, "step": 11738 }, { "epoch": 0.9, "grad_norm": 1.6868793964385986, "learning_rate": 5.321938472667598e-06, "loss": 1.3073, "step": 11739 }, { "epoch": 0.9, "grad_norm": 1.9414805173873901, "learning_rate": 5.314219055060022e-06, "loss": 1.9525, "step": 11740 }, { "epoch": 0.9, "grad_norm": 0.7497445344924927, "learning_rate": 5.306505087163849e-06, "loss": 0.7377, "step": 11741 }, { "epoch": 0.9, "grad_norm": 1.6092121601104736, "learning_rate": 5.298796569423037e-06, "loss": 1.1225, "step": 11742 }, { "epoch": 0.9, "grad_norm": 1.4946647882461548, "learning_rate": 5.291093502281264e-06, "loss": 1.0917, "step": 11743 }, { "epoch": 0.9, "grad_norm": 1.314209222793579, "learning_rate": 5.283395886181908e-06, "loss": 1.1196, "step": 11744 }, { "epoch": 0.9, "grad_norm": 1.3903782367706299, "learning_rate": 5.275703721567982e-06, "loss": 1.3903, "step": 11745 }, { "epoch": 0.9, "grad_norm": 2.287310838699341, "learning_rate": 5.2680170088822425e-06, "loss": 1.3451, "step": 11746 }, { "epoch": 0.9, "grad_norm": 1.2317508459091187, "learning_rate": 5.26033574856708e-06, "loss": 1.1847, "step": 11747 }, { "epoch": 0.9, "grad_norm": 2.768160581588745, "learning_rate": 5.252659941064597e-06, "loss": 1.2793, "step": 11748 }, { "epoch": 0.9, "grad_norm": 0.9573437571525574, "learning_rate": 5.2449895868165845e-06, "loss": 0.8915, "step": 11749 }, { "epoch": 0.9, "grad_norm": 1.3684027194976807, "learning_rate": 5.237324686264522e-06, "loss": 1.1705, "step": 11750 }, { "epoch": 0.9, "grad_norm": 1.5717345476150513, "learning_rate": 5.229665239849557e-06, "loss": 1.7027, "step": 11751 }, { "epoch": 0.9, "grad_norm": 1.9912080764770508, "learning_rate": 5.222011248012537e-06, "loss": 1.9758, "step": 11752 }, { "epoch": 0.9, "grad_norm": 1.5410852432250977, "learning_rate": 5.214362711194021e-06, "loss": 1.4405, "step": 11753 }, { "epoch": 0.9, "grad_norm": 1.7164148092269897, "learning_rate": 5.206719629834178e-06, "loss": 1.276, "step": 
11754 }, { "epoch": 0.9, "grad_norm": 1.1647984981536865, "learning_rate": 5.199082004372957e-06, "loss": 0.8059, "step": 11755 }, { "epoch": 0.9, "grad_norm": 2.1749026775360107, "learning_rate": 5.1914498352499285e-06, "loss": 1.5074, "step": 11756 }, { "epoch": 0.9, "grad_norm": 3.024078130722046, "learning_rate": 5.183823122904352e-06, "loss": 1.0065, "step": 11757 }, { "epoch": 0.9, "grad_norm": 2.5580086708068848, "learning_rate": 5.176201867775199e-06, "loss": 1.5915, "step": 11758 }, { "epoch": 0.9, "grad_norm": 1.1955269575119019, "learning_rate": 5.168586070301129e-06, "loss": 0.9642, "step": 11759 }, { "epoch": 0.9, "grad_norm": 1.1903281211853027, "learning_rate": 5.160975730920459e-06, "loss": 1.1536, "step": 11760 }, { "epoch": 0.9, "grad_norm": 2.7273497581481934, "learning_rate": 5.1533708500712156e-06, "loss": 1.6185, "step": 11761 }, { "epoch": 0.9, "grad_norm": 0.9557351469993591, "learning_rate": 5.145771428191115e-06, "loss": 1.1811, "step": 11762 }, { "epoch": 0.9, "grad_norm": 1.8102055788040161, "learning_rate": 5.13817746571752e-06, "loss": 1.5744, "step": 11763 }, { "epoch": 0.9, "grad_norm": 1.345570683479309, "learning_rate": 5.1305889630875344e-06, "loss": 1.2578, "step": 11764 }, { "epoch": 0.9, "grad_norm": 2.951770544052124, "learning_rate": 5.123005920737889e-06, "loss": 1.8326, "step": 11765 }, { "epoch": 0.9, "grad_norm": 2.043391227722168, "learning_rate": 5.115428339105078e-06, "loss": 1.5066, "step": 11766 }, { "epoch": 0.9, "grad_norm": 1.320332407951355, "learning_rate": 5.107856218625184e-06, "loss": 1.3023, "step": 11767 }, { "epoch": 0.9, "grad_norm": 1.3420941829681396, "learning_rate": 5.100289559734062e-06, "loss": 1.357, "step": 11768 }, { "epoch": 0.9, "grad_norm": 1.2305999994277954, "learning_rate": 5.092728362867194e-06, "loss": 1.3679, "step": 11769 }, { "epoch": 0.9, "grad_norm": 1.2721648216247559, "learning_rate": 5.085172628459778e-06, "loss": 1.2064, "step": 11770 }, { "epoch": 0.9, "grad_norm": 
1.178583025932312, "learning_rate": 5.07762235694671e-06, "loss": 1.7037, "step": 11771 }, { "epoch": 0.9, "grad_norm": 1.799465298652649, "learning_rate": 5.07007754876252e-06, "loss": 0.6168, "step": 11772 }, { "epoch": 0.9, "grad_norm": 1.4541443586349487, "learning_rate": 5.0625382043414715e-06, "loss": 1.986, "step": 11773 }, { "epoch": 0.9, "grad_norm": 1.2569772005081177, "learning_rate": 5.055004324117507e-06, "loss": 0.9362, "step": 11774 }, { "epoch": 0.9, "grad_norm": 1.085070252418518, "learning_rate": 5.047475908524224e-06, "loss": 1.0392, "step": 11775 }, { "epoch": 0.9, "grad_norm": 1.7608147859573364, "learning_rate": 5.039952957994931e-06, "loss": 1.6727, "step": 11776 }, { "epoch": 0.9, "grad_norm": 1.1687501668930054, "learning_rate": 5.032435472962627e-06, "loss": 1.3709, "step": 11777 }, { "epoch": 0.9, "grad_norm": 2.180034637451172, "learning_rate": 5.024923453859976e-06, "loss": 0.8086, "step": 11778 }, { "epoch": 0.9, "grad_norm": 2.116407871246338, "learning_rate": 5.0174169011193315e-06, "loss": 1.0245, "step": 11779 }, { "epoch": 0.9, "grad_norm": 1.059668779373169, "learning_rate": 5.009915815172772e-06, "loss": 1.3728, "step": 11780 }, { "epoch": 0.9, "grad_norm": 1.528346061706543, "learning_rate": 5.002420196451985e-06, "loss": 1.017, "step": 11781 }, { "epoch": 0.9, "grad_norm": 2.1901307106018066, "learning_rate": 4.994930045388413e-06, "loss": 1.4939, "step": 11782 }, { "epoch": 0.9, "grad_norm": 2.3336822986602783, "learning_rate": 4.987445362413157e-06, "loss": 1.7137, "step": 11783 }, { "epoch": 0.9, "grad_norm": 1.218387246131897, "learning_rate": 4.979966147957005e-06, "loss": 1.1899, "step": 11784 }, { "epoch": 0.9, "grad_norm": 2.3379361629486084, "learning_rate": 4.972492402450402e-06, "loss": 1.5363, "step": 11785 }, { "epoch": 0.9, "grad_norm": 2.742022752761841, "learning_rate": 4.9650241263235365e-06, "loss": 2.6017, "step": 11786 }, { "epoch": 0.9, "grad_norm": 2.2906622886657715, "learning_rate": 
4.957561320006232e-06, "loss": 1.5593, "step": 11787 }, { "epoch": 0.9, "grad_norm": 1.1125706434249878, "learning_rate": 4.950103983928023e-06, "loss": 1.0171, "step": 11788 }, { "epoch": 0.9, "grad_norm": 1.0701119899749756, "learning_rate": 4.942652118518132e-06, "loss": 1.0768, "step": 11789 }, { "epoch": 0.9, "grad_norm": 1.1332099437713623, "learning_rate": 4.935205724205438e-06, "loss": 1.2576, "step": 11790 }, { "epoch": 0.9, "grad_norm": 1.9352349042892456, "learning_rate": 4.927764801418522e-06, "loss": 0.7919, "step": 11791 }, { "epoch": 0.9, "grad_norm": 2.2301836013793945, "learning_rate": 4.920329350585684e-06, "loss": 1.6043, "step": 11792 }, { "epoch": 0.9, "grad_norm": 1.2934077978134155, "learning_rate": 4.91289937213486e-06, "loss": 0.9795, "step": 11793 }, { "epoch": 0.9, "grad_norm": 1.4962379932403564, "learning_rate": 4.905474866493664e-06, "loss": 1.4756, "step": 11794 }, { "epoch": 0.9, "grad_norm": 1.5685710906982422, "learning_rate": 4.8980558340894546e-06, "loss": 1.7863, "step": 11795 }, { "epoch": 0.9, "grad_norm": 1.820939064025879, "learning_rate": 4.8906422753492e-06, "loss": 1.3581, "step": 11796 }, { "epoch": 0.9, "grad_norm": 1.0435128211975098, "learning_rate": 4.883234190699626e-06, "loss": 1.6039, "step": 11797 }, { "epoch": 0.9, "grad_norm": 1.8964574337005615, "learning_rate": 4.875831580567114e-06, "loss": 1.1996, "step": 11798 }, { "epoch": 0.9, "grad_norm": 1.6705560684204102, "learning_rate": 4.868434445377701e-06, "loss": 1.3654, "step": 11799 }, { "epoch": 0.9, "grad_norm": 1.235552430152893, "learning_rate": 4.861042785557146e-06, "loss": 0.676, "step": 11800 }, { "epoch": 0.9, "grad_norm": 1.1591122150421143, "learning_rate": 4.8536566015308985e-06, "loss": 1.2008, "step": 11801 }, { "epoch": 0.9, "grad_norm": 1.3563876152038574, "learning_rate": 4.84627589372404e-06, "loss": 1.5117, "step": 11802 }, { "epoch": 0.9, "grad_norm": 2.414484977722168, "learning_rate": 4.838900662561419e-06, "loss": 1.4125, "step": 11803 
}, { "epoch": 0.9, "grad_norm": 1.273219347000122, "learning_rate": 4.831530908467485e-06, "loss": 1.1779, "step": 11804 }, { "epoch": 0.9, "grad_norm": 1.0183517932891846, "learning_rate": 4.8241666318664115e-06, "loss": 0.4728, "step": 11805 }, { "epoch": 0.9, "grad_norm": 2.0531437397003174, "learning_rate": 4.816807833182069e-06, "loss": 1.462, "step": 11806 }, { "epoch": 0.9, "grad_norm": 1.1241111755371094, "learning_rate": 4.809454512838007e-06, "loss": 1.5855, "step": 11807 }, { "epoch": 0.9, "grad_norm": 1.6621942520141602, "learning_rate": 4.802106671257423e-06, "loss": 1.4657, "step": 11808 }, { "epoch": 0.9, "grad_norm": 1.2438610792160034, "learning_rate": 4.794764308863242e-06, "loss": 1.4327, "step": 11809 }, { "epoch": 0.9, "grad_norm": 1.4497740268707275, "learning_rate": 4.787427426078073e-06, "loss": 1.4957, "step": 11810 }, { "epoch": 0.9, "grad_norm": 1.9657450914382935, "learning_rate": 4.780096023324187e-06, "loss": 1.0523, "step": 11811 }, { "epoch": 0.9, "grad_norm": 1.277106523513794, "learning_rate": 4.772770101023538e-06, "loss": 1.224, "step": 11812 }, { "epoch": 0.9, "grad_norm": 2.6610889434814453, "learning_rate": 4.765449659597776e-06, "loss": 1.4577, "step": 11813 }, { "epoch": 0.9, "grad_norm": 1.2051759958267212, "learning_rate": 4.758134699468264e-06, "loss": 1.2872, "step": 11814 }, { "epoch": 0.9, "grad_norm": 1.454433560371399, "learning_rate": 4.750825221055965e-06, "loss": 1.766, "step": 11815 }, { "epoch": 0.9, "grad_norm": 1.6977356672286987, "learning_rate": 4.743521224781622e-06, "loss": 1.0935, "step": 11816 }, { "epoch": 0.9, "grad_norm": 1.5624066591262817, "learning_rate": 4.73622271106563e-06, "loss": 1.1608, "step": 11817 }, { "epoch": 0.9, "grad_norm": 1.9719696044921875, "learning_rate": 4.728929680328031e-06, "loss": 1.8137, "step": 11818 }, { "epoch": 0.9, "grad_norm": 1.4562290906906128, "learning_rate": 4.7216421329885906e-06, "loss": 1.4163, "step": 11819 }, { "epoch": 0.9, "grad_norm": 1.8915809392929077, 
"learning_rate": 4.714360069466761e-06, "loss": 1.3091, "step": 11820 }, { "epoch": 0.9, "grad_norm": 1.4841370582580566, "learning_rate": 4.7070834901816626e-06, "loss": 1.3099, "step": 11821 }, { "epoch": 0.9, "grad_norm": 1.6731656789779663, "learning_rate": 4.699812395552083e-06, "loss": 1.1906, "step": 11822 }, { "epoch": 0.9, "grad_norm": 1.643040418624878, "learning_rate": 4.692546785996543e-06, "loss": 1.1988, "step": 11823 }, { "epoch": 0.9, "grad_norm": 1.4633631706237793, "learning_rate": 4.685286661933208e-06, "loss": 1.7482, "step": 11824 }, { "epoch": 0.9, "grad_norm": 2.545041799545288, "learning_rate": 4.6780320237799324e-06, "loss": 1.562, "step": 11825 }, { "epoch": 0.9, "grad_norm": 3.0712826251983643, "learning_rate": 4.670782871954282e-06, "loss": 1.2899, "step": 11826 }, { "epoch": 0.9, "grad_norm": 1.4712687730789185, "learning_rate": 4.6635392068734685e-06, "loss": 1.6307, "step": 11827 }, { "epoch": 0.9, "grad_norm": 1.2428398132324219, "learning_rate": 4.656301028954413e-06, "loss": 0.9578, "step": 11828 }, { "epoch": 0.9, "grad_norm": 1.9435070753097534, "learning_rate": 4.649068338613726e-06, "loss": 1.3161, "step": 11829 }, { "epoch": 0.9, "grad_norm": 2.70847749710083, "learning_rate": 4.641841136267666e-06, "loss": 1.5346, "step": 11830 }, { "epoch": 0.9, "grad_norm": 2.315681219100952, "learning_rate": 4.634619422332232e-06, "loss": 0.9713, "step": 11831 }, { "epoch": 0.9, "grad_norm": 1.0655009746551514, "learning_rate": 4.627403197223057e-06, "loss": 1.0688, "step": 11832 }, { "epoch": 0.9, "grad_norm": 1.3054003715515137, "learning_rate": 4.620192461355466e-06, "loss": 1.2847, "step": 11833 }, { "epoch": 0.9, "grad_norm": 1.2001326084136963, "learning_rate": 4.612987215144482e-06, "loss": 1.0169, "step": 11834 }, { "epoch": 0.9, "grad_norm": 1.5549525022506714, "learning_rate": 4.605787459004829e-06, "loss": 1.0327, "step": 11835 }, { "epoch": 0.9, "grad_norm": 1.7758978605270386, "learning_rate": 4.5985931933508754e-06, "loss": 
0.9061, "step": 11836 }, { "epoch": 0.9, "grad_norm": 1.304467797279358, "learning_rate": 4.59140441859669e-06, "loss": 1.2767, "step": 11837 }, { "epoch": 0.9, "grad_norm": 1.3110052347183228, "learning_rate": 4.5842211351560525e-06, "loss": 1.7177, "step": 11838 }, { "epoch": 0.9, "grad_norm": 1.422974705696106, "learning_rate": 4.577043343442366e-06, "loss": 1.8524, "step": 11839 }, { "epoch": 0.9, "grad_norm": 1.8066372871398926, "learning_rate": 4.56987104386879e-06, "loss": 0.83, "step": 11840 }, { "epoch": 0.9, "grad_norm": 0.9813855290412903, "learning_rate": 4.562704236848126e-06, "loss": 1.0262, "step": 11841 }, { "epoch": 0.9, "grad_norm": 1.3917655944824219, "learning_rate": 4.5555429227928325e-06, "loss": 1.7229, "step": 11842 }, { "epoch": 0.9, "grad_norm": 1.2681527137756348, "learning_rate": 4.548387102115104e-06, "loss": 1.4744, "step": 11843 }, { "epoch": 0.9, "grad_norm": 1.502598762512207, "learning_rate": 4.541236775226809e-06, "loss": 1.094, "step": 11844 }, { "epoch": 0.9, "grad_norm": 1.333060622215271, "learning_rate": 4.534091942539475e-06, "loss": 1.2384, "step": 11845 }, { "epoch": 0.9, "grad_norm": 1.1284501552581787, "learning_rate": 4.5269526044643405e-06, "loss": 1.6076, "step": 11846 }, { "epoch": 0.9, "grad_norm": 1.6270610094070435, "learning_rate": 4.51981876141232e-06, "loss": 1.431, "step": 11847 }, { "epoch": 0.9, "grad_norm": 2.1142446994781494, "learning_rate": 4.512690413793974e-06, "loss": 1.4482, "step": 11848 }, { "epoch": 0.9, "grad_norm": 2.2749991416931152, "learning_rate": 4.50556756201963e-06, "loss": 1.67, "step": 11849 }, { "epoch": 0.9, "grad_norm": 1.6656441688537598, "learning_rate": 4.498450206499216e-06, "loss": 1.1161, "step": 11850 }, { "epoch": 0.9, "grad_norm": 1.9611330032348633, "learning_rate": 4.491338347642371e-06, "loss": 1.2519, "step": 11851 }, { "epoch": 0.9, "grad_norm": 1.4291296005249023, "learning_rate": 4.484231985858434e-06, "loss": 1.0985, "step": 11852 }, { "epoch": 0.9, "grad_norm": 
1.5367895364761353, "learning_rate": 4.4771311215564325e-06, "loss": 1.4738, "step": 11853 }, { "epoch": 0.9, "grad_norm": 1.4200443029403687, "learning_rate": 4.4700357551450415e-06, "loss": 1.3293, "step": 11854 }, { "epoch": 0.9, "grad_norm": 1.2760356664657593, "learning_rate": 4.462945887032632e-06, "loss": 1.1259, "step": 11855 }, { "epoch": 0.9, "grad_norm": 3.866826295852661, "learning_rate": 4.4558615176273025e-06, "loss": 2.0343, "step": 11856 }, { "epoch": 0.9, "grad_norm": 2.677333354949951, "learning_rate": 4.448782647336769e-06, "loss": 1.9965, "step": 11857 }, { "epoch": 0.9, "grad_norm": 1.47713041305542, "learning_rate": 4.441709276568484e-06, "loss": 1.981, "step": 11858 }, { "epoch": 0.9, "grad_norm": 3.3232192993164062, "learning_rate": 4.434641405729522e-06, "loss": 1.5653, "step": 11859 }, { "epoch": 0.9, "grad_norm": 1.9655184745788574, "learning_rate": 4.427579035226725e-06, "loss": 2.7626, "step": 11860 }, { "epoch": 0.91, "grad_norm": 1.8377163410186768, "learning_rate": 4.420522165466545e-06, "loss": 0.8731, "step": 11861 }, { "epoch": 0.91, "grad_norm": 1.2747951745986938, "learning_rate": 4.413470796855157e-06, "loss": 1.4149, "step": 11862 }, { "epoch": 0.91, "grad_norm": 0.9715408682823181, "learning_rate": 4.406424929798403e-06, "loss": 1.1165, "step": 11863 }, { "epoch": 0.91, "grad_norm": 1.432844638824463, "learning_rate": 4.3993845647018025e-06, "loss": 1.7686, "step": 11864 }, { "epoch": 0.91, "grad_norm": 2.1200008392333984, "learning_rate": 4.3923497019706e-06, "loss": 1.7274, "step": 11865 }, { "epoch": 0.91, "grad_norm": 2.410407781600952, "learning_rate": 4.3853203420096575e-06, "loss": 1.7672, "step": 11866 }, { "epoch": 0.91, "grad_norm": 4.440566062927246, "learning_rate": 4.378296485223565e-06, "loss": 1.1552, "step": 11867 }, { "epoch": 0.91, "grad_norm": 2.8936312198638916, "learning_rate": 4.371278132016609e-06, "loss": 1.6573, "step": 11868 }, { "epoch": 0.91, "grad_norm": 1.3642892837524414, "learning_rate": 
4.364265282792724e-06, "loss": 1.2135, "step": 11869 }, { "epoch": 0.91, "grad_norm": 2.3277878761291504, "learning_rate": 4.357257937955517e-06, "loss": 1.3781, "step": 11870 }, { "epoch": 0.91, "grad_norm": 1.2639245986938477, "learning_rate": 4.3502560979083225e-06, "loss": 1.405, "step": 11871 }, { "epoch": 0.91, "grad_norm": 2.2058517932891846, "learning_rate": 4.343259763054131e-06, "loss": 1.9796, "step": 11872 }, { "epoch": 0.91, "grad_norm": 1.9911342859268188, "learning_rate": 4.336268933795628e-06, "loss": 1.7227, "step": 11873 }, { "epoch": 0.91, "grad_norm": 1.7804477214813232, "learning_rate": 4.329283610535173e-06, "loss": 1.7033, "step": 11874 }, { "epoch": 0.91, "grad_norm": 1.5912375450134277, "learning_rate": 4.322303793674798e-06, "loss": 1.6856, "step": 11875 }, { "epoch": 0.91, "grad_norm": 3.7499196529388428, "learning_rate": 4.315329483616248e-06, "loss": 2.1644, "step": 11876 }, { "epoch": 0.91, "grad_norm": 1.4920681715011597, "learning_rate": 4.308360680760937e-06, "loss": 1.4765, "step": 11877 }, { "epoch": 0.91, "grad_norm": 1.3668397665023804, "learning_rate": 4.301397385509964e-06, "loss": 0.8538, "step": 11878 }, { "epoch": 0.91, "grad_norm": 1.4357692003250122, "learning_rate": 4.2944395982640665e-06, "loss": 1.0982, "step": 11879 }, { "epoch": 0.91, "grad_norm": 1.2617512941360474, "learning_rate": 4.287487319423756e-06, "loss": 1.375, "step": 11880 }, { "epoch": 0.91, "grad_norm": 1.1823307275772095, "learning_rate": 4.280540549389145e-06, "loss": 0.9002, "step": 11881 }, { "epoch": 0.91, "grad_norm": 1.8440476655960083, "learning_rate": 4.273599288560071e-06, "loss": 0.6156, "step": 11882 }, { "epoch": 0.91, "grad_norm": 3.4337921142578125, "learning_rate": 4.266663537336046e-06, "loss": 1.5789, "step": 11883 }, { "epoch": 0.91, "grad_norm": 0.9685393571853638, "learning_rate": 4.2597332961162414e-06, "loss": 1.2427, "step": 11884 }, { "epoch": 0.91, "grad_norm": 1.1823046207427979, "learning_rate": 4.25280856529956e-06, "loss": 
0.7283, "step": 11885 }, { "epoch": 0.91, "grad_norm": 1.3919429779052734, "learning_rate": 4.245889345284559e-06, "loss": 1.435, "step": 11886 }, { "epoch": 0.91, "grad_norm": 2.293743848800659, "learning_rate": 4.238975636469467e-06, "loss": 1.067, "step": 11887 }, { "epoch": 0.91, "grad_norm": 1.1327122449874878, "learning_rate": 4.232067439252207e-06, "loss": 1.5289, "step": 11888 }, { "epoch": 0.91, "grad_norm": 1.7327430248260498, "learning_rate": 4.225164754030386e-06, "loss": 1.5568, "step": 11889 }, { "epoch": 0.91, "grad_norm": 1.699083924293518, "learning_rate": 4.2182675812012965e-06, "loss": 1.2731, "step": 11890 }, { "epoch": 0.91, "grad_norm": 0.8676871061325073, "learning_rate": 4.211375921161909e-06, "loss": 1.0679, "step": 11891 }, { "epoch": 0.91, "grad_norm": 1.852229118347168, "learning_rate": 4.204489774308884e-06, "loss": 1.335, "step": 11892 }, { "epoch": 0.91, "grad_norm": 1.733964443206787, "learning_rate": 4.197609141038539e-06, "loss": 1.1892, "step": 11893 }, { "epoch": 0.91, "grad_norm": 2.4969615936279297, "learning_rate": 4.190734021746923e-06, "loss": 1.4834, "step": 11894 }, { "epoch": 0.91, "grad_norm": 2.540762186050415, "learning_rate": 4.18386441682973e-06, "loss": 1.6946, "step": 11895 }, { "epoch": 0.91, "grad_norm": 1.6750119924545288, "learning_rate": 4.177000326682323e-06, "loss": 1.7727, "step": 11896 }, { "epoch": 0.91, "grad_norm": 1.0865751504898071, "learning_rate": 4.170141751699796e-06, "loss": 1.0034, "step": 11897 }, { "epoch": 0.91, "grad_norm": 1.3112242221832275, "learning_rate": 4.163288692276901e-06, "loss": 1.0855, "step": 11898 }, { "epoch": 0.91, "grad_norm": 1.740527868270874, "learning_rate": 4.1564411488080435e-06, "loss": 1.9351, "step": 11899 }, { "epoch": 0.91, "grad_norm": 1.449326992034912, "learning_rate": 4.149599121687364e-06, "loss": 1.6584, "step": 11900 }, { "epoch": 0.91, "grad_norm": 1.2914906740188599, "learning_rate": 4.142762611308648e-06, "loss": 0.8927, "step": 11901 }, { "epoch": 
0.91, "grad_norm": 1.9253910779953003, "learning_rate": 4.1359316180653806e-06, "loss": 1.5314, "step": 11902 }, { "epoch": 0.91, "grad_norm": 1.1099952459335327, "learning_rate": 4.129106142350725e-06, "loss": 1.4295, "step": 11903 }, { "epoch": 0.91, "grad_norm": 1.2602760791778564, "learning_rate": 4.122286184557533e-06, "loss": 1.236, "step": 11904 }, { "epoch": 0.91, "grad_norm": 1.4527899026870728, "learning_rate": 4.115471745078314e-06, "loss": 1.3936, "step": 11905 }, { "epoch": 0.91, "grad_norm": 2.150482416152954, "learning_rate": 4.108662824305299e-06, "loss": 1.7992, "step": 11906 }, { "epoch": 0.91, "grad_norm": 2.2936065196990967, "learning_rate": 4.101859422630372e-06, "loss": 1.0229, "step": 11907 }, { "epoch": 0.91, "grad_norm": 3.1065409183502197, "learning_rate": 4.0950615404450995e-06, "loss": 1.2598, "step": 11908 }, { "epoch": 0.91, "grad_norm": 1.4630913734436035, "learning_rate": 4.0882691781407465e-06, "loss": 1.254, "step": 11909 }, { "epoch": 0.91, "grad_norm": 1.43723464012146, "learning_rate": 4.081482336108266e-06, "loss": 1.1776, "step": 11910 }, { "epoch": 0.91, "grad_norm": 2.5077731609344482, "learning_rate": 4.074701014738247e-06, "loss": 1.9613, "step": 11911 }, { "epoch": 0.91, "grad_norm": 1.311219573020935, "learning_rate": 4.06792521442102e-06, "loss": 1.0301, "step": 11912 }, { "epoch": 0.91, "grad_norm": 1.3248891830444336, "learning_rate": 4.061154935546563e-06, "loss": 1.0389, "step": 11913 }, { "epoch": 0.91, "grad_norm": 1.4430930614471436, "learning_rate": 4.054390178504552e-06, "loss": 1.241, "step": 11914 }, { "epoch": 0.91, "grad_norm": 2.764051675796509, "learning_rate": 4.047630943684333e-06, "loss": 1.8402, "step": 11915 }, { "epoch": 0.91, "grad_norm": 1.0569671392440796, "learning_rate": 4.040877231474938e-06, "loss": 0.9124, "step": 11916 }, { "epoch": 0.91, "grad_norm": 1.4743187427520752, "learning_rate": 4.034129042265066e-06, "loss": 1.4774, "step": 11917 }, { "epoch": 0.91, "grad_norm": 1.500390648841858, 
"learning_rate": 4.0273863764431315e-06, "loss": 1.3292, "step": 11918 }, { "epoch": 0.91, "grad_norm": 1.4508795738220215, "learning_rate": 4.020649234397223e-06, "loss": 1.3777, "step": 11919 }, { "epoch": 0.91, "grad_norm": 1.4601376056671143, "learning_rate": 4.0139176165150835e-06, "loss": 0.8075, "step": 11920 }, { "epoch": 0.91, "grad_norm": 1.965762734413147, "learning_rate": 4.00719152318415e-06, "loss": 1.8653, "step": 11921 }, { "epoch": 0.91, "grad_norm": 3.633507013320923, "learning_rate": 4.000470954791591e-06, "loss": 2.391, "step": 11922 }, { "epoch": 0.91, "grad_norm": 2.38156795501709, "learning_rate": 3.993755911724162e-06, "loss": 1.0955, "step": 11923 }, { "epoch": 0.91, "grad_norm": 1.5119420289993286, "learning_rate": 3.987046394368388e-06, "loss": 0.9931, "step": 11924 }, { "epoch": 0.91, "grad_norm": 1.3899707794189453, "learning_rate": 3.980342403110404e-06, "loss": 1.1133, "step": 11925 }, { "epoch": 0.91, "grad_norm": 1.411418080329895, "learning_rate": 3.973643938336113e-06, "loss": 1.0253, "step": 11926 }, { "epoch": 0.91, "grad_norm": 1.4229869842529297, "learning_rate": 3.966951000430996e-06, "loss": 2.0095, "step": 11927 }, { "epoch": 0.91, "grad_norm": 1.35450279712677, "learning_rate": 3.960263589780322e-06, "loss": 1.6869, "step": 11928 }, { "epoch": 0.91, "grad_norm": 1.477843999862671, "learning_rate": 3.953581706768949e-06, "loss": 1.0329, "step": 11929 }, { "epoch": 0.91, "grad_norm": 1.426182508468628, "learning_rate": 3.946905351781472e-06, "loss": 1.1871, "step": 11930 }, { "epoch": 0.91, "grad_norm": 1.1461577415466309, "learning_rate": 3.940234525202169e-06, "loss": 1.6468, "step": 11931 }, { "epoch": 0.91, "grad_norm": 2.4093172550201416, "learning_rate": 3.933569227414957e-06, "loss": 1.1145, "step": 11932 }, { "epoch": 0.91, "grad_norm": 1.8775044679641724, "learning_rate": 3.926909458803474e-06, "loss": 1.3327, "step": 11933 }, { "epoch": 0.91, "grad_norm": 1.4949190616607666, "learning_rate": 3.920255219751045e-06, 
"loss": 1.1458, "step": 11934 }, { "epoch": 0.91, "grad_norm": 1.429601788520813, "learning_rate": 3.913606510640644e-06, "loss": 1.5889, "step": 11935 }, { "epoch": 0.91, "grad_norm": 2.352559804916382, "learning_rate": 3.906963331854929e-06, "loss": 1.4831, "step": 11936 }, { "epoch": 0.91, "grad_norm": 1.4837313890457153, "learning_rate": 3.900325683776284e-06, "loss": 1.7965, "step": 11937 }, { "epoch": 0.91, "grad_norm": 3.092724561691284, "learning_rate": 3.893693566786716e-06, "loss": 1.2335, "step": 11938 }, { "epoch": 0.91, "grad_norm": 1.260905146598816, "learning_rate": 3.88706698126795e-06, "loss": 1.5144, "step": 11939 }, { "epoch": 0.91, "grad_norm": 2.007812738418579, "learning_rate": 3.880445927601395e-06, "loss": 1.1658, "step": 11940 }, { "epoch": 0.91, "grad_norm": 2.17819881439209, "learning_rate": 3.873830406168111e-06, "loss": 1.0962, "step": 11941 }, { "epoch": 0.91, "grad_norm": 1.929715633392334, "learning_rate": 3.8672204173488826e-06, "loss": 0.9926, "step": 11942 }, { "epoch": 0.91, "grad_norm": 1.7748539447784424, "learning_rate": 3.860615961524139e-06, "loss": 1.5041, "step": 11943 }, { "epoch": 0.91, "grad_norm": 1.3648627996444702, "learning_rate": 3.854017039074009e-06, "loss": 1.3588, "step": 11944 }, { "epoch": 0.91, "grad_norm": 2.296473503112793, "learning_rate": 3.84742365037829e-06, "loss": 1.6058, "step": 11945 }, { "epoch": 0.91, "grad_norm": 1.1444344520568848, "learning_rate": 3.840835795816466e-06, "loss": 1.2119, "step": 11946 }, { "epoch": 0.91, "grad_norm": 1.0871998071670532, "learning_rate": 3.834253475767735e-06, "loss": 1.3651, "step": 11947 }, { "epoch": 0.91, "grad_norm": 3.697192907333374, "learning_rate": 3.8276766906109155e-06, "loss": 1.158, "step": 11948 }, { "epoch": 0.91, "grad_norm": 4.892294883728027, "learning_rate": 3.8211054407245486e-06, "loss": 1.7701, "step": 11949 }, { "epoch": 0.91, "grad_norm": 1.1040594577789307, "learning_rate": 3.8145397264868656e-06, "loss": 1.3311, "step": 11950 }, { 
"epoch": 0.91, "grad_norm": 2.2179603576660156, "learning_rate": 3.807979548275742e-06, "loss": 1.3781, "step": 11951 }, { "epoch": 0.91, "grad_norm": 1.4547556638717651, "learning_rate": 3.8014249064687536e-06, "loss": 1.2551, "step": 11952 }, { "epoch": 0.91, "grad_norm": 2.5079445838928223, "learning_rate": 3.794875801443176e-06, "loss": 1.4236, "step": 11953 }, { "epoch": 0.91, "grad_norm": 1.9704419374465942, "learning_rate": 3.7883322335759084e-06, "loss": 1.882, "step": 11954 }, { "epoch": 0.91, "grad_norm": 2.286856174468994, "learning_rate": 3.7817942032436048e-06, "loss": 1.1019, "step": 11955 }, { "epoch": 0.91, "grad_norm": 1.0994435548782349, "learning_rate": 3.775261710822564e-06, "loss": 0.6096, "step": 11956 }, { "epoch": 0.91, "grad_norm": 2.1805756092071533, "learning_rate": 3.768734756688763e-06, "loss": 1.4229, "step": 11957 }, { "epoch": 0.91, "grad_norm": 3.242511034011841, "learning_rate": 3.7622133412178575e-06, "loss": 1.4436, "step": 11958 }, { "epoch": 0.91, "grad_norm": 1.0547428131103516, "learning_rate": 3.7556974647852017e-06, "loss": 1.2828, "step": 11959 }, { "epoch": 0.91, "grad_norm": 1.494156002998352, "learning_rate": 3.7491871277658186e-06, "loss": 1.4108, "step": 11960 }, { "epoch": 0.91, "grad_norm": 1.488516926765442, "learning_rate": 3.7426823305344196e-06, "loss": 1.6267, "step": 11961 }, { "epoch": 0.91, "grad_norm": 1.8085473775863647, "learning_rate": 3.7361830734653936e-06, "loss": 1.1249, "step": 11962 }, { "epoch": 0.91, "grad_norm": 2.52424693107605, "learning_rate": 3.7296893569328196e-06, "loss": 1.3409, "step": 11963 }, { "epoch": 0.91, "grad_norm": 1.6333152055740356, "learning_rate": 3.7232011813104093e-06, "loss": 1.3816, "step": 11964 }, { "epoch": 0.91, "grad_norm": 1.3049743175506592, "learning_rate": 3.7167185469716426e-06, "loss": 1.2161, "step": 11965 }, { "epoch": 0.91, "grad_norm": 1.6980193853378296, "learning_rate": 3.710241454289598e-06, "loss": 1.6007, "step": 11966 }, { "epoch": 0.91, "grad_norm": 
1.1585636138916016, "learning_rate": 3.703769903637089e-06, "loss": 0.8745, "step": 11967 }, { "epoch": 0.91, "grad_norm": 1.7587305307388306, "learning_rate": 3.6973038953865837e-06, "loss": 1.1101, "step": 11968 }, { "epoch": 0.91, "grad_norm": 0.9990256428718567, "learning_rate": 3.690843429910229e-06, "loss": 1.6802, "step": 11969 }, { "epoch": 0.91, "grad_norm": 1.6511043310165405, "learning_rate": 3.684388507579872e-06, "loss": 0.961, "step": 11970 }, { "epoch": 0.91, "grad_norm": 1.0895123481750488, "learning_rate": 3.6779391287670494e-06, "loss": 0.9619, "step": 11971 }, { "epoch": 0.91, "grad_norm": 1.0488873720169067, "learning_rate": 3.6714952938429304e-06, "loss": 1.3477, "step": 11972 }, { "epoch": 0.91, "grad_norm": 2.587536573410034, "learning_rate": 3.665057003178396e-06, "loss": 1.5787, "step": 11973 }, { "epoch": 0.91, "grad_norm": 1.1434811353683472, "learning_rate": 3.6586242571440386e-06, "loss": 0.96, "step": 11974 }, { "epoch": 0.91, "grad_norm": 2.531602144241333, "learning_rate": 3.6521970561100515e-06, "loss": 1.0641, "step": 11975 }, { "epoch": 0.91, "grad_norm": 1.1168748140335083, "learning_rate": 3.645775400446383e-06, "loss": 1.354, "step": 11976 }, { "epoch": 0.91, "grad_norm": 2.119413137435913, "learning_rate": 3.6393592905226483e-06, "loss": 1.4724, "step": 11977 }, { "epoch": 0.91, "grad_norm": 1.3926554918289185, "learning_rate": 3.6329487267081186e-06, "loss": 0.8434, "step": 11978 }, { "epoch": 0.91, "grad_norm": 1.4595978260040283, "learning_rate": 3.6265437093717437e-06, "loss": 1.6945, "step": 11979 }, { "epoch": 0.91, "grad_norm": 1.4392191171646118, "learning_rate": 3.620144238882206e-06, "loss": 0.8421, "step": 11980 }, { "epoch": 0.91, "grad_norm": 1.6651784181594849, "learning_rate": 3.6137503156078113e-06, "loss": 1.6506, "step": 11981 }, { "epoch": 0.91, "grad_norm": 1.4598582983016968, "learning_rate": 3.6073619399165427e-06, "loss": 1.172, "step": 11982 }, { "epoch": 0.91, "grad_norm": 3.388120174407959, 
"learning_rate": 3.600979112176128e-06, "loss": 1.5572, "step": 11983 }, { "epoch": 0.91, "grad_norm": 1.2534856796264648, "learning_rate": 3.5946018327539188e-06, "loss": 0.7889, "step": 11984 }, { "epoch": 0.91, "grad_norm": 2.511744260787964, "learning_rate": 3.588230102016954e-06, "loss": 1.3589, "step": 11985 }, { "epoch": 0.91, "grad_norm": 0.8399077653884888, "learning_rate": 3.5818639203319848e-06, "loss": 0.7548, "step": 11986 }, { "epoch": 0.91, "grad_norm": 2.4987759590148926, "learning_rate": 3.575503288065396e-06, "loss": 1.9743, "step": 11987 }, { "epoch": 0.91, "grad_norm": 0.7911930680274963, "learning_rate": 3.569148205583295e-06, "loss": 0.5651, "step": 11988 }, { "epoch": 0.91, "grad_norm": 1.0381176471710205, "learning_rate": 3.562798673251466e-06, "loss": 0.9151, "step": 11989 }, { "epoch": 0.91, "grad_norm": 1.8620930910110474, "learning_rate": 3.556454691435329e-06, "loss": 1.0683, "step": 11990 }, { "epoch": 0.91, "grad_norm": 2.5560107231140137, "learning_rate": 3.550116260500047e-06, "loss": 1.9147, "step": 11991 }, { "epoch": 0.92, "grad_norm": 1.5224028825759888, "learning_rate": 3.543783380810428e-06, "loss": 1.1448, "step": 11992 }, { "epoch": 0.92, "grad_norm": 6.480856418609619, "learning_rate": 3.5374560527309363e-06, "loss": 1.1195, "step": 11993 }, { "epoch": 0.92, "grad_norm": 2.094738721847534, "learning_rate": 3.5311342766257695e-06, "loss": 1.54, "step": 11994 }, { "epoch": 0.92, "grad_norm": 2.3797285556793213, "learning_rate": 3.5248180528588024e-06, "loss": 1.7868, "step": 11995 }, { "epoch": 0.92, "grad_norm": 2.5274770259857178, "learning_rate": 3.518507381793523e-06, "loss": 1.5388, "step": 11996 }, { "epoch": 0.92, "grad_norm": 1.5408436059951782, "learning_rate": 3.512202263793174e-06, "loss": 1.4277, "step": 11997 }, { "epoch": 0.92, "grad_norm": 2.307950019836426, "learning_rate": 3.5059026992206647e-06, "loss": 1.6127, "step": 11998 }, { "epoch": 0.92, "grad_norm": 2.009178400039673, "learning_rate": 
3.4996086884385383e-06, "loss": 1.5062, "step": 11999 }, { "epoch": 0.92, "grad_norm": 1.639479637145996, "learning_rate": 3.4933202318090828e-06, "loss": 1.3417, "step": 12000 }, { "epoch": 0.92, "grad_norm": 1.8464726209640503, "learning_rate": 3.4870373296942316e-06, "loss": 1.315, "step": 12001 }, { "epoch": 0.92, "grad_norm": 2.829265594482422, "learning_rate": 3.4807599824555615e-06, "loss": 1.5471, "step": 12002 }, { "epoch": 0.92, "grad_norm": 1.7422820329666138, "learning_rate": 3.4744881904544057e-06, "loss": 2.1113, "step": 12003 }, { "epoch": 0.92, "grad_norm": 1.1964062452316284, "learning_rate": 3.4682219540517536e-06, "loss": 1.1264, "step": 12004 }, { "epoch": 0.92, "grad_norm": 1.2096002101898193, "learning_rate": 3.4619612736082273e-06, "loss": 1.3241, "step": 12005 }, { "epoch": 0.92, "grad_norm": 1.4821805953979492, "learning_rate": 3.4557061494841835e-06, "loss": 1.5657, "step": 12006 }, { "epoch": 0.92, "grad_norm": 1.3864182233810425, "learning_rate": 3.449456582039645e-06, "loss": 1.0992, "step": 12007 }, { "epoch": 0.92, "grad_norm": 1.8159229755401611, "learning_rate": 3.443212571634302e-06, "loss": 1.4856, "step": 12008 }, { "epoch": 0.92, "grad_norm": 2.5395731925964355, "learning_rate": 3.436974118627545e-06, "loss": 0.604, "step": 12009 }, { "epoch": 0.92, "grad_norm": 1.4026204347610474, "learning_rate": 3.4307412233784308e-06, "loss": 1.0829, "step": 12010 }, { "epoch": 0.92, "grad_norm": 0.8309879899024963, "learning_rate": 3.4245138862456728e-06, "loss": 0.8027, "step": 12011 }, { "epoch": 0.92, "grad_norm": 1.246238350868225, "learning_rate": 3.418292107587706e-06, "loss": 1.0701, "step": 12012 }, { "epoch": 0.92, "grad_norm": 4.200657844543457, "learning_rate": 3.4120758877626555e-06, "loss": 1.7114, "step": 12013 }, { "epoch": 0.92, "grad_norm": 1.7835906744003296, "learning_rate": 3.4058652271282578e-06, "loss": 1.3696, "step": 12014 }, { "epoch": 0.92, "grad_norm": 1.358012080192566, "learning_rate": 3.3996601260419923e-06, 
"loss": 1.3125, "step": 12015 }, { "epoch": 0.92, "grad_norm": 2.014047622680664, "learning_rate": 3.393460584861008e-06, "loss": 0.9619, "step": 12016 }, { "epoch": 0.92, "grad_norm": 1.1814861297607422, "learning_rate": 3.3872666039421076e-06, "loss": 1.0878, "step": 12017 }, { "epoch": 0.92, "grad_norm": 1.3492258787155151, "learning_rate": 3.381078183641806e-06, "loss": 1.6929, "step": 12018 }, { "epoch": 0.92, "grad_norm": 1.0226621627807617, "learning_rate": 3.3748953243162627e-06, "loss": 1.2438, "step": 12019 }, { "epoch": 0.92, "grad_norm": 1.162513017654419, "learning_rate": 3.36871802632136e-06, "loss": 1.28, "step": 12020 }, { "epoch": 0.92, "grad_norm": 1.3701221942901611, "learning_rate": 3.3625462900126136e-06, "loss": 0.8977, "step": 12021 }, { "epoch": 0.92, "grad_norm": 1.023123860359192, "learning_rate": 3.3563801157452612e-06, "loss": 0.9981, "step": 12022 }, { "epoch": 0.92, "grad_norm": 1.4911173582077026, "learning_rate": 3.3502195038741857e-06, "loss": 1.9638, "step": 12023 }, { "epoch": 0.92, "grad_norm": 1.4599124193191528, "learning_rate": 3.34406445475397e-06, "loss": 1.3418, "step": 12024 }, { "epoch": 0.92, "grad_norm": 1.7228615283966064, "learning_rate": 3.3379149687388867e-06, "loss": 1.1623, "step": 12025 }, { "epoch": 0.92, "grad_norm": 1.2171915769577026, "learning_rate": 3.331771046182852e-06, "loss": 0.9385, "step": 12026 }, { "epoch": 0.92, "grad_norm": 1.5356168746948242, "learning_rate": 3.325632687439495e-06, "loss": 1.0595, "step": 12027 }, { "epoch": 0.92, "grad_norm": 3.2158002853393555, "learning_rate": 3.3194998928621323e-06, "loss": 1.3342, "step": 12028 }, { "epoch": 0.92, "grad_norm": 1.4540281295776367, "learning_rate": 3.3133726628037153e-06, "loss": 0.7339, "step": 12029 }, { "epoch": 0.92, "grad_norm": 1.3948249816894531, "learning_rate": 3.3072509976169065e-06, "loss": 1.286, "step": 12030 }, { "epoch": 0.92, "grad_norm": 1.7642834186553955, "learning_rate": 3.3011348976540457e-06, "loss": 1.1199, "step": 12031 
}, { "epoch": 0.92, "grad_norm": 2.264345407485962, "learning_rate": 3.295024363267152e-06, "loss": 1.1392, "step": 12032 }, { "epoch": 0.92, "grad_norm": 2.4451687335968018, "learning_rate": 3.28891939480791e-06, "loss": 1.0275, "step": 12033 }, { "epoch": 0.92, "grad_norm": 3.7374727725982666, "learning_rate": 3.282819992627717e-06, "loss": 2.3026, "step": 12034 }, { "epoch": 0.92, "grad_norm": 1.7753076553344727, "learning_rate": 3.2767261570776143e-06, "loss": 1.7966, "step": 12035 }, { "epoch": 0.92, "grad_norm": 1.4455773830413818, "learning_rate": 3.2706378885083323e-06, "loss": 1.4089, "step": 12036 }, { "epoch": 0.92, "grad_norm": 1.1399189233779907, "learning_rate": 3.2645551872703016e-06, "loss": 1.0801, "step": 12037 }, { "epoch": 0.92, "grad_norm": 1.2810397148132324, "learning_rate": 3.2584780537136207e-06, "loss": 1.5651, "step": 12038 }, { "epoch": 0.92, "grad_norm": 2.138277530670166, "learning_rate": 3.2524064881880314e-06, "loss": 0.5858, "step": 12039 }, { "epoch": 0.92, "grad_norm": 2.0803959369659424, "learning_rate": 3.2463404910430206e-06, "loss": 1.7843, "step": 12040 }, { "epoch": 0.92, "grad_norm": 1.1430319547653198, "learning_rate": 3.2402800626276874e-06, "loss": 0.9268, "step": 12041 }, { "epoch": 0.92, "grad_norm": 2.288724899291992, "learning_rate": 3.234225203290875e-06, "loss": 1.2031, "step": 12042 }, { "epoch": 0.92, "grad_norm": 1.2440353631973267, "learning_rate": 3.228175913381071e-06, "loss": 1.6157, "step": 12043 }, { "epoch": 0.92, "grad_norm": 1.4311864376068115, "learning_rate": 3.222132193246441e-06, "loss": 1.3828, "step": 12044 }, { "epoch": 0.92, "grad_norm": 1.200666904449463, "learning_rate": 3.21609404323483e-06, "loss": 0.9346, "step": 12045 }, { "epoch": 0.92, "grad_norm": 1.5117766857147217, "learning_rate": 3.210061463693781e-06, "loss": 1.3531, "step": 12046 }, { "epoch": 0.92, "grad_norm": 1.5002537965774536, "learning_rate": 3.2040344549704948e-06, "loss": 1.6684, "step": 12047 }, { "epoch": 0.92, 
"grad_norm": 2.0409388542175293, "learning_rate": 3.1980130174118607e-06, "loss": 1.7831, "step": 12048 }, { "epoch": 0.92, "grad_norm": 1.7851649522781372, "learning_rate": 3.1919971513644563e-06, "loss": 1.3039, "step": 12049 }, { "epoch": 0.92, "grad_norm": 1.6934590339660645, "learning_rate": 3.1859868571745055e-06, "loss": 1.1617, "step": 12050 }, { "epoch": 0.92, "grad_norm": 1.115277647972107, "learning_rate": 3.1799821351879644e-06, "loss": 1.1343, "step": 12051 }, { "epoch": 0.92, "grad_norm": 1.268747091293335, "learning_rate": 3.1739829857504234e-06, "loss": 0.8263, "step": 12052 }, { "epoch": 0.92, "grad_norm": 2.044794797897339, "learning_rate": 3.167989409207173e-06, "loss": 1.3005, "step": 12053 }, { "epoch": 0.92, "grad_norm": 2.3307628631591797, "learning_rate": 3.16200140590317e-06, "loss": 1.5583, "step": 12054 }, { "epoch": 0.92, "grad_norm": 1.399969458580017, "learning_rate": 3.1560189761830728e-06, "loss": 1.5337, "step": 12055 }, { "epoch": 0.92, "grad_norm": 1.4130237102508545, "learning_rate": 3.1500421203911833e-06, "loss": 1.0571, "step": 12056 }, { "epoch": 0.92, "grad_norm": 0.9419851303100586, "learning_rate": 3.144070838871538e-06, "loss": 0.7335, "step": 12057 }, { "epoch": 0.92, "grad_norm": 0.9802253246307373, "learning_rate": 3.1381051319677946e-06, "loss": 1.0181, "step": 12058 }, { "epoch": 0.92, "grad_norm": 1.1184536218643188, "learning_rate": 3.132145000023301e-06, "loss": 0.7917, "step": 12059 }, { "epoch": 0.92, "grad_norm": 1.7921186685562134, "learning_rate": 3.126190443381116e-06, "loss": 1.0667, "step": 12060 }, { "epoch": 0.92, "grad_norm": 0.8457953929901123, "learning_rate": 3.120241462383966e-06, "loss": 0.9512, "step": 12061 }, { "epoch": 0.92, "grad_norm": 1.1361626386642456, "learning_rate": 3.114298057374232e-06, "loss": 1.4266, "step": 12062 }, { "epoch": 0.92, "grad_norm": 2.364863157272339, "learning_rate": 3.108360228693996e-06, "loss": 1.4059, "step": 12063 }, { "epoch": 0.92, "grad_norm": 
1.3822933435440063, "learning_rate": 3.1024279766850184e-06, "loss": 1.4458, "step": 12064 }, { "epoch": 0.92, "grad_norm": 1.637818694114685, "learning_rate": 3.0965013016887368e-06, "loss": 1.5411, "step": 12065 }, { "epoch": 0.92, "grad_norm": 1.7397501468658447, "learning_rate": 3.0905802040462563e-06, "loss": 1.1424, "step": 12066 }, { "epoch": 0.92, "grad_norm": 3.1947286128997803, "learning_rate": 3.0846646840983817e-06, "loss": 1.2799, "step": 12067 }, { "epoch": 0.92, "grad_norm": 1.8757970333099365, "learning_rate": 3.0787547421855633e-06, "loss": 1.0046, "step": 12068 }, { "epoch": 0.92, "grad_norm": 2.1572325229644775, "learning_rate": 3.0728503786479735e-06, "loss": 1.0693, "step": 12069 }, { "epoch": 0.92, "grad_norm": 1.8321919441223145, "learning_rate": 3.06695159382544e-06, "loss": 1.7936, "step": 12070 }, { "epoch": 0.92, "grad_norm": 2.0630598068237305, "learning_rate": 3.0610583880574695e-06, "loss": 1.3267, "step": 12071 }, { "epoch": 0.92, "grad_norm": 1.3525620698928833, "learning_rate": 3.0551707616832347e-06, "loss": 1.191, "step": 12072 }, { "epoch": 0.92, "grad_norm": 1.3630342483520508, "learning_rate": 3.0492887150416317e-06, "loss": 1.2896, "step": 12073 }, { "epoch": 0.92, "grad_norm": 1.4314762353897095, "learning_rate": 3.043412248471178e-06, "loss": 1.4018, "step": 12074 }, { "epoch": 0.92, "grad_norm": 1.9290764331817627, "learning_rate": 3.0375413623101145e-06, "loss": 1.597, "step": 12075 }, { "epoch": 0.92, "grad_norm": 2.3446717262268066, "learning_rate": 3.0316760568963486e-06, "loss": 1.5516, "step": 12076 }, { "epoch": 0.92, "grad_norm": 1.1188081502914429, "learning_rate": 3.0258163325674437e-06, "loss": 1.511, "step": 12077 }, { "epoch": 0.92, "grad_norm": 1.132788062095642, "learning_rate": 3.0199621896606745e-06, "loss": 1.2431, "step": 12078 }, { "epoch": 0.92, "grad_norm": 3.7914326190948486, "learning_rate": 3.014113628512982e-06, "loss": 1.7565, "step": 12079 }, { "epoch": 0.92, "grad_norm": 3.445600748062134, 
"learning_rate": 3.008270649460965e-06, "loss": 1.9383, "step": 12080 }, { "epoch": 0.92, "grad_norm": 1.0663244724273682, "learning_rate": 3.0024332528409417e-06, "loss": 0.687, "step": 12081 }, { "epoch": 0.92, "grad_norm": 2.0027453899383545, "learning_rate": 2.9966014389888777e-06, "loss": 1.68, "step": 12082 }, { "epoch": 0.92, "grad_norm": 1.0098276138305664, "learning_rate": 2.990775208240437e-06, "loss": 0.9203, "step": 12083 }, { "epoch": 0.92, "grad_norm": 3.131359100341797, "learning_rate": 2.9849545609309524e-06, "loss": 1.7609, "step": 12084 }, { "epoch": 0.92, "grad_norm": 1.5380417108535767, "learning_rate": 2.9791394973954225e-06, "loss": 1.2036, "step": 12085 }, { "epoch": 0.92, "grad_norm": 1.232794165611267, "learning_rate": 2.9733300179685563e-06, "loss": 1.083, "step": 12086 }, { "epoch": 0.92, "grad_norm": 5.045980930328369, "learning_rate": 2.9675261229846873e-06, "loss": 2.5285, "step": 12087 }, { "epoch": 0.92, "grad_norm": 1.9293673038482666, "learning_rate": 2.9617278127779037e-06, "loss": 1.3319, "step": 12088 }, { "epoch": 0.92, "grad_norm": 4.461059093475342, "learning_rate": 2.955935087681916e-06, "loss": 1.7776, "step": 12089 }, { "epoch": 0.92, "grad_norm": 1.906585931777954, "learning_rate": 2.950147948030124e-06, "loss": 0.6776, "step": 12090 }, { "epoch": 0.92, "grad_norm": 1.2681468725204468, "learning_rate": 2.9443663941556067e-06, "loss": 1.2011, "step": 12091 }, { "epoch": 0.92, "grad_norm": 1.3752706050872803, "learning_rate": 2.9385904263911525e-06, "loss": 1.2974, "step": 12092 }, { "epoch": 0.92, "grad_norm": 1.2689939737319946, "learning_rate": 2.9328200450691622e-06, "loss": 1.5242, "step": 12093 }, { "epoch": 0.92, "grad_norm": 1.3519964218139648, "learning_rate": 2.927055250521793e-06, "loss": 0.9279, "step": 12094 }, { "epoch": 0.92, "grad_norm": 1.6887518167495728, "learning_rate": 2.9212960430808345e-06, "loss": 1.9723, "step": 12095 }, { "epoch": 0.92, "grad_norm": 1.1589820384979248, "learning_rate": 
2.9155424230777218e-06, "loss": 1.9634, "step": 12096 }, { "epoch": 0.92, "grad_norm": 1.0487043857574463, "learning_rate": 2.9097943908436565e-06, "loss": 1.3186, "step": 12097 }, { "epoch": 0.92, "grad_norm": 2.0635924339294434, "learning_rate": 2.9040519467094517e-06, "loss": 1.4542, "step": 12098 }, { "epoch": 0.92, "grad_norm": 1.8998371362686157, "learning_rate": 2.89831509100561e-06, "loss": 1.1976, "step": 12099 }, { "epoch": 0.92, "grad_norm": 1.6007601022720337, "learning_rate": 2.892583824062334e-06, "loss": 0.9074, "step": 12100 }, { "epoch": 0.92, "grad_norm": 1.2451410293579102, "learning_rate": 2.8868581462095033e-06, "loss": 1.4554, "step": 12101 }, { "epoch": 0.92, "grad_norm": 1.7980819940567017, "learning_rate": 2.881138057776633e-06, "loss": 1.6287, "step": 12102 }, { "epoch": 0.92, "grad_norm": 1.3614675998687744, "learning_rate": 2.8754235590929712e-06, "loss": 1.5656, "step": 12103 }, { "epoch": 0.92, "grad_norm": 1.3653050661087036, "learning_rate": 2.8697146504874097e-06, "loss": 1.0439, "step": 12104 }, { "epoch": 0.92, "grad_norm": 2.2433817386627197, "learning_rate": 2.8640113322885185e-06, "loss": 1.2638, "step": 12105 }, { "epoch": 0.92, "grad_norm": 2.4604198932647705, "learning_rate": 2.8583136048245697e-06, "loss": 1.6811, "step": 12106 }, { "epoch": 0.92, "grad_norm": 1.6576993465423584, "learning_rate": 2.852621468423511e-06, "loss": 1.1526, "step": 12107 }, { "epoch": 0.92, "grad_norm": 1.8149131536483765, "learning_rate": 2.846934923412936e-06, "loss": 1.6046, "step": 12108 }, { "epoch": 0.92, "grad_norm": 0.9820616245269775, "learning_rate": 2.8412539701201392e-06, "loss": 0.9075, "step": 12109 }, { "epoch": 0.92, "grad_norm": 2.3576865196228027, "learning_rate": 2.8355786088721135e-06, "loss": 1.5669, "step": 12110 }, { "epoch": 0.92, "grad_norm": 0.9079616665840149, "learning_rate": 2.8299088399954875e-06, "loss": 0.8624, "step": 12111 }, { "epoch": 0.92, "grad_norm": 2.298245429992676, "learning_rate": 2.824244663816611e-06, 
"loss": 1.6344, "step": 12112 }, { "epoch": 0.92, "grad_norm": 1.2135244607925415, "learning_rate": 2.818586080661456e-06, "loss": 1.8868, "step": 12113 }, { "epoch": 0.92, "grad_norm": 1.2843493223190308, "learning_rate": 2.8129330908557406e-06, "loss": 1.8124, "step": 12114 }, { "epoch": 0.92, "grad_norm": 1.6858468055725098, "learning_rate": 2.8072856947248037e-06, "loss": 1.0068, "step": 12115 }, { "epoch": 0.92, "grad_norm": 2.0937423706054688, "learning_rate": 2.8016438925937082e-06, "loss": 1.8657, "step": 12116 }, { "epoch": 0.92, "grad_norm": 1.1624290943145752, "learning_rate": 2.7960076847871498e-06, "loss": 0.9446, "step": 12117 }, { "epoch": 0.92, "grad_norm": 1.7812198400497437, "learning_rate": 2.790377071629524e-06, "loss": 1.3876, "step": 12118 }, { "epoch": 0.92, "grad_norm": 1.5162807703018188, "learning_rate": 2.784752053444928e-06, "loss": 1.925, "step": 12119 }, { "epoch": 0.92, "grad_norm": 1.2596086263656616, "learning_rate": 2.779132630557091e-06, "loss": 1.249, "step": 12120 }, { "epoch": 0.92, "grad_norm": 1.7183926105499268, "learning_rate": 2.7735188032894434e-06, "loss": 1.8289, "step": 12121 }, { "epoch": 0.92, "grad_norm": 2.346285581588745, "learning_rate": 2.7679105719651264e-06, "loss": 0.9455, "step": 12122 }, { "epoch": 0.93, "grad_norm": 2.2659974098205566, "learning_rate": 2.762307936906894e-06, "loss": 1.1416, "step": 12123 }, { "epoch": 0.93, "grad_norm": 1.3313379287719727, "learning_rate": 2.7567108984372093e-06, "loss": 1.357, "step": 12124 }, { "epoch": 0.93, "grad_norm": 1.7182910442352295, "learning_rate": 2.7511194568782263e-06, "loss": 0.9042, "step": 12125 }, { "epoch": 0.93, "grad_norm": 1.525767207145691, "learning_rate": 2.745533612551765e-06, "loss": 1.2871, "step": 12126 }, { "epoch": 0.93, "grad_norm": 1.593380331993103, "learning_rate": 2.739953365779302e-06, "loss": 1.2071, "step": 12127 }, { "epoch": 0.93, "grad_norm": 4.317625522613525, "learning_rate": 2.7343787168820466e-06, "loss": 1.2139, "step": 12128 
}, { "epoch": 0.93, "grad_norm": 2.669893980026245, "learning_rate": 2.7288096661808315e-06, "loss": 1.6236, "step": 12129 }, { "epoch": 0.93, "grad_norm": 1.330817461013794, "learning_rate": 2.723246213996178e-06, "loss": 1.0517, "step": 12130 }, { "epoch": 0.93, "grad_norm": 1.0221527814865112, "learning_rate": 2.7176883606483296e-06, "loss": 1.0074, "step": 12131 }, { "epoch": 0.93, "grad_norm": 1.608872890472412, "learning_rate": 2.7121361064571417e-06, "loss": 2.1238, "step": 12132 }, { "epoch": 0.93, "grad_norm": 1.1375700235366821, "learning_rate": 2.706589451742181e-06, "loss": 0.7909, "step": 12133 }, { "epoch": 0.93, "grad_norm": 1.312111258506775, "learning_rate": 2.701048396822714e-06, "loss": 1.4665, "step": 12134 }, { "epoch": 0.93, "grad_norm": 1.5451921224594116, "learning_rate": 2.6955129420176196e-06, "loss": 2.2179, "step": 12135 }, { "epoch": 0.93, "grad_norm": 1.1867555379867554, "learning_rate": 2.6899830876455202e-06, "loss": 1.2051, "step": 12136 }, { "epoch": 0.93, "grad_norm": 1.711233139038086, "learning_rate": 2.6844588340247057e-06, "loss": 1.1427, "step": 12137 }, { "epoch": 0.93, "grad_norm": 1.6571502685546875, "learning_rate": 2.6789401814730885e-06, "loss": 2.0174, "step": 12138 }, { "epoch": 0.93, "grad_norm": 1.7038646936416626, "learning_rate": 2.673427130308326e-06, "loss": 1.6321, "step": 12139 }, { "epoch": 0.93, "grad_norm": 1.4179856777191162, "learning_rate": 2.667919680847741e-06, "loss": 1.5804, "step": 12140 }, { "epoch": 0.93, "grad_norm": 1.4185998439788818, "learning_rate": 2.6624178334082927e-06, "loss": 1.3193, "step": 12141 }, { "epoch": 0.93, "grad_norm": 1.2473220825195312, "learning_rate": 2.6569215883066377e-06, "loss": 0.6569, "step": 12142 }, { "epoch": 0.93, "grad_norm": 2.0298306941986084, "learning_rate": 2.6514309458591346e-06, "loss": 1.8168, "step": 12143 }, { "epoch": 0.93, "grad_norm": 1.8477952480316162, "learning_rate": 2.645945906381797e-06, "loss": 1.5512, "step": 12144 }, { "epoch": 0.93, 
"grad_norm": 1.180233359336853, "learning_rate": 2.640466470190317e-06, "loss": 1.0499, "step": 12145 }, { "epoch": 0.93, "grad_norm": 1.429351568222046, "learning_rate": 2.6349926376000755e-06, "loss": 1.3922, "step": 12146 }, { "epoch": 0.93, "grad_norm": 3.7648658752441406, "learning_rate": 2.6295244089261204e-06, "loss": 1.391, "step": 12147 }, { "epoch": 0.93, "grad_norm": 1.1827863454818726, "learning_rate": 2.6240617844831674e-06, "loss": 1.0888, "step": 12148 }, { "epoch": 0.93, "grad_norm": 1.7109932899475098, "learning_rate": 2.6186047645856417e-06, "loss": 1.1062, "step": 12149 }, { "epoch": 0.93, "grad_norm": 1.7538444995880127, "learning_rate": 2.6131533495476033e-06, "loss": 2.5153, "step": 12150 }, { "epoch": 0.93, "grad_norm": 2.5106112957000732, "learning_rate": 2.6077075396828466e-06, "loss": 1.3741, "step": 12151 }, { "epoch": 0.93, "grad_norm": 2.5205888748168945, "learning_rate": 2.6022673353047754e-06, "loss": 1.5364, "step": 12152 }, { "epoch": 0.93, "grad_norm": 1.2791645526885986, "learning_rate": 2.5968327367265175e-06, "loss": 0.8945, "step": 12153 }, { "epoch": 0.93, "grad_norm": 1.4316375255584717, "learning_rate": 2.5914037442608674e-06, "loss": 2.001, "step": 12154 }, { "epoch": 0.93, "grad_norm": 1.2198189496994019, "learning_rate": 2.5859803582202968e-06, "loss": 0.6544, "step": 12155 }, { "epoch": 0.93, "grad_norm": 1.4601643085479736, "learning_rate": 2.5805625789169342e-06, "loss": 1.2762, "step": 12156 }, { "epoch": 0.93, "grad_norm": 1.0920969247817993, "learning_rate": 2.57515040666263e-06, "loss": 0.7739, "step": 12157 }, { "epoch": 0.93, "grad_norm": 1.516317367553711, "learning_rate": 2.569743841768879e-06, "loss": 1.3553, "step": 12158 }, { "epoch": 0.93, "grad_norm": 1.0323896408081055, "learning_rate": 2.5643428845468443e-06, "loss": 1.5989, "step": 12159 }, { "epoch": 0.93, "grad_norm": 1.5749074220657349, "learning_rate": 2.5589475353073988e-06, "loss": 1.123, "step": 12160 }, { "epoch": 0.93, "grad_norm": 
1.0789483785629272, "learning_rate": 2.5535577943610724e-06, "loss": 1.1473, "step": 12161 }, { "epoch": 0.93, "grad_norm": 1.2974616289138794, "learning_rate": 2.548173662018072e-06, "loss": 1.5991, "step": 12162 }, { "epoch": 0.93, "grad_norm": 2.204244613647461, "learning_rate": 2.542795138588283e-06, "loss": 1.5563, "step": 12163 }, { "epoch": 0.93, "grad_norm": 1.3449748754501343, "learning_rate": 2.5374222243812806e-06, "loss": 1.3681, "step": 12164 }, { "epoch": 0.93, "grad_norm": 0.9425383806228638, "learning_rate": 2.532054919706295e-06, "loss": 0.8551, "step": 12165 }, { "epoch": 0.93, "grad_norm": 1.2087656259536743, "learning_rate": 2.526693224872256e-06, "loss": 1.1121, "step": 12166 }, { "epoch": 0.93, "grad_norm": 3.0815958976745605, "learning_rate": 2.5213371401877516e-06, "loss": 1.7507, "step": 12167 }, { "epoch": 0.93, "grad_norm": 3.3646957874298096, "learning_rate": 2.5159866659610566e-06, "loss": 1.4238, "step": 12168 }, { "epoch": 0.93, "grad_norm": 2.6675214767456055, "learning_rate": 2.5106418025001356e-06, "loss": 1.7629, "step": 12169 }, { "epoch": 0.93, "grad_norm": 0.9914548993110657, "learning_rate": 2.5053025501126093e-06, "loss": 1.2724, "step": 12170 }, { "epoch": 0.93, "grad_norm": 1.3724204301834106, "learning_rate": 2.499968909105754e-06, "loss": 1.4684, "step": 12171 }, { "epoch": 0.93, "grad_norm": 2.5004329681396484, "learning_rate": 2.4946408797865916e-06, "loss": 1.138, "step": 12172 }, { "epoch": 0.93, "grad_norm": 1.6221798658370972, "learning_rate": 2.489318462461765e-06, "loss": 1.4923, "step": 12173 }, { "epoch": 0.93, "grad_norm": 1.0273737907409668, "learning_rate": 2.4840016574375957e-06, "loss": 1.3171, "step": 12174 }, { "epoch": 0.93, "grad_norm": 2.6639175415039062, "learning_rate": 2.478690465020117e-06, "loss": 1.9395, "step": 12175 }, { "epoch": 0.93, "grad_norm": 2.1604485511779785, "learning_rate": 2.4733848855150177e-06, "loss": 1.2712, "step": 12176 }, { "epoch": 0.93, "grad_norm": 1.2687017917633057, 
"learning_rate": 2.4680849192276424e-06, "loss": 1.27, "step": 12177 }, { "epoch": 0.93, "grad_norm": 1.1367870569229126, "learning_rate": 2.462790566463069e-06, "loss": 1.3366, "step": 12178 }, { "epoch": 0.93, "grad_norm": 1.3126882314682007, "learning_rate": 2.4575018275259877e-06, "loss": 1.6603, "step": 12179 }, { "epoch": 0.93, "grad_norm": 3.2858405113220215, "learning_rate": 2.452218702720821e-06, "loss": 1.3671, "step": 12180 }, { "epoch": 0.93, "grad_norm": 1.2356717586517334, "learning_rate": 2.446941192351615e-06, "loss": 0.7378, "step": 12181 }, { "epoch": 0.93, "grad_norm": 2.1453909873962402, "learning_rate": 2.441669296722138e-06, "loss": 1.6854, "step": 12182 }, { "epoch": 0.93, "grad_norm": 2.6270573139190674, "learning_rate": 2.4364030161358243e-06, "loss": 1.8347, "step": 12183 }, { "epoch": 0.93, "grad_norm": 2.030155897140503, "learning_rate": 2.4311423508957543e-06, "loss": 1.0276, "step": 12184 }, { "epoch": 0.93, "grad_norm": 1.5067020654678345, "learning_rate": 2.425887301304741e-06, "loss": 0.7855, "step": 12185 }, { "epoch": 0.93, "grad_norm": 1.6410597562789917, "learning_rate": 2.4206378676652097e-06, "loss": 1.7268, "step": 12186 }, { "epoch": 0.93, "grad_norm": 1.239916205406189, "learning_rate": 2.415394050279318e-06, "loss": 1.4176, "step": 12187 }, { "epoch": 0.93, "grad_norm": 1.5897455215454102, "learning_rate": 2.410155849448892e-06, "loss": 1.1698, "step": 12188 }, { "epoch": 0.93, "grad_norm": 1.2090113162994385, "learning_rate": 2.4049232654753897e-06, "loss": 1.6229, "step": 12189 }, { "epoch": 0.93, "grad_norm": 1.2646639347076416, "learning_rate": 2.3996962986599814e-06, "loss": 2.0989, "step": 12190 }, { "epoch": 0.93, "grad_norm": 1.2712002992630005, "learning_rate": 2.394474949303516e-06, "loss": 1.7018, "step": 12191 }, { "epoch": 0.93, "grad_norm": 1.9516265392303467, "learning_rate": 2.3892592177065075e-06, "loss": 1.5197, "step": 12192 }, { "epoch": 0.93, "grad_norm": 2.546196222305298, "learning_rate": 
2.3840491041691616e-06, "loss": 1.469, "step": 12193 }, { "epoch": 0.93, "grad_norm": 1.5721932649612427, "learning_rate": 2.3788446089913483e-06, "loss": 1.4629, "step": 12194 }, { "epoch": 0.93, "grad_norm": 2.5790841579437256, "learning_rate": 2.373645732472596e-06, "loss": 1.6258, "step": 12195 }, { "epoch": 0.93, "grad_norm": 1.0810922384262085, "learning_rate": 2.368452474912153e-06, "loss": 1.1257, "step": 12196 }, { "epoch": 0.93, "grad_norm": 2.4314982891082764, "learning_rate": 2.363264836608914e-06, "loss": 1.9565, "step": 12197 }, { "epoch": 0.93, "grad_norm": 1.3389462232589722, "learning_rate": 2.3580828178614733e-06, "loss": 1.2915, "step": 12198 }, { "epoch": 0.93, "grad_norm": 1.1231729984283447, "learning_rate": 2.3529064189680483e-06, "loss": 0.9235, "step": 12199 }, { "epoch": 0.93, "grad_norm": 1.1764323711395264, "learning_rate": 2.3477356402266005e-06, "loss": 0.9341, "step": 12200 }, { "epoch": 0.93, "grad_norm": 1.410928726196289, "learning_rate": 2.3425704819347248e-06, "loss": 1.1118, "step": 12201 }, { "epoch": 0.93, "grad_norm": 1.3947598934173584, "learning_rate": 2.3374109443897065e-06, "loss": 1.5818, "step": 12202 }, { "epoch": 0.93, "grad_norm": 1.6297544240951538, "learning_rate": 2.3322570278885293e-06, "loss": 0.9158, "step": 12203 }, { "epoch": 0.93, "grad_norm": 1.3875890970230103, "learning_rate": 2.32710873272779e-06, "loss": 1.7996, "step": 12204 }, { "epoch": 0.93, "grad_norm": 1.3790854215621948, "learning_rate": 2.3219660592038285e-06, "loss": 1.7974, "step": 12205 }, { "epoch": 0.93, "grad_norm": 1.7712006568908691, "learning_rate": 2.316829007612642e-06, "loss": 1.5305, "step": 12206 }, { "epoch": 0.93, "grad_norm": 2.033939838409424, "learning_rate": 2.311697578249894e-06, "loss": 1.5077, "step": 12207 }, { "epoch": 0.93, "grad_norm": 1.1385095119476318, "learning_rate": 2.3065717714109036e-06, "loss": 1.1252, "step": 12208 }, { "epoch": 0.93, "grad_norm": 3.166043281555176, "learning_rate": 2.3014515873907127e-06, 
"loss": 1.6852, "step": 12209 }, { "epoch": 0.93, "grad_norm": 2.2315030097961426, "learning_rate": 2.296337026484008e-06, "loss": 1.6541, "step": 12210 }, { "epoch": 0.93, "grad_norm": 2.2919716835021973, "learning_rate": 2.2912280889851756e-06, "loss": 1.3714, "step": 12211 }, { "epoch": 0.93, "grad_norm": 1.6308774948120117, "learning_rate": 2.2861247751882474e-06, "loss": 1.3201, "step": 12212 }, { "epoch": 0.93, "grad_norm": 1.4096429347991943, "learning_rate": 2.2810270853869553e-06, "loss": 1.3058, "step": 12213 }, { "epoch": 0.93, "grad_norm": 1.501421570777893, "learning_rate": 2.2759350198746976e-06, "loss": 1.6126, "step": 12214 }, { "epoch": 0.93, "grad_norm": 1.3719677925109863, "learning_rate": 2.270848578944562e-06, "loss": 1.2867, "step": 12215 }, { "epoch": 0.93, "grad_norm": 3.8136236667633057, "learning_rate": 2.2657677628892924e-06, "loss": 1.3951, "step": 12216 }, { "epoch": 0.93, "grad_norm": 2.582552671432495, "learning_rate": 2.2606925720013328e-06, "loss": 1.4533, "step": 12217 }, { "epoch": 0.93, "grad_norm": 1.5020995140075684, "learning_rate": 2.255623006572771e-06, "loss": 1.4671, "step": 12218 }, { "epoch": 0.93, "grad_norm": 2.220381736755371, "learning_rate": 2.250559066895397e-06, "loss": 2.2999, "step": 12219 }, { "epoch": 0.93, "grad_norm": 1.7104097604751587, "learning_rate": 2.2455007532606655e-06, "loss": 1.4005, "step": 12220 }, { "epoch": 0.93, "grad_norm": 1.3715476989746094, "learning_rate": 2.2404480659597217e-06, "loss": 1.5001, "step": 12221 }, { "epoch": 0.93, "grad_norm": 1.921472430229187, "learning_rate": 2.2354010052833774e-06, "loss": 1.4978, "step": 12222 }, { "epoch": 0.93, "grad_norm": 1.1019090414047241, "learning_rate": 2.2303595715221004e-06, "loss": 1.2985, "step": 12223 }, { "epoch": 0.93, "grad_norm": 1.3506748676300049, "learning_rate": 2.225323764966081e-06, "loss": 1.9852, "step": 12224 }, { "epoch": 0.93, "grad_norm": 1.582543134689331, "learning_rate": 2.220293585905142e-06, "loss": 1.1513, "step": 
12225 }, { "epoch": 0.93, "grad_norm": 1.8288297653198242, "learning_rate": 2.2152690346288084e-06, "loss": 1.2426, "step": 12226 }, { "epoch": 0.93, "grad_norm": 1.3415932655334473, "learning_rate": 2.21025011142626e-06, "loss": 0.8095, "step": 12227 }, { "epoch": 0.93, "grad_norm": 2.13321590423584, "learning_rate": 2.2052368165863757e-06, "loss": 1.5497, "step": 12228 }, { "epoch": 0.93, "grad_norm": 1.5252896547317505, "learning_rate": 2.2002291503976813e-06, "loss": 1.2532, "step": 12229 }, { "epoch": 0.93, "grad_norm": 1.8184146881103516, "learning_rate": 2.1952271131484236e-06, "loss": 1.4006, "step": 12230 }, { "epoch": 0.93, "grad_norm": 1.4500658512115479, "learning_rate": 2.1902307051264837e-06, "loss": 1.6618, "step": 12231 }, { "epoch": 0.93, "grad_norm": 1.258929967880249, "learning_rate": 2.1852399266194314e-06, "loss": 1.5603, "step": 12232 }, { "epoch": 0.93, "grad_norm": 1.492403507232666, "learning_rate": 2.1802547779145257e-06, "loss": 1.5784, "step": 12233 }, { "epoch": 0.93, "grad_norm": 1.808487892150879, "learning_rate": 2.1752752592986815e-06, "loss": 1.6176, "step": 12234 }, { "epoch": 0.93, "grad_norm": 1.3009393215179443, "learning_rate": 2.170301371058503e-06, "loss": 1.1224, "step": 12235 }, { "epoch": 0.93, "grad_norm": 1.5282412767410278, "learning_rate": 2.1653331134802613e-06, "loss": 1.0128, "step": 12236 }, { "epoch": 0.93, "grad_norm": 1.707964539527893, "learning_rate": 2.1603704868499162e-06, "loss": 1.5067, "step": 12237 }, { "epoch": 0.93, "grad_norm": 1.0625180006027222, "learning_rate": 2.1554134914530843e-06, "loss": 1.0106, "step": 12238 }, { "epoch": 0.93, "grad_norm": 1.4999370574951172, "learning_rate": 2.15046212757507e-06, "loss": 0.9493, "step": 12239 }, { "epoch": 0.93, "grad_norm": 1.3127822875976562, "learning_rate": 2.1455163955008683e-06, "loss": 1.7256, "step": 12240 }, { "epoch": 0.93, "grad_norm": 1.295539379119873, "learning_rate": 2.1405762955151176e-06, "loss": 1.2283, "step": 12241 }, { "epoch": 0.93, 
"grad_norm": 2.060934066772461, "learning_rate": 2.135641827902157e-06, "loss": 1.2804, "step": 12242 }, { "epoch": 0.93, "grad_norm": 1.3593907356262207, "learning_rate": 2.1307129929459934e-06, "loss": 1.5339, "step": 12243 }, { "epoch": 0.93, "grad_norm": 1.5345417261123657, "learning_rate": 2.1257897909303103e-06, "loss": 1.7916, "step": 12244 }, { "epoch": 0.93, "grad_norm": 1.508071780204773, "learning_rate": 2.12087222213847e-06, "loss": 0.9821, "step": 12245 }, { "epoch": 0.93, "grad_norm": 1.8333077430725098, "learning_rate": 2.115960286853491e-06, "loss": 1.6527, "step": 12246 }, { "epoch": 0.93, "grad_norm": 1.2892614603042603, "learning_rate": 2.1110539853581025e-06, "loss": 1.6442, "step": 12247 }, { "epoch": 0.93, "grad_norm": 1.6978100538253784, "learning_rate": 2.106153317934667e-06, "loss": 2.2112, "step": 12248 }, { "epoch": 0.93, "grad_norm": 1.6917978525161743, "learning_rate": 2.101258284865271e-06, "loss": 1.6227, "step": 12249 }, { "epoch": 0.93, "grad_norm": 1.3626785278320312, "learning_rate": 2.0963688864316323e-06, "loss": 1.5939, "step": 12250 }, { "epoch": 0.93, "grad_norm": 2.6190755367279053, "learning_rate": 2.091485122915182e-06, "loss": 1.0442, "step": 12251 }, { "epoch": 0.93, "grad_norm": 1.5268054008483887, "learning_rate": 2.086606994597007e-06, "loss": 1.6501, "step": 12252 }, { "epoch": 0.93, "grad_norm": 1.196310043334961, "learning_rate": 2.0817345017578484e-06, "loss": 1.3559, "step": 12253 }, { "epoch": 0.94, "grad_norm": 1.054989218711853, "learning_rate": 2.076867644678171e-06, "loss": 0.7922, "step": 12254 }, { "epoch": 0.94, "grad_norm": 1.4321759939193726, "learning_rate": 2.0720064236380842e-06, "loss": 0.9356, "step": 12255 }, { "epoch": 0.94, "grad_norm": 1.350553035736084, "learning_rate": 2.0671508389173757e-06, "loss": 1.9584, "step": 12256 }, { "epoch": 0.94, "grad_norm": 1.4397474527359009, "learning_rate": 2.062300890795499e-06, "loss": 1.3656, "step": 12257 }, { "epoch": 0.94, "grad_norm": 
1.5773509740829468, "learning_rate": 2.057456579551631e-06, "loss": 0.829, "step": 12258 }, { "epoch": 0.94, "grad_norm": 1.204566240310669, "learning_rate": 2.05261790546456e-06, "loss": 1.5345, "step": 12259 }, { "epoch": 0.94, "grad_norm": 1.2148115634918213, "learning_rate": 2.0477848688127856e-06, "loss": 1.4408, "step": 12260 }, { "epoch": 0.94, "grad_norm": 1.1383607387542725, "learning_rate": 2.042957469874485e-06, "loss": 0.856, "step": 12261 }, { "epoch": 0.94, "grad_norm": 1.3724734783172607, "learning_rate": 2.038135708927491e-06, "loss": 0.5505, "step": 12262 }, { "epoch": 0.94, "grad_norm": 1.0052180290222168, "learning_rate": 2.0333195862493493e-06, "loss": 1.1467, "step": 12263 }, { "epoch": 0.94, "grad_norm": 1.9927300214767456, "learning_rate": 2.0285091021172263e-06, "loss": 0.9809, "step": 12264 }, { "epoch": 0.94, "grad_norm": 1.4993889331817627, "learning_rate": 2.0237042568080012e-06, "loss": 1.0402, "step": 12265 }, { "epoch": 0.94, "grad_norm": 1.4015861749649048, "learning_rate": 2.018905050598219e-06, "loss": 1.4992, "step": 12266 }, { "epoch": 0.94, "grad_norm": 3.0347137451171875, "learning_rate": 2.014111483764114e-06, "loss": 1.4851, "step": 12267 }, { "epoch": 0.94, "grad_norm": 5.029778957366943, "learning_rate": 2.009323556581566e-06, "loss": 3.0192, "step": 12268 }, { "epoch": 0.94, "grad_norm": 3.306828022003174, "learning_rate": 2.0045412693261654e-06, "loss": 2.2566, "step": 12269 }, { "epoch": 0.94, "grad_norm": 3.0171396732330322, "learning_rate": 1.999764622273148e-06, "loss": 1.0673, "step": 12270 }, { "epoch": 0.94, "grad_norm": 1.745896577835083, "learning_rate": 1.9949936156974382e-06, "loss": 1.259, "step": 12271 }, { "epoch": 0.94, "grad_norm": 1.6274486780166626, "learning_rate": 1.990228249873649e-06, "loss": 1.4767, "step": 12272 }, { "epoch": 0.94, "grad_norm": 5.713601589202881, "learning_rate": 1.98546852507604e-06, "loss": 1.2488, "step": 12273 }, { "epoch": 0.94, "grad_norm": 1.3629653453826904, 
"learning_rate": 1.980714441578557e-06, "loss": 1.099, "step": 12274 }, { "epoch": 0.94, "grad_norm": 1.3898975849151611, "learning_rate": 1.9759659996548384e-06, "loss": 1.2218, "step": 12275 }, { "epoch": 0.94, "grad_norm": 1.4363274574279785, "learning_rate": 1.971223199578176e-06, "loss": 0.8345, "step": 12276 }, { "epoch": 0.94, "grad_norm": 1.4868931770324707, "learning_rate": 1.9664860416215403e-06, "loss": 1.2806, "step": 12277 }, { "epoch": 0.94, "grad_norm": 1.9510504007339478, "learning_rate": 1.961754526057602e-06, "loss": 1.4688, "step": 12278 }, { "epoch": 0.94, "grad_norm": 1.9246500730514526, "learning_rate": 1.9570286531586655e-06, "loss": 1.3657, "step": 12279 }, { "epoch": 0.94, "grad_norm": 3.013896942138672, "learning_rate": 1.9523084231967358e-06, "loss": 1.6622, "step": 12280 }, { "epoch": 0.94, "grad_norm": 1.4287147521972656, "learning_rate": 1.9475938364435063e-06, "loss": 1.7733, "step": 12281 }, { "epoch": 0.94, "grad_norm": 1.7577552795410156, "learning_rate": 1.9428848931703157e-06, "loss": 2.0549, "step": 12282 }, { "epoch": 0.94, "grad_norm": 2.259599447250366, "learning_rate": 1.9381815936481805e-06, "loss": 1.3365, "step": 12283 }, { "epoch": 0.94, "grad_norm": 1.5597103834152222, "learning_rate": 1.9334839381478176e-06, "loss": 1.4142, "step": 12284 }, { "epoch": 0.94, "grad_norm": 1.4438951015472412, "learning_rate": 1.9287919269396105e-06, "loss": 1.2423, "step": 12285 }, { "epoch": 0.94, "grad_norm": 1.3208709955215454, "learning_rate": 1.9241055602935877e-06, "loss": 1.5522, "step": 12286 }, { "epoch": 0.94, "grad_norm": 1.8489028215408325, "learning_rate": 1.9194248384794887e-06, "loss": 0.9626, "step": 12287 }, { "epoch": 0.94, "grad_norm": 1.6028450727462769, "learning_rate": 1.914749761766732e-06, "loss": 1.1396, "step": 12288 }, { "epoch": 0.94, "grad_norm": 1.453892469406128, "learning_rate": 1.9100803304243577e-06, "loss": 1.9558, "step": 12289 }, { "epoch": 0.94, "grad_norm": 1.136661410331726, "learning_rate": 
1.9054165447211502e-06, "loss": 1.1885, "step": 12290 }, { "epoch": 0.94, "grad_norm": 1.180145502090454, "learning_rate": 1.90075840492554e-06, "loss": 1.5296, "step": 12291 }, { "epoch": 0.94, "grad_norm": 1.3099337816238403, "learning_rate": 1.8961059113056123e-06, "loss": 1.1151, "step": 12292 }, { "epoch": 0.94, "grad_norm": 1.8430681228637695, "learning_rate": 1.8914590641291418e-06, "loss": 1.4868, "step": 12293 }, { "epoch": 0.94, "grad_norm": 0.9257107377052307, "learning_rate": 1.8868178636636035e-06, "loss": 0.8, "step": 12294 }, { "epoch": 0.94, "grad_norm": 1.3871608972549438, "learning_rate": 1.882182310176095e-06, "loss": 1.6315, "step": 12295 }, { "epoch": 0.94, "grad_norm": 1.1081424951553345, "learning_rate": 1.8775524039334469e-06, "loss": 1.1815, "step": 12296 }, { "epoch": 0.94, "grad_norm": 5.535292625427246, "learning_rate": 1.8729281452021241e-06, "loss": 1.3745, "step": 12297 }, { "epoch": 0.94, "grad_norm": 1.1966946125030518, "learning_rate": 1.8683095342482692e-06, "loss": 1.3661, "step": 12298 }, { "epoch": 0.94, "grad_norm": 1.255892038345337, "learning_rate": 1.8636965713377364e-06, "loss": 1.0131, "step": 12299 }, { "epoch": 0.94, "grad_norm": 1.5305378437042236, "learning_rate": 1.8590892567360129e-06, "loss": 0.6972, "step": 12300 }, { "epoch": 0.94, "grad_norm": 2.150386095046997, "learning_rate": 1.854487590708276e-06, "loss": 1.828, "step": 12301 }, { "epoch": 0.94, "grad_norm": 1.1728991270065308, "learning_rate": 1.849891573519369e-06, "loss": 1.4037, "step": 12302 }, { "epoch": 0.94, "grad_norm": 1.4843300580978394, "learning_rate": 1.8453012054338358e-06, "loss": 1.1547, "step": 12303 }, { "epoch": 0.94, "grad_norm": 1.3411592245101929, "learning_rate": 1.8407164867158655e-06, "loss": 0.9827, "step": 12304 }, { "epoch": 0.94, "grad_norm": 1.7187645435333252, "learning_rate": 1.8361374176293467e-06, "loss": 1.0239, "step": 12305 }, { "epoch": 0.94, "grad_norm": 2.129741668701172, "learning_rate": 1.8315639984378242e-06, 
"loss": 0.8981, "step": 12306 }, { "epoch": 0.94, "grad_norm": 1.768304705619812, "learning_rate": 1.8269962294045207e-06, "loss": 1.157, "step": 12307 }, { "epoch": 0.94, "grad_norm": 1.584696650505066, "learning_rate": 1.8224341107923483e-06, "loss": 1.425, "step": 12308 }, { "epoch": 0.94, "grad_norm": 2.372338056564331, "learning_rate": 1.8178776428638745e-06, "loss": 1.5926, "step": 12309 }, { "epoch": 0.94, "grad_norm": 1.5350390672683716, "learning_rate": 1.8133268258813563e-06, "loss": 1.3411, "step": 12310 }, { "epoch": 0.94, "grad_norm": 0.9555829167366028, "learning_rate": 1.8087816601067176e-06, "loss": 0.4963, "step": 12311 }, { "epoch": 0.94, "grad_norm": 1.7257215976715088, "learning_rate": 1.80424214580156e-06, "loss": 1.2017, "step": 12312 }, { "epoch": 0.94, "grad_norm": 1.3927232027053833, "learning_rate": 1.7997082832271416e-06, "loss": 1.2438, "step": 12313 }, { "epoch": 0.94, "grad_norm": 1.7432448863983154, "learning_rate": 1.7951800726444423e-06, "loss": 0.865, "step": 12314 }, { "epoch": 0.94, "grad_norm": 2.503655433654785, "learning_rate": 1.7906575143140647e-06, "loss": 0.8539, "step": 12315 }, { "epoch": 0.94, "grad_norm": 2.0920019149780273, "learning_rate": 1.7861406084963116e-06, "loss": 0.9464, "step": 12316 }, { "epoch": 0.94, "grad_norm": 1.4825109243392944, "learning_rate": 1.7816293554511644e-06, "loss": 1.7342, "step": 12317 }, { "epoch": 0.94, "grad_norm": 1.1460814476013184, "learning_rate": 1.7771237554382703e-06, "loss": 1.8932, "step": 12318 }, { "epoch": 0.94, "grad_norm": 1.6382938623428345, "learning_rate": 1.7726238087169445e-06, "loss": 1.2855, "step": 12319 }, { "epoch": 0.94, "grad_norm": 2.289384365081787, "learning_rate": 1.7681295155461909e-06, "loss": 0.9448, "step": 12320 }, { "epoch": 0.94, "grad_norm": 1.5495284795761108, "learning_rate": 1.7636408761846913e-06, "loss": 1.372, "step": 12321 }, { "epoch": 0.94, "grad_norm": 1.2173597812652588, "learning_rate": 1.7591578908907724e-06, "loss": 0.6548, "step": 
12322 }, { "epoch": 0.94, "grad_norm": 1.469283103942871, "learning_rate": 1.7546805599224614e-06, "loss": 1.3378, "step": 12323 }, { "epoch": 0.94, "grad_norm": 1.2562845945358276, "learning_rate": 1.750208883537463e-06, "loss": 1.2933, "step": 12324 }, { "epoch": 0.94, "grad_norm": 1.240818977355957, "learning_rate": 1.745742861993138e-06, "loss": 1.6059, "step": 12325 }, { "epoch": 0.94, "grad_norm": 2.882995843887329, "learning_rate": 1.7412824955465369e-06, "loss": 0.8277, "step": 12326 }, { "epoch": 0.94, "grad_norm": 1.521823525428772, "learning_rate": 1.7368277844543978e-06, "loss": 1.629, "step": 12327 }, { "epoch": 0.94, "grad_norm": 2.453315019607544, "learning_rate": 1.732378728973072e-06, "loss": 1.2371, "step": 12328 }, { "epoch": 0.94, "grad_norm": 1.1389285326004028, "learning_rate": 1.7279353293586765e-06, "loss": 1.0209, "step": 12329 }, { "epoch": 0.94, "grad_norm": 1.2911677360534668, "learning_rate": 1.7234975858669178e-06, "loss": 1.7547, "step": 12330 }, { "epoch": 0.94, "grad_norm": 1.362899661064148, "learning_rate": 1.719065498753236e-06, "loss": 1.7166, "step": 12331 }, { "epoch": 0.94, "grad_norm": 1.325299620628357, "learning_rate": 1.7146390682726943e-06, "loss": 0.7808, "step": 12332 }, { "epoch": 0.94, "grad_norm": 1.1946083307266235, "learning_rate": 1.7102182946800993e-06, "loss": 1.3808, "step": 12333 }, { "epoch": 0.94, "grad_norm": 1.1761564016342163, "learning_rate": 1.7058031782298588e-06, "loss": 1.6014, "step": 12334 }, { "epoch": 0.94, "grad_norm": 1.5636087656021118, "learning_rate": 1.7013937191761031e-06, "loss": 1.5571, "step": 12335 }, { "epoch": 0.94, "grad_norm": 1.2280247211456299, "learning_rate": 1.696989917772629e-06, "loss": 1.1262, "step": 12336 }, { "epoch": 0.94, "grad_norm": 1.634477972984314, "learning_rate": 1.6925917742728892e-06, "loss": 1.4972, "step": 12337 }, { "epoch": 0.94, "grad_norm": 0.9855983257293701, "learning_rate": 1.6881992889300258e-06, "loss": 1.1515, "step": 12338 }, { "epoch": 0.94, 
"grad_norm": 1.0144870281219482, "learning_rate": 1.6838124619968365e-06, "loss": 1.1328, "step": 12339 }, { "epoch": 0.94, "grad_norm": 1.5980576276779175, "learning_rate": 1.6794312937258417e-06, "loss": 1.1109, "step": 12340 }, { "epoch": 0.94, "grad_norm": 1.7458146810531616, "learning_rate": 1.6750557843691617e-06, "loss": 1.2498, "step": 12341 }, { "epoch": 0.94, "grad_norm": 1.4943078756332397, "learning_rate": 1.6706859341786729e-06, "loss": 1.1401, "step": 12342 }, { "epoch": 0.94, "grad_norm": 2.5574710369110107, "learning_rate": 1.666321743405852e-06, "loss": 2.1984, "step": 12343 }, { "epoch": 0.94, "grad_norm": 0.9168559312820435, "learning_rate": 1.6619632123019091e-06, "loss": 1.0349, "step": 12344 }, { "epoch": 0.94, "grad_norm": 5.7119975090026855, "learning_rate": 1.657610341117688e-06, "loss": 1.3462, "step": 12345 }, { "epoch": 0.94, "grad_norm": 1.7250218391418457, "learning_rate": 1.6532631301037215e-06, "loss": 1.3022, "step": 12346 }, { "epoch": 0.94, "grad_norm": 1.179703950881958, "learning_rate": 1.6489215795102097e-06, "loss": 0.9581, "step": 12347 }, { "epoch": 0.94, "grad_norm": 2.245931386947632, "learning_rate": 1.6445856895870637e-06, "loss": 1.61, "step": 12348 }, { "epoch": 0.94, "grad_norm": 1.086442470550537, "learning_rate": 1.6402554605838172e-06, "loss": 1.3468, "step": 12349 }, { "epoch": 0.94, "grad_norm": 2.723353862762451, "learning_rate": 1.635930892749693e-06, "loss": 1.1912, "step": 12350 }, { "epoch": 0.94, "grad_norm": 1.5570824146270752, "learning_rate": 1.6316119863336143e-06, "loss": 2.494, "step": 12351 }, { "epoch": 0.94, "grad_norm": 1.9692630767822266, "learning_rate": 1.6272987415841267e-06, "loss": 1.2591, "step": 12352 }, { "epoch": 0.94, "grad_norm": 1.0282845497131348, "learning_rate": 1.6229911587495205e-06, "loss": 1.1328, "step": 12353 }, { "epoch": 0.94, "grad_norm": 3.6132540702819824, "learning_rate": 1.6186892380776975e-06, "loss": 1.418, "step": 12354 }, { "epoch": 0.94, "grad_norm": 
1.6614686250686646, "learning_rate": 1.6143929798162704e-06, "loss": 1.3085, "step": 12355 }, { "epoch": 0.94, "grad_norm": 2.1645491123199463, "learning_rate": 1.6101023842125085e-06, "loss": 1.5695, "step": 12356 }, { "epoch": 0.94, "grad_norm": 3.049225330352783, "learning_rate": 1.6058174515133583e-06, "loss": 2.413, "step": 12357 }, { "epoch": 0.94, "grad_norm": 2.9747207164764404, "learning_rate": 1.6015381819654561e-06, "loss": 1.4888, "step": 12358 }, { "epoch": 0.94, "grad_norm": 1.2443292140960693, "learning_rate": 1.5972645758150717e-06, "loss": 1.1003, "step": 12359 }, { "epoch": 0.94, "grad_norm": 1.3290854692459106, "learning_rate": 1.592996633308197e-06, "loss": 1.6639, "step": 12360 }, { "epoch": 0.94, "grad_norm": 1.257628083229065, "learning_rate": 1.5887343546904687e-06, "loss": 1.9284, "step": 12361 }, { "epoch": 0.94, "grad_norm": 1.606434941291809, "learning_rate": 1.584477740207213e-06, "loss": 1.4049, "step": 12362 }, { "epoch": 0.94, "grad_norm": 1.1641244888305664, "learning_rate": 1.5802267901034006e-06, "loss": 1.6001, "step": 12363 }, { "epoch": 0.94, "grad_norm": 1.1930184364318848, "learning_rate": 1.5759815046237359e-06, "loss": 0.9162, "step": 12364 }, { "epoch": 0.94, "grad_norm": 1.148276448249817, "learning_rate": 1.5717418840125232e-06, "loss": 1.2198, "step": 12365 }, { "epoch": 0.94, "grad_norm": 1.2695249319076538, "learning_rate": 1.5675079285138006e-06, "loss": 1.191, "step": 12366 }, { "epoch": 0.94, "grad_norm": 2.3020365238189697, "learning_rate": 1.5632796383712511e-06, "loss": 1.6828, "step": 12367 }, { "epoch": 0.94, "grad_norm": 1.0813688039779663, "learning_rate": 1.5590570138282246e-06, "loss": 1.3817, "step": 12368 }, { "epoch": 0.94, "grad_norm": 2.1421091556549072, "learning_rate": 1.554840055127771e-06, "loss": 1.2835, "step": 12369 }, { "epoch": 0.94, "grad_norm": 3.9073739051818848, "learning_rate": 1.550628762512596e-06, "loss": 1.6139, "step": 12370 }, { "epoch": 0.94, "grad_norm": 1.9006649255752563, 
"learning_rate": 1.5464231362250835e-06, "loss": 1.6419, "step": 12371 }, { "epoch": 0.94, "grad_norm": 1.3048688173294067, "learning_rate": 1.5422231765072847e-06, "loss": 1.0477, "step": 12372 }, { "epoch": 0.94, "grad_norm": 1.501834750175476, "learning_rate": 1.5380288836009504e-06, "loss": 1.6414, "step": 12373 }, { "epoch": 0.94, "grad_norm": 1.4060219526290894, "learning_rate": 1.5338402577474653e-06, "loss": 1.4076, "step": 12374 }, { "epoch": 0.94, "grad_norm": 1.2350636720657349, "learning_rate": 1.5296572991879254e-06, "loss": 0.9089, "step": 12375 }, { "epoch": 0.94, "grad_norm": 1.4474948644638062, "learning_rate": 1.5254800081630826e-06, "loss": 1.1292, "step": 12376 }, { "epoch": 0.94, "grad_norm": 2.057513952255249, "learning_rate": 1.521308384913356e-06, "loss": 1.5361, "step": 12377 }, { "epoch": 0.94, "grad_norm": 1.2299931049346924, "learning_rate": 1.5171424296788305e-06, "loss": 1.0547, "step": 12378 }, { "epoch": 0.94, "grad_norm": 1.7280758619308472, "learning_rate": 1.5129821426993152e-06, "loss": 1.3972, "step": 12379 }, { "epoch": 0.94, "grad_norm": 1.9228847026824951, "learning_rate": 1.5088275242142402e-06, "loss": 1.6928, "step": 12380 }, { "epoch": 0.94, "grad_norm": 1.5767617225646973, "learning_rate": 1.5046785744627256e-06, "loss": 1.4942, "step": 12381 }, { "epoch": 0.94, "grad_norm": 1.0760159492492676, "learning_rate": 1.5005352936835805e-06, "loss": 1.6651, "step": 12382 }, { "epoch": 0.94, "grad_norm": 2.1224210262298584, "learning_rate": 1.4963976821152581e-06, "loss": 1.1473, "step": 12383 }, { "epoch": 0.94, "grad_norm": 1.566707968711853, "learning_rate": 1.4922657399959128e-06, "loss": 1.708, "step": 12384 }, { "epoch": 0.95, "grad_norm": 2.616375684738159, "learning_rate": 1.488139467563354e-06, "loss": 1.0832, "step": 12385 }, { "epoch": 0.95, "grad_norm": 1.340583086013794, "learning_rate": 1.4840188650550923e-06, "loss": 1.8933, "step": 12386 }, { "epoch": 0.95, "grad_norm": 1.583335518836975, "learning_rate": 
1.4799039327082598e-06, "loss": 1.4692, "step": 12387 }, { "epoch": 0.95, "grad_norm": 1.3510578870773315, "learning_rate": 1.4757946707597115e-06, "loss": 1.7303, "step": 12388 }, { "epoch": 0.95, "grad_norm": 1.5771204233169556, "learning_rate": 1.4716910794459581e-06, "loss": 1.6371, "step": 12389 }, { "epoch": 0.95, "grad_norm": 2.6967339515686035, "learning_rate": 1.4675931590031889e-06, "loss": 1.3011, "step": 12390 }, { "epoch": 0.95, "grad_norm": 1.3284012079238892, "learning_rate": 1.4635009096672702e-06, "loss": 1.4685, "step": 12391 }, { "epoch": 0.95, "grad_norm": 4.05318546295166, "learning_rate": 1.459414331673703e-06, "loss": 1.4237, "step": 12392 }, { "epoch": 0.95, "grad_norm": 1.3213999271392822, "learning_rate": 1.455333425257732e-06, "loss": 1.1177, "step": 12393 }, { "epoch": 0.95, "grad_norm": 1.8686569929122925, "learning_rate": 1.4512581906542145e-06, "loss": 1.5748, "step": 12394 }, { "epoch": 0.95, "grad_norm": 1.2546361684799194, "learning_rate": 1.4471886280977064e-06, "loss": 0.9122, "step": 12395 }, { "epoch": 0.95, "grad_norm": 1.2212508916854858, "learning_rate": 1.4431247378224322e-06, "loss": 0.6665, "step": 12396 }, { "epoch": 0.95, "grad_norm": 2.3479931354522705, "learning_rate": 1.4390665200623044e-06, "loss": 1.1668, "step": 12397 }, { "epoch": 0.95, "grad_norm": 1.7720435857772827, "learning_rate": 1.4350139750508806e-06, "loss": 1.6046, "step": 12398 }, { "epoch": 0.95, "grad_norm": 1.8383172750473022, "learning_rate": 1.4309671030214189e-06, "loss": 0.971, "step": 12399 }, { "epoch": 0.95, "grad_norm": 2.4793014526367188, "learning_rate": 1.4269259042068327e-06, "loss": 1.7167, "step": 12400 }, { "epoch": 0.95, "grad_norm": 1.0589696168899536, "learning_rate": 1.4228903788397252e-06, "loss": 1.3851, "step": 12401 }, { "epoch": 0.95, "grad_norm": 1.5399192571640015, "learning_rate": 1.4188605271523547e-06, "loss": 1.2101, "step": 12402 }, { "epoch": 0.95, "grad_norm": 3.086754560470581, "learning_rate": 
1.4148363493766802e-06, "loss": 1.6884, "step": 12403 }, { "epoch": 0.95, "grad_norm": 1.7038617134094238, "learning_rate": 1.4108178457442944e-06, "loss": 1.5958, "step": 12404 }, { "epoch": 0.95, "grad_norm": 1.3932418823242188, "learning_rate": 1.4068050164864898e-06, "loss": 1.4176, "step": 12405 }, { "epoch": 0.95, "grad_norm": 1.717421293258667, "learning_rate": 1.4027978618342486e-06, "loss": 1.3495, "step": 12406 }, { "epoch": 0.95, "grad_norm": 1.2421550750732422, "learning_rate": 1.3987963820181748e-06, "loss": 0.9776, "step": 12407 }, { "epoch": 0.95, "grad_norm": 1.6011242866516113, "learning_rate": 1.3948005772685847e-06, "loss": 1.4687, "step": 12408 }, { "epoch": 0.95, "grad_norm": 2.306488037109375, "learning_rate": 1.3908104478154826e-06, "loss": 0.8766, "step": 12409 }, { "epoch": 0.95, "grad_norm": 1.6459475755691528, "learning_rate": 1.3868259938884964e-06, "loss": 1.4581, "step": 12410 }, { "epoch": 0.95, "grad_norm": 1.0465167760849, "learning_rate": 1.3828472157169647e-06, "loss": 1.2177, "step": 12411 }, { "epoch": 0.95, "grad_norm": 0.9959005117416382, "learning_rate": 1.3788741135298933e-06, "loss": 0.9933, "step": 12412 }, { "epoch": 0.95, "grad_norm": 1.6298136711120605, "learning_rate": 1.3749066875559547e-06, "loss": 1.1989, "step": 12413 }, { "epoch": 0.95, "grad_norm": 1.7200199365615845, "learning_rate": 1.3709449380234995e-06, "loss": 1.0064, "step": 12414 }, { "epoch": 0.95, "grad_norm": 1.9346699714660645, "learning_rate": 1.3669888651605345e-06, "loss": 0.7426, "step": 12415 }, { "epoch": 0.95, "grad_norm": 1.1339867115020752, "learning_rate": 1.3630384691947661e-06, "loss": 1.1902, "step": 12416 }, { "epoch": 0.95, "grad_norm": 1.662989854812622, "learning_rate": 1.3590937503535683e-06, "loss": 1.6495, "step": 12417 }, { "epoch": 0.95, "grad_norm": 1.6613315343856812, "learning_rate": 1.3551547088639704e-06, "loss": 1.3017, "step": 12418 }, { "epoch": 0.95, "grad_norm": 2.074471950531006, "learning_rate": 
1.3512213449526912e-06, "loss": 1.4071, "step": 12419 }, { "epoch": 0.95, "grad_norm": 2.940917491912842, "learning_rate": 1.3472936588461161e-06, "loss": 2.1482, "step": 12420 }, { "epoch": 0.95, "grad_norm": 1.2851312160491943, "learning_rate": 1.3433716507703197e-06, "loss": 1.0746, "step": 12421 }, { "epoch": 0.95, "grad_norm": 2.8934834003448486, "learning_rate": 1.3394553209510109e-06, "loss": 1.8433, "step": 12422 }, { "epoch": 0.95, "grad_norm": 1.2673602104187012, "learning_rate": 1.3355446696136308e-06, "loss": 1.2557, "step": 12423 }, { "epoch": 0.95, "grad_norm": 5.725069999694824, "learning_rate": 1.3316396969832334e-06, "loss": 1.1104, "step": 12424 }, { "epoch": 0.95, "grad_norm": 1.9620429277420044, "learning_rate": 1.3277404032845719e-06, "loss": 1.648, "step": 12425 }, { "epoch": 0.95, "grad_norm": 1.6455190181732178, "learning_rate": 1.323846788742078e-06, "loss": 0.7774, "step": 12426 }, { "epoch": 0.95, "grad_norm": 1.4332776069641113, "learning_rate": 1.3199588535798724e-06, "loss": 2.2414, "step": 12427 }, { "epoch": 0.95, "grad_norm": 1.346866250038147, "learning_rate": 1.3160765980216872e-06, "loss": 1.4117, "step": 12428 }, { "epoch": 0.95, "grad_norm": 1.5555660724639893, "learning_rate": 1.3122000222910102e-06, "loss": 1.4121, "step": 12429 }, { "epoch": 0.95, "grad_norm": 1.7119511365890503, "learning_rate": 1.30832912661093e-06, "loss": 1.7304, "step": 12430 }, { "epoch": 0.95, "grad_norm": 1.0497148036956787, "learning_rate": 1.3044639112042567e-06, "loss": 1.2169, "step": 12431 }, { "epoch": 0.95, "grad_norm": 2.1339328289031982, "learning_rate": 1.3006043762934572e-06, "loss": 1.7642, "step": 12432 }, { "epoch": 0.95, "grad_norm": 2.151594400405884, "learning_rate": 1.2967505221006427e-06, "loss": 1.1565, "step": 12433 }, { "epoch": 0.95, "grad_norm": 1.7515486478805542, "learning_rate": 1.2929023488476576e-06, "loss": 1.2646, "step": 12434 }, { "epoch": 0.95, "grad_norm": 1.5643386840820312, "learning_rate": 1.2890598567559696e-06, 
"loss": 1.5308, "step": 12435 }, { "epoch": 0.95, "grad_norm": 1.894150733947754, "learning_rate": 1.2852230460467462e-06, "loss": 1.7685, "step": 12436 }, { "epoch": 0.95, "grad_norm": 1.397017240524292, "learning_rate": 1.2813919169408105e-06, "loss": 1.3059, "step": 12437 }, { "epoch": 0.95, "grad_norm": 2.055922508239746, "learning_rate": 1.2775664696586531e-06, "loss": 1.72, "step": 12438 }, { "epoch": 0.95, "grad_norm": 1.344016671180725, "learning_rate": 1.2737467044204864e-06, "loss": 1.1557, "step": 12439 }, { "epoch": 0.95, "grad_norm": 1.2532107830047607, "learning_rate": 1.2699326214461238e-06, "loss": 1.476, "step": 12440 }, { "epoch": 0.95, "grad_norm": 3.309001922607422, "learning_rate": 1.2661242209551006e-06, "loss": 1.1997, "step": 12441 }, { "epoch": 0.95, "grad_norm": 2.7823421955108643, "learning_rate": 1.2623215031666192e-06, "loss": 1.7461, "step": 12442 }, { "epoch": 0.95, "grad_norm": 1.358079195022583, "learning_rate": 1.2585244682995489e-06, "loss": 1.4956, "step": 12443 }, { "epoch": 0.95, "grad_norm": 1.4746036529541016, "learning_rate": 1.2547331165724042e-06, "loss": 1.0453, "step": 12444 }, { "epoch": 0.95, "grad_norm": 1.1964690685272217, "learning_rate": 1.2509474482034433e-06, "loss": 1.3211, "step": 12445 }, { "epoch": 0.95, "grad_norm": 1.3406022787094116, "learning_rate": 1.2471674634105034e-06, "loss": 1.6308, "step": 12446 }, { "epoch": 0.95, "grad_norm": 0.9680908918380737, "learning_rate": 1.2433931624111883e-06, "loss": 1.4106, "step": 12447 }, { "epoch": 0.95, "grad_norm": 1.8240081071853638, "learning_rate": 1.2396245454227128e-06, "loss": 1.2408, "step": 12448 }, { "epoch": 0.95, "grad_norm": 1.54050612449646, "learning_rate": 1.2358616126619704e-06, "loss": 0.9788, "step": 12449 }, { "epoch": 0.95, "grad_norm": 1.2526181936264038, "learning_rate": 1.2321043643455543e-06, "loss": 1.6444, "step": 12450 }, { "epoch": 0.95, "grad_norm": 1.9008949995040894, "learning_rate": 1.2283528006897138e-06, "loss": 1.3567, "step": 
12451 }, { "epoch": 0.95, "grad_norm": 1.7946629524230957, "learning_rate": 1.2246069219103873e-06, "loss": 1.595, "step": 12452 }, { "epoch": 0.95, "grad_norm": 1.4153919219970703, "learning_rate": 1.2208667282231356e-06, "loss": 1.2549, "step": 12453 }, { "epoch": 0.95, "grad_norm": 4.307375431060791, "learning_rate": 1.2171322198432643e-06, "loss": 1.1354, "step": 12454 }, { "epoch": 0.95, "grad_norm": 3.525637626647949, "learning_rate": 1.2134033969856907e-06, "loss": 1.1559, "step": 12455 }, { "epoch": 0.95, "grad_norm": 1.652280569076538, "learning_rate": 1.2096802598650314e-06, "loss": 1.5642, "step": 12456 }, { "epoch": 0.95, "grad_norm": 1.4500547647476196, "learning_rate": 1.2059628086956044e-06, "loss": 1.1901, "step": 12457 }, { "epoch": 0.95, "grad_norm": 1.8240265846252441, "learning_rate": 1.202251043691327e-06, "loss": 1.5012, "step": 12458 }, { "epoch": 0.95, "grad_norm": 1.755708932876587, "learning_rate": 1.198544965065862e-06, "loss": 2.1048, "step": 12459 }, { "epoch": 0.95, "grad_norm": 1.8260655403137207, "learning_rate": 1.1948445730325163e-06, "loss": 0.6606, "step": 12460 }, { "epoch": 0.95, "grad_norm": 1.1506599187850952, "learning_rate": 1.1911498678042642e-06, "loss": 1.7162, "step": 12461 }, { "epoch": 0.95, "grad_norm": 0.8642171025276184, "learning_rate": 1.1874608495937356e-06, "loss": 0.7556, "step": 12462 }, { "epoch": 0.95, "grad_norm": 3.19804048538208, "learning_rate": 1.1837775186132938e-06, "loss": 1.8726, "step": 12463 }, { "epoch": 0.95, "grad_norm": 1.1093425750732422, "learning_rate": 1.1800998750748915e-06, "loss": 0.9202, "step": 12464 }, { "epoch": 0.95, "grad_norm": 2.0571768283843994, "learning_rate": 1.1764279191902373e-06, "loss": 1.2074, "step": 12465 }, { "epoch": 0.95, "grad_norm": 1.210081696510315, "learning_rate": 1.1727616511706508e-06, "loss": 1.4163, "step": 12466 }, { "epoch": 0.95, "grad_norm": 2.3058831691741943, "learning_rate": 1.1691010712271521e-06, "loss": 1.5111, "step": 12467 }, { "epoch": 0.95, 
"grad_norm": 1.058268666267395, "learning_rate": 1.1654461795704286e-06, "loss": 1.3829, "step": 12468 }, { "epoch": 0.95, "grad_norm": 1.39262855052948, "learning_rate": 1.161796976410856e-06, "loss": 1.539, "step": 12469 }, { "epoch": 0.95, "grad_norm": 1.8353804349899292, "learning_rate": 1.1581534619584333e-06, "loss": 1.0543, "step": 12470 }, { "epoch": 0.95, "grad_norm": 5.066309452056885, "learning_rate": 1.1545156364229037e-06, "loss": 2.2809, "step": 12471 }, { "epoch": 0.95, "grad_norm": 1.4066463708877563, "learning_rate": 1.150883500013622e-06, "loss": 1.5365, "step": 12472 }, { "epoch": 0.95, "grad_norm": 1.3497306108474731, "learning_rate": 1.1472570529396321e-06, "loss": 1.5813, "step": 12473 }, { "epoch": 0.95, "grad_norm": 1.5399407148361206, "learning_rate": 1.1436362954096668e-06, "loss": 0.9259, "step": 12474 }, { "epoch": 0.95, "grad_norm": 1.7348027229309082, "learning_rate": 1.1400212276321376e-06, "loss": 1.8409, "step": 12475 }, { "epoch": 0.95, "grad_norm": 1.6535855531692505, "learning_rate": 1.1364118498150777e-06, "loss": 1.7595, "step": 12476 }, { "epoch": 0.95, "grad_norm": 1.463303565979004, "learning_rate": 1.1328081621662545e-06, "loss": 1.8624, "step": 12477 }, { "epoch": 0.95, "grad_norm": 1.5609914064407349, "learning_rate": 1.1292101648930797e-06, "loss": 1.3362, "step": 12478 }, { "epoch": 0.95, "grad_norm": 4.711037635803223, "learning_rate": 1.1256178582026321e-06, "loss": 1.8088, "step": 12479 }, { "epoch": 0.95, "grad_norm": 0.8642249703407288, "learning_rate": 1.1220312423016687e-06, "loss": 0.5505, "step": 12480 }, { "epoch": 0.95, "grad_norm": 1.5205631256103516, "learning_rate": 1.1184503173966243e-06, "loss": 1.494, "step": 12481 }, { "epoch": 0.95, "grad_norm": 2.1297543048858643, "learning_rate": 1.1148750836935896e-06, "loss": 1.5509, "step": 12482 }, { "epoch": 0.95, "grad_norm": 1.2102525234222412, "learning_rate": 1.1113055413983553e-06, "loss": 1.0666, "step": 12483 }, { "epoch": 0.95, "grad_norm": 
6.108997344970703, "learning_rate": 1.1077416907163574e-06, "loss": 2.0027, "step": 12484 }, { "epoch": 0.95, "grad_norm": 2.615462303161621, "learning_rate": 1.1041835318527206e-06, "loss": 1.4857, "step": 12485 }, { "epoch": 0.95, "grad_norm": 0.9549790024757385, "learning_rate": 1.1006310650122475e-06, "loss": 0.7395, "step": 12486 }, { "epoch": 0.95, "grad_norm": 3.154374361038208, "learning_rate": 1.097084290399386e-06, "loss": 1.8947, "step": 12487 }, { "epoch": 0.95, "grad_norm": 1.2474530935287476, "learning_rate": 1.093543208218284e-06, "loss": 1.3348, "step": 12488 }, { "epoch": 0.95, "grad_norm": 1.3693019151687622, "learning_rate": 1.0900078186727446e-06, "loss": 1.7804, "step": 12489 }, { "epoch": 0.95, "grad_norm": 1.3529397249221802, "learning_rate": 1.0864781219662611e-06, "loss": 1.1331, "step": 12490 }, { "epoch": 0.95, "grad_norm": 3.503919839859009, "learning_rate": 1.082954118301971e-06, "loss": 1.1584, "step": 12491 }, { "epoch": 0.95, "grad_norm": 1.7129117250442505, "learning_rate": 1.0794358078827116e-06, "loss": 1.0975, "step": 12492 }, { "epoch": 0.95, "grad_norm": 1.6763042211532593, "learning_rate": 1.075923190910988e-06, "loss": 1.0986, "step": 12493 }, { "epoch": 0.95, "grad_norm": 4.267309665679932, "learning_rate": 1.0724162675889604e-06, "loss": 1.8026, "step": 12494 }, { "epoch": 0.95, "grad_norm": 1.330340027809143, "learning_rate": 1.0689150381184677e-06, "loss": 1.7826, "step": 12495 }, { "epoch": 0.95, "grad_norm": 1.5857654809951782, "learning_rate": 1.065419502701037e-06, "loss": 1.5547, "step": 12496 }, { "epoch": 0.95, "grad_norm": 1.3271653652191162, "learning_rate": 1.0619296615378639e-06, "loss": 1.7366, "step": 12497 }, { "epoch": 0.95, "grad_norm": 1.6450214385986328, "learning_rate": 1.0584455148297978e-06, "loss": 1.4082, "step": 12498 }, { "epoch": 0.95, "grad_norm": 2.097350597381592, "learning_rate": 1.0549670627773567e-06, "loss": 1.9954, "step": 12499 }, { "epoch": 0.95, "grad_norm": 2.5841329097747803, 
"learning_rate": 1.0514943055807802e-06, "loss": 1.4497, "step": 12500 }, { "epoch": 0.95, "grad_norm": 1.2934640645980835, "learning_rate": 1.0480272434399085e-06, "loss": 1.3954, "step": 12501 }, { "epoch": 0.95, "grad_norm": 1.7415671348571777, "learning_rate": 1.0445658765543153e-06, "loss": 1.0081, "step": 12502 }, { "epoch": 0.95, "grad_norm": 2.0393226146698, "learning_rate": 1.041110205123219e-06, "loss": 1.4899, "step": 12503 }, { "epoch": 0.95, "grad_norm": 1.0980629920959473, "learning_rate": 1.037660229345505e-06, "loss": 0.7794, "step": 12504 }, { "epoch": 0.95, "grad_norm": 1.26971435546875, "learning_rate": 1.034215949419748e-06, "loss": 1.9677, "step": 12505 }, { "epoch": 0.95, "grad_norm": 2.085326910018921, "learning_rate": 1.0307773655441777e-06, "loss": 1.6497, "step": 12506 }, { "epoch": 0.95, "grad_norm": 1.8831132650375366, "learning_rate": 1.027344477916714e-06, "loss": 1.0873, "step": 12507 }, { "epoch": 0.95, "grad_norm": 1.5733325481414795, "learning_rate": 1.0239172867349322e-06, "loss": 1.5853, "step": 12508 }, { "epoch": 0.95, "grad_norm": 1.5181792974472046, "learning_rate": 1.0204957921960968e-06, "loss": 1.4268, "step": 12509 }, { "epoch": 0.95, "grad_norm": 1.9362750053405762, "learning_rate": 1.0170799944971165e-06, "loss": 1.7594, "step": 12510 }, { "epoch": 0.95, "grad_norm": 2.083143711090088, "learning_rate": 1.0136698938346011e-06, "loss": 1.7187, "step": 12511 }, { "epoch": 0.95, "grad_norm": 1.6239078044891357, "learning_rate": 1.010265490404827e-06, "loss": 1.5832, "step": 12512 }, { "epoch": 0.95, "grad_norm": 1.756421446800232, "learning_rate": 1.006866784403715e-06, "loss": 0.9261, "step": 12513 }, { "epoch": 0.95, "grad_norm": 1.7656954526901245, "learning_rate": 1.0034737760269087e-06, "loss": 0.9382, "step": 12514 }, { "epoch": 0.95, "grad_norm": 3.5264387130737305, "learning_rate": 1.0000864654696852e-06, "loss": 1.5148, "step": 12515 }, { "epoch": 0.96, "grad_norm": 1.6580678224563599, "learning_rate": 
9.967048529269884e-07, "loss": 1.1406, "step": 12516 }, { "epoch": 0.96, "grad_norm": 1.6043585538864136, "learning_rate": 9.93328938593474e-07, "loss": 1.2933, "step": 12517 }, { "epoch": 0.96, "grad_norm": 4.275860786437988, "learning_rate": 9.89958722663431e-07, "loss": 1.6645, "step": 12518 }, { "epoch": 0.96, "grad_norm": 1.361128330230713, "learning_rate": 9.865942053308374e-07, "loss": 1.2455, "step": 12519 }, { "epoch": 0.96, "grad_norm": 1.3645961284637451, "learning_rate": 9.832353867893386e-07, "loss": 1.1165, "step": 12520 }, { "epoch": 0.96, "grad_norm": 1.31527578830719, "learning_rate": 9.798822672322572e-07, "loss": 1.6426, "step": 12521 }, { "epoch": 0.96, "grad_norm": 1.5916823148727417, "learning_rate": 9.765348468525726e-07, "loss": 1.9837, "step": 12522 }, { "epoch": 0.96, "grad_norm": 1.2545292377471924, "learning_rate": 9.731931258429638e-07, "loss": 1.4926, "step": 12523 }, { "epoch": 0.96, "grad_norm": 1.3852726221084595, "learning_rate": 9.69857104395766e-07, "loss": 1.1965, "step": 12524 }, { "epoch": 0.96, "grad_norm": 1.4001708030700684, "learning_rate": 9.665267827029812e-07, "loss": 1.3934, "step": 12525 }, { "epoch": 0.96, "grad_norm": 1.3356674909591675, "learning_rate": 9.632021609562891e-07, "loss": 1.1899, "step": 12526 }, { "epoch": 0.96, "grad_norm": 1.6908233165740967, "learning_rate": 9.59883239347037e-07, "loss": 2.0903, "step": 12527 }, { "epoch": 0.96, "grad_norm": 0.8863763809204102, "learning_rate": 9.565700180662496e-07, "loss": 1.4178, "step": 12528 }, { "epoch": 0.96, "grad_norm": 1.1106818914413452, "learning_rate": 9.532624973046301e-07, "loss": 0.8882, "step": 12529 }, { "epoch": 0.96, "grad_norm": 0.9830242395401001, "learning_rate": 9.499606772525371e-07, "loss": 0.9553, "step": 12530 }, { "epoch": 0.96, "grad_norm": 1.2398298978805542, "learning_rate": 9.466645581000077e-07, "loss": 0.4435, "step": 12531 }, { "epoch": 0.96, "grad_norm": 2.12821888923645, "learning_rate": 9.433741400367457e-07, "loss": 1.4267, 
"step": 12532 }, { "epoch": 0.96, "grad_norm": 1.4698964357376099, "learning_rate": 9.400894232521662e-07, "loss": 1.1341, "step": 12533 }, { "epoch": 0.96, "grad_norm": 1.438378930091858, "learning_rate": 9.368104079352846e-07, "loss": 1.3823, "step": 12534 }, { "epoch": 0.96, "grad_norm": 3.07840633392334, "learning_rate": 9.335370942748389e-07, "loss": 1.7207, "step": 12535 }, { "epoch": 0.96, "grad_norm": 2.8206300735473633, "learning_rate": 9.30269482459245e-07, "loss": 1.77, "step": 12536 }, { "epoch": 0.96, "grad_norm": 1.613439679145813, "learning_rate": 9.270075726765637e-07, "loss": 1.5147, "step": 12537 }, { "epoch": 0.96, "grad_norm": 1.2216356992721558, "learning_rate": 9.237513651145225e-07, "loss": 1.6016, "step": 12538 }, { "epoch": 0.96, "grad_norm": 2.063047170639038, "learning_rate": 9.205008599605491e-07, "loss": 1.1431, "step": 12539 }, { "epoch": 0.96, "grad_norm": 1.561284065246582, "learning_rate": 9.172560574017164e-07, "loss": 0.8098, "step": 12540 }, { "epoch": 0.96, "grad_norm": 1.344840407371521, "learning_rate": 9.140169576247858e-07, "loss": 0.9311, "step": 12541 }, { "epoch": 0.96, "grad_norm": 2.557826519012451, "learning_rate": 9.107835608162086e-07, "loss": 1.1326, "step": 12542 }, { "epoch": 0.96, "grad_norm": 2.6330931186676025, "learning_rate": 9.075558671620465e-07, "loss": 1.6894, "step": 12543 }, { "epoch": 0.96, "grad_norm": 1.6490013599395752, "learning_rate": 9.043338768481069e-07, "loss": 1.8953, "step": 12544 }, { "epoch": 0.96, "grad_norm": 0.8961321115493774, "learning_rate": 9.01117590059819e-07, "loss": 0.9677, "step": 12545 }, { "epoch": 0.96, "grad_norm": 0.8578622341156006, "learning_rate": 8.979070069822904e-07, "loss": 1.0731, "step": 12546 }, { "epoch": 0.96, "grad_norm": 1.4220505952835083, "learning_rate": 8.947021278003176e-07, "loss": 1.6169, "step": 12547 }, { "epoch": 0.96, "grad_norm": 1.8989812135696411, "learning_rate": 8.915029526983642e-07, "loss": 1.8034, "step": 12548 }, { "epoch": 0.96, 
"grad_norm": 1.1208851337432861, "learning_rate": 8.883094818605497e-07, "loss": 1.4587, "step": 12549 }, { "epoch": 0.96, "grad_norm": 1.4965757131576538, "learning_rate": 8.851217154706826e-07, "loss": 1.1838, "step": 12550 }, { "epoch": 0.96, "grad_norm": 1.2502282857894897, "learning_rate": 8.819396537122493e-07, "loss": 1.1185, "step": 12551 }, { "epoch": 0.96, "grad_norm": 4.882866859436035, "learning_rate": 8.787632967683701e-07, "loss": 1.2833, "step": 12552 }, { "epoch": 0.96, "grad_norm": 1.2167528867721558, "learning_rate": 8.755926448218876e-07, "loss": 1.1497, "step": 12553 }, { "epoch": 0.96, "grad_norm": 1.938853144645691, "learning_rate": 8.724276980552781e-07, "loss": 1.5, "step": 12554 }, { "epoch": 0.96, "grad_norm": 1.427680253982544, "learning_rate": 8.692684566506959e-07, "loss": 1.9151, "step": 12555 }, { "epoch": 0.96, "grad_norm": 1.4383151531219482, "learning_rate": 8.661149207899844e-07, "loss": 1.56, "step": 12556 }, { "epoch": 0.96, "grad_norm": 1.3808568716049194, "learning_rate": 8.629670906546539e-07, "loss": 1.5266, "step": 12557 }, { "epoch": 0.96, "grad_norm": 2.0985517501831055, "learning_rate": 8.598249664258595e-07, "loss": 1.5626, "step": 12558 }, { "epoch": 0.96, "grad_norm": 1.2367714643478394, "learning_rate": 8.566885482844678e-07, "loss": 1.4208, "step": 12559 }, { "epoch": 0.96, "grad_norm": 1.9493436813354492, "learning_rate": 8.535578364109897e-07, "loss": 0.9367, "step": 12560 }, { "epoch": 0.96, "grad_norm": 1.7002129554748535, "learning_rate": 8.504328309856147e-07, "loss": 1.2732, "step": 12561 }, { "epoch": 0.96, "grad_norm": 2.2694177627563477, "learning_rate": 8.473135321882098e-07, "loss": 0.7511, "step": 12562 }, { "epoch": 0.96, "grad_norm": 1.0655581951141357, "learning_rate": 8.441999401983091e-07, "loss": 1.2482, "step": 12563 }, { "epoch": 0.96, "grad_norm": 1.6806111335754395, "learning_rate": 8.410920551951029e-07, "loss": 0.9437, "step": 12564 }, { "epoch": 0.96, "grad_norm": 1.1996911764144897, 
"learning_rate": 8.379898773574924e-07, "loss": 1.2991, "step": 12565 }, { "epoch": 0.96, "grad_norm": 2.012903928756714, "learning_rate": 8.348934068640013e-07, "loss": 1.4386, "step": 12566 }, { "epoch": 0.96, "grad_norm": 3.480828046798706, "learning_rate": 8.31802643892865e-07, "loss": 1.1521, "step": 12567 }, { "epoch": 0.96, "grad_norm": 1.5658233165740967, "learning_rate": 8.287175886219633e-07, "loss": 1.2791, "step": 12568 }, { "epoch": 0.96, "grad_norm": 1.6174266338348389, "learning_rate": 8.256382412288766e-07, "loss": 1.7221, "step": 12569 }, { "epoch": 0.96, "grad_norm": 1.1755074262619019, "learning_rate": 8.225646018908184e-07, "loss": 1.7593, "step": 12570 }, { "epoch": 0.96, "grad_norm": 2.006877899169922, "learning_rate": 8.19496670784714e-07, "loss": 1.6637, "step": 12571 }, { "epoch": 0.96, "grad_norm": 1.4087820053100586, "learning_rate": 8.164344480871223e-07, "loss": 1.6769, "step": 12572 }, { "epoch": 0.96, "grad_norm": 1.1343414783477783, "learning_rate": 8.13377933974302e-07, "loss": 1.4307, "step": 12573 }, { "epoch": 0.96, "grad_norm": 1.4453374147415161, "learning_rate": 8.103271286221792e-07, "loss": 1.2859, "step": 12574 }, { "epoch": 0.96, "grad_norm": 3.0908353328704834, "learning_rate": 8.072820322063245e-07, "loss": 2.0761, "step": 12575 }, { "epoch": 0.96, "grad_norm": 1.4741026163101196, "learning_rate": 8.042426449020202e-07, "loss": 1.618, "step": 12576 }, { "epoch": 0.96, "grad_norm": 1.7186095714569092, "learning_rate": 8.012089668841927e-07, "loss": 1.459, "step": 12577 }, { "epoch": 0.96, "grad_norm": 1.9728862047195435, "learning_rate": 7.981809983274469e-07, "loss": 0.8809, "step": 12578 }, { "epoch": 0.96, "grad_norm": 1.1899993419647217, "learning_rate": 7.951587394060655e-07, "loss": 1.3901, "step": 12579 }, { "epoch": 0.96, "grad_norm": 1.5778330564498901, "learning_rate": 7.921421902939874e-07, "loss": 0.7262, "step": 12580 }, { "epoch": 0.96, "grad_norm": 1.0771136283874512, "learning_rate": 7.891313511648401e-07, 
"loss": 1.0909, "step": 12581 }, { "epoch": 0.96, "grad_norm": 2.6731505393981934, "learning_rate": 7.861262221919075e-07, "loss": 2.0434, "step": 12582 }, { "epoch": 0.96, "grad_norm": 1.4258743524551392, "learning_rate": 7.831268035481731e-07, "loss": 0.8015, "step": 12583 }, { "epoch": 0.96, "grad_norm": 1.2833781242370605, "learning_rate": 7.801330954062436e-07, "loss": 1.3069, "step": 12584 }, { "epoch": 0.96, "grad_norm": 2.0583605766296387, "learning_rate": 7.771450979384365e-07, "loss": 1.4129, "step": 12585 }, { "epoch": 0.96, "grad_norm": 1.7274658679962158, "learning_rate": 7.741628113167254e-07, "loss": 1.123, "step": 12586 }, { "epoch": 0.96, "grad_norm": 2.2991511821746826, "learning_rate": 7.71186235712762e-07, "loss": 1.268, "step": 12587 }, { "epoch": 0.96, "grad_norm": 2.109966516494751, "learning_rate": 7.682153712978535e-07, "loss": 2.0453, "step": 12588 }, { "epoch": 0.96, "grad_norm": 1.3714808225631714, "learning_rate": 7.652502182430077e-07, "loss": 1.2569, "step": 12589 }, { "epoch": 0.96, "grad_norm": 3.631244421005249, "learning_rate": 7.622907767188881e-07, "loss": 1.4709, "step": 12590 }, { "epoch": 0.96, "grad_norm": 4.8143510818481445, "learning_rate": 7.593370468958028e-07, "loss": 2.005, "step": 12591 }, { "epoch": 0.96, "grad_norm": 1.271888256072998, "learning_rate": 7.563890289437825e-07, "loss": 1.5478, "step": 12592 }, { "epoch": 0.96, "grad_norm": 1.4722617864608765, "learning_rate": 7.534467230324805e-07, "loss": 2.2423, "step": 12593 }, { "epoch": 0.96, "grad_norm": 0.8050048351287842, "learning_rate": 7.505101293312611e-07, "loss": 0.7065, "step": 12594 }, { "epoch": 0.96, "grad_norm": 1.132824420928955, "learning_rate": 7.475792480091226e-07, "loss": 1.5243, "step": 12595 }, { "epoch": 0.96, "grad_norm": 1.4059690237045288, "learning_rate": 7.446540792347856e-07, "loss": 1.3485, "step": 12596 }, { "epoch": 0.96, "grad_norm": 1.9572029113769531, "learning_rate": 7.41734623176571e-07, "loss": 1.863, "step": 12597 }, { 
"epoch": 0.96, "grad_norm": 1.7235231399536133, "learning_rate": 7.388208800025442e-07, "loss": 1.6861, "step": 12598 }, { "epoch": 0.96, "grad_norm": 2.8257455825805664, "learning_rate": 7.359128498803936e-07, "loss": 0.9967, "step": 12599 }, { "epoch": 0.96, "grad_norm": 1.300420880317688, "learning_rate": 7.33010532977485e-07, "loss": 1.4137, "step": 12600 }, { "epoch": 0.96, "grad_norm": 1.1693073511123657, "learning_rate": 7.301139294608738e-07, "loss": 1.2928, "step": 12601 }, { "epoch": 0.96, "grad_norm": 1.7198892831802368, "learning_rate": 7.272230394972823e-07, "loss": 1.1503, "step": 12602 }, { "epoch": 0.96, "grad_norm": 1.5587698221206665, "learning_rate": 7.243378632530884e-07, "loss": 1.714, "step": 12603 }, { "epoch": 0.96, "grad_norm": 1.055991768836975, "learning_rate": 7.214584008943482e-07, "loss": 1.2899, "step": 12604 }, { "epoch": 0.96, "grad_norm": 1.2842603921890259, "learning_rate": 7.185846525867956e-07, "loss": 1.4621, "step": 12605 }, { "epoch": 0.96, "grad_norm": 1.0469812154769897, "learning_rate": 7.157166184958319e-07, "loss": 1.3389, "step": 12606 }, { "epoch": 0.96, "grad_norm": 1.1381242275238037, "learning_rate": 7.128542987865249e-07, "loss": 1.4545, "step": 12607 }, { "epoch": 0.96, "grad_norm": 0.9781412482261658, "learning_rate": 7.099976936236319e-07, "loss": 1.1284, "step": 12608 }, { "epoch": 0.96, "grad_norm": 2.4950504302978516, "learning_rate": 7.071468031715434e-07, "loss": 1.7879, "step": 12609 }, { "epoch": 0.96, "grad_norm": 2.1078295707702637, "learning_rate": 7.043016275943615e-07, "loss": 1.4989, "step": 12610 }, { "epoch": 0.96, "grad_norm": 1.7524306774139404, "learning_rate": 7.014621670558441e-07, "loss": 1.2845, "step": 12611 }, { "epoch": 0.96, "grad_norm": 1.6235812902450562, "learning_rate": 6.98628421719405e-07, "loss": 1.5856, "step": 12612 }, { "epoch": 0.96, "grad_norm": 0.9296393990516663, "learning_rate": 6.95800391748147e-07, "loss": 0.8443, "step": 12613 }, { "epoch": 0.96, "grad_norm": 
1.2193315029144287, "learning_rate": 6.929780773048511e-07, "loss": 0.9613, "step": 12614 }, { "epoch": 0.96, "grad_norm": 1.7221776247024536, "learning_rate": 6.901614785519428e-07, "loss": 1.7888, "step": 12615 }, { "epoch": 0.96, "grad_norm": 2.776113986968994, "learning_rate": 6.873505956515369e-07, "loss": 1.2045, "step": 12616 }, { "epoch": 0.96, "grad_norm": 1.3330538272857666, "learning_rate": 6.845454287654263e-07, "loss": 1.5973, "step": 12617 }, { "epoch": 0.96, "grad_norm": 1.5708436965942383, "learning_rate": 6.817459780550595e-07, "loss": 2.2547, "step": 12618 }, { "epoch": 0.96, "grad_norm": 2.056161642074585, "learning_rate": 6.78952243681541e-07, "loss": 1.5108, "step": 12619 }, { "epoch": 0.96, "grad_norm": 7.718677520751953, "learning_rate": 6.761642258056978e-07, "loss": 2.1638, "step": 12620 }, { "epoch": 0.96, "grad_norm": 2.3994405269622803, "learning_rate": 6.733819245879902e-07, "loss": 1.0103, "step": 12621 }, { "epoch": 0.96, "grad_norm": 1.4833691120147705, "learning_rate": 6.706053401885348e-07, "loss": 0.7632, "step": 12622 }, { "epoch": 0.96, "grad_norm": 1.2663772106170654, "learning_rate": 6.678344727671593e-07, "loss": 1.1431, "step": 12623 }, { "epoch": 0.96, "grad_norm": 1.3535196781158447, "learning_rate": 6.650693224833248e-07, "loss": 1.2518, "step": 12624 }, { "epoch": 0.96, "grad_norm": 1.4195727109909058, "learning_rate": 6.623098894962044e-07, "loss": 1.1783, "step": 12625 }, { "epoch": 0.96, "grad_norm": 1.8531359434127808, "learning_rate": 6.595561739646039e-07, "loss": 0.7467, "step": 12626 }, { "epoch": 0.96, "grad_norm": 1.7913683652877808, "learning_rate": 6.56808176047019e-07, "loss": 1.4254, "step": 12627 }, { "epoch": 0.96, "grad_norm": 1.7301077842712402, "learning_rate": 6.540658959016121e-07, "loss": 1.4292, "step": 12628 }, { "epoch": 0.96, "grad_norm": 1.2227756977081299, "learning_rate": 6.513293336862348e-07, "loss": 1.4002, "step": 12629 }, { "epoch": 0.96, "grad_norm": 2.627333164215088, "learning_rate": 
6.485984895583608e-07, "loss": 1.3551, "step": 12630 }, { "epoch": 0.96, "grad_norm": 1.4947357177734375, "learning_rate": 6.458733636751868e-07, "loss": 1.5754, "step": 12631 }, { "epoch": 0.96, "grad_norm": 1.2283620834350586, "learning_rate": 6.431539561935429e-07, "loss": 0.9231, "step": 12632 }, { "epoch": 0.96, "grad_norm": 1.3671660423278809, "learning_rate": 6.404402672699706e-07, "loss": 1.4313, "step": 12633 }, { "epoch": 0.96, "grad_norm": 1.6419161558151245, "learning_rate": 6.377322970606447e-07, "loss": 1.5228, "step": 12634 }, { "epoch": 0.96, "grad_norm": 1.5664808750152588, "learning_rate": 6.350300457214187e-07, "loss": 2.2916, "step": 12635 }, { "epoch": 0.96, "grad_norm": 1.467314600944519, "learning_rate": 6.323335134078345e-07, "loss": 0.9483, "step": 12636 }, { "epoch": 0.96, "grad_norm": 1.218206763267517, "learning_rate": 6.296427002750793e-07, "loss": 1.5573, "step": 12637 }, { "epoch": 0.96, "grad_norm": 3.1702208518981934, "learning_rate": 6.269576064780402e-07, "loss": 1.9666, "step": 12638 }, { "epoch": 0.96, "grad_norm": 1.6467982530593872, "learning_rate": 6.242782321712492e-07, "loss": 1.4135, "step": 12639 }, { "epoch": 0.96, "grad_norm": 1.4546717405319214, "learning_rate": 6.216045775089275e-07, "loss": 1.7614, "step": 12640 }, { "epoch": 0.96, "grad_norm": 0.9082382321357727, "learning_rate": 6.189366426449517e-07, "loss": 0.8262, "step": 12641 }, { "epoch": 0.96, "grad_norm": 1.0938440561294556, "learning_rate": 6.16274427732888e-07, "loss": 0.643, "step": 12642 }, { "epoch": 0.96, "grad_norm": 1.1931251287460327, "learning_rate": 6.136179329259473e-07, "loss": 1.7441, "step": 12643 }, { "epoch": 0.96, "grad_norm": 1.8055580854415894, "learning_rate": 6.109671583770293e-07, "loss": 1.5923, "step": 12644 }, { "epoch": 0.96, "grad_norm": 1.9010355472564697, "learning_rate": 6.083221042387122e-07, "loss": 1.579, "step": 12645 }, { "epoch": 0.96, "grad_norm": 3.0975735187530518, "learning_rate": 6.056827706632185e-07, "loss": 
1.531, "step": 12646 }, { "epoch": 0.96, "grad_norm": 1.5626109838485718, "learning_rate": 6.030491578024711e-07, "loss": 1.1937, "step": 12647 }, { "epoch": 0.97, "grad_norm": 1.3373653888702393, "learning_rate": 6.004212658080488e-07, "loss": 1.3012, "step": 12648 }, { "epoch": 0.97, "grad_norm": 1.4139575958251953, "learning_rate": 5.977990948311974e-07, "loss": 1.3718, "step": 12649 }, { "epoch": 0.97, "grad_norm": 1.1545073986053467, "learning_rate": 5.951826450228293e-07, "loss": 0.9659, "step": 12650 }, { "epoch": 0.97, "grad_norm": 1.4245188236236572, "learning_rate": 5.925719165335575e-07, "loss": 1.4352, "step": 12651 }, { "epoch": 0.97, "grad_norm": 2.476339817047119, "learning_rate": 5.899669095136174e-07, "loss": 1.4619, "step": 12652 }, { "epoch": 0.97, "grad_norm": 3.972003936767578, "learning_rate": 5.873676241129555e-07, "loss": 2.2631, "step": 12653 }, { "epoch": 0.97, "grad_norm": 1.4949195384979248, "learning_rate": 5.847740604811858e-07, "loss": 0.6827, "step": 12654 }, { "epoch": 0.97, "grad_norm": 1.7435461282730103, "learning_rate": 5.821862187675775e-07, "loss": 1.3498, "step": 12655 }, { "epoch": 0.97, "grad_norm": 1.5154410600662231, "learning_rate": 5.796040991210561e-07, "loss": 0.8778, "step": 12656 }, { "epoch": 0.97, "grad_norm": 1.5235732793807983, "learning_rate": 5.770277016902692e-07, "loss": 1.3682, "step": 12657 }, { "epoch": 0.97, "grad_norm": 1.893652319908142, "learning_rate": 5.744570266234761e-07, "loss": 1.2867, "step": 12658 }, { "epoch": 0.97, "grad_norm": 1.9271103143692017, "learning_rate": 5.718920740686473e-07, "loss": 1.1504, "step": 12659 }, { "epoch": 0.97, "grad_norm": 2.1406147480010986, "learning_rate": 5.693328441734203e-07, "loss": 1.3053, "step": 12660 }, { "epoch": 0.97, "grad_norm": 1.4456299543380737, "learning_rate": 5.667793370850661e-07, "loss": 1.2895, "step": 12661 }, { "epoch": 0.97, "grad_norm": 1.7353124618530273, "learning_rate": 5.642315529505671e-07, "loss": 1.1906, "step": 12662 }, { "epoch": 
0.97, "grad_norm": 1.6338316202163696, "learning_rate": 5.616894919165838e-07, "loss": 1.6948, "step": 12663 }, { "epoch": 0.97, "grad_norm": 1.508364200592041, "learning_rate": 5.591531541293882e-07, "loss": 0.8786, "step": 12664 }, { "epoch": 0.97, "grad_norm": 1.9897782802581787, "learning_rate": 5.566225397349855e-07, "loss": 1.5149, "step": 12665 }, { "epoch": 0.97, "grad_norm": 3.8651649951934814, "learning_rate": 5.540976488790262e-07, "loss": 1.9605, "step": 12666 }, { "epoch": 0.97, "grad_norm": 1.2336795330047607, "learning_rate": 5.51578481706827e-07, "loss": 1.1394, "step": 12667 }, { "epoch": 0.97, "grad_norm": 5.318033218383789, "learning_rate": 5.490650383633833e-07, "loss": 1.7143, "step": 12668 }, { "epoch": 0.97, "grad_norm": 1.0654844045639038, "learning_rate": 5.465573189933571e-07, "loss": 0.9563, "step": 12669 }, { "epoch": 0.97, "grad_norm": 1.5678343772888184, "learning_rate": 5.440553237410772e-07, "loss": 1.2461, "step": 12670 }, { "epoch": 0.97, "grad_norm": 2.132995367050171, "learning_rate": 5.415590527505621e-07, "loss": 1.3691, "step": 12671 }, { "epoch": 0.97, "grad_norm": 1.155153751373291, "learning_rate": 5.390685061654632e-07, "loss": 1.0777, "step": 12672 }, { "epoch": 0.97, "grad_norm": 1.8460843563079834, "learning_rate": 5.365836841291438e-07, "loss": 1.4591, "step": 12673 }, { "epoch": 0.97, "grad_norm": 1.4715956449508667, "learning_rate": 5.341045867846228e-07, "loss": 0.963, "step": 12674 }, { "epoch": 0.97, "grad_norm": 2.5456302165985107, "learning_rate": 5.31631214274575e-07, "loss": 1.9015, "step": 12675 }, { "epoch": 0.97, "grad_norm": 1.3051575422286987, "learning_rate": 5.29163566741353e-07, "loss": 1.4036, "step": 12676 }, { "epoch": 0.97, "grad_norm": 1.2854032516479492, "learning_rate": 5.26701644326999e-07, "loss": 1.2361, "step": 12677 }, { "epoch": 0.97, "grad_norm": 1.6041561365127563, "learning_rate": 5.242454471732105e-07, "loss": 1.3885, "step": 12678 }, { "epoch": 0.97, "grad_norm": 1.5401897430419922, 
"learning_rate": 5.217949754213414e-07, "loss": 1.666, "step": 12679 }, { "epoch": 0.97, "grad_norm": 3.1253039836883545, "learning_rate": 5.193502292124341e-07, "loss": 1.8844, "step": 12680 }, { "epoch": 0.97, "grad_norm": 1.802410364151001, "learning_rate": 5.169112086872097e-07, "loss": 1.284, "step": 12681 }, { "epoch": 0.97, "grad_norm": 1.7681574821472168, "learning_rate": 5.144779139860334e-07, "loss": 1.3507, "step": 12682 }, { "epoch": 0.97, "grad_norm": 1.6954982280731201, "learning_rate": 5.120503452489711e-07, "loss": 1.5197, "step": 12683 }, { "epoch": 0.97, "grad_norm": 1.0661942958831787, "learning_rate": 5.096285026157332e-07, "loss": 1.4124, "step": 12684 }, { "epoch": 0.97, "grad_norm": 1.1870821714401245, "learning_rate": 5.072123862256972e-07, "loss": 2.0638, "step": 12685 }, { "epoch": 0.97, "grad_norm": 1.4881149530410767, "learning_rate": 5.048019962179629e-07, "loss": 1.1648, "step": 12686 }, { "epoch": 0.97, "grad_norm": 1.952319860458374, "learning_rate": 5.023973327312192e-07, "loss": 1.1236, "step": 12687 }, { "epoch": 0.97, "grad_norm": 1.4581712484359741, "learning_rate": 4.999983959039001e-07, "loss": 1.2096, "step": 12688 }, { "epoch": 0.97, "grad_norm": 4.719862461090088, "learning_rate": 4.976051858740505e-07, "loss": 1.7463, "step": 12689 }, { "epoch": 0.97, "grad_norm": 2.3182058334350586, "learning_rate": 4.95217702779438e-07, "loss": 1.3514, "step": 12690 }, { "epoch": 0.97, "grad_norm": 1.3383333683013916, "learning_rate": 4.928359467574639e-07, "loss": 1.4595, "step": 12691 }, { "epoch": 0.97, "grad_norm": 2.517925977706909, "learning_rate": 4.904599179452185e-07, "loss": 1.0841, "step": 12692 }, { "epoch": 0.97, "grad_norm": 0.9329748153686523, "learning_rate": 4.880896164794479e-07, "loss": 1.1164, "step": 12693 }, { "epoch": 0.97, "grad_norm": 2.5827481746673584, "learning_rate": 4.857250424965875e-07, "loss": 1.6517, "step": 12694 }, { "epoch": 0.97, "grad_norm": 1.5895506143569946, "learning_rate": 
4.833661961327173e-07, "loss": 1.5652, "step": 12695 }, { "epoch": 0.97, "grad_norm": 1.3514264822006226, "learning_rate": 4.810130775236177e-07, "loss": 1.489, "step": 12696 }, { "epoch": 0.97, "grad_norm": 1.4361969232559204, "learning_rate": 4.786656868047135e-07, "loss": 1.5884, "step": 12697 }, { "epoch": 0.97, "grad_norm": 1.8955672979354858, "learning_rate": 4.7632402411110775e-07, "loss": 1.4856, "step": 12698 }, { "epoch": 0.97, "grad_norm": 1.1083649396896362, "learning_rate": 4.739880895775928e-07, "loss": 1.6746, "step": 12699 }, { "epoch": 0.97, "grad_norm": 1.2277246713638306, "learning_rate": 4.7165788333860536e-07, "loss": 1.3372, "step": 12700 }, { "epoch": 0.97, "grad_norm": 1.4792736768722534, "learning_rate": 4.6933340552824945e-07, "loss": 1.1331, "step": 12701 }, { "epoch": 0.97, "grad_norm": 2.0916149616241455, "learning_rate": 4.670146562803401e-07, "loss": 1.5808, "step": 12702 }, { "epoch": 0.97, "grad_norm": 1.8530120849609375, "learning_rate": 4.6470163572830405e-07, "loss": 1.8764, "step": 12703 }, { "epoch": 0.97, "grad_norm": 1.3421202898025513, "learning_rate": 4.6239434400529023e-07, "loss": 1.2721, "step": 12704 }, { "epoch": 0.97, "grad_norm": 1.2918105125427246, "learning_rate": 4.600927812441036e-07, "loss": 1.1309, "step": 12705 }, { "epoch": 0.97, "grad_norm": 1.2737230062484741, "learning_rate": 4.577969475771826e-07, "loss": 1.3113, "step": 12706 }, { "epoch": 0.97, "grad_norm": 3.3716344833374023, "learning_rate": 4.555068431366882e-07, "loss": 1.0826, "step": 12707 }, { "epoch": 0.97, "grad_norm": 1.5137791633605957, "learning_rate": 4.5322246805442614e-07, "loss": 1.2326, "step": 12708 }, { "epoch": 0.97, "grad_norm": 1.5678317546844482, "learning_rate": 4.5094382246186894e-07, "loss": 0.9971, "step": 12709 }, { "epoch": 0.97, "grad_norm": 4.546532154083252, "learning_rate": 4.486709064901673e-07, "loss": 1.7434, "step": 12710 }, { "epoch": 0.97, "grad_norm": 1.7111173868179321, "learning_rate": 4.4640372027014985e-07, 
"loss": 1.4094, "step": 12711 }, { "epoch": 0.97, "grad_norm": 2.6977856159210205, "learning_rate": 4.441422639322901e-07, "loss": 2.0642, "step": 12712 }, { "epoch": 0.97, "grad_norm": 1.3788671493530273, "learning_rate": 4.4188653760676155e-07, "loss": 1.2699, "step": 12713 }, { "epoch": 0.97, "grad_norm": 1.3638484477996826, "learning_rate": 4.3963654142339385e-07, "loss": 1.3484, "step": 12714 }, { "epoch": 0.97, "grad_norm": 1.6091673374176025, "learning_rate": 4.373922755116722e-07, "loss": 0.978, "step": 12715 }, { "epoch": 0.97, "grad_norm": 1.4585561752319336, "learning_rate": 4.351537400007821e-07, "loss": 1.4894, "step": 12716 }, { "epoch": 0.97, "grad_norm": 1.5679101943969727, "learning_rate": 4.329209350195651e-07, "loss": 1.6488, "step": 12717 }, { "epoch": 0.97, "grad_norm": 1.1844308376312256, "learning_rate": 4.306938606965183e-07, "loss": 1.3409, "step": 12718 }, { "epoch": 0.97, "grad_norm": 1.3246161937713623, "learning_rate": 4.2847251715982807e-07, "loss": 1.5604, "step": 12719 }, { "epoch": 0.97, "grad_norm": 1.1652624607086182, "learning_rate": 4.2625690453735876e-07, "loss": 0.9563, "step": 12720 }, { "epoch": 0.97, "grad_norm": 1.7021534442901611, "learning_rate": 4.2404702295660845e-07, "loss": 1.4966, "step": 12721 }, { "epoch": 0.97, "grad_norm": 1.5243563652038574, "learning_rate": 4.218428725447976e-07, "loss": 1.1554, "step": 12722 }, { "epoch": 0.97, "grad_norm": 1.367304801940918, "learning_rate": 4.1964445342875804e-07, "loss": 1.5471, "step": 12723 }, { "epoch": 0.97, "grad_norm": 2.0848448276519775, "learning_rate": 4.174517657350441e-07, "loss": 1.3018, "step": 12724 }, { "epoch": 0.97, "grad_norm": 1.9750555753707886, "learning_rate": 4.1526480958984373e-07, "loss": 1.4064, "step": 12725 }, { "epoch": 0.97, "grad_norm": 1.9287127256393433, "learning_rate": 4.13083585119034e-07, "loss": 1.7036, "step": 12726 }, { "epoch": 0.97, "grad_norm": 1.5137840509414673, "learning_rate": 4.1090809244814785e-07, "loss": 1.1939, "step": 
12727 }, { "epoch": 0.97, "grad_norm": 1.6365028619766235, "learning_rate": 4.087383317024185e-07, "loss": 1.403, "step": 12728 }, { "epoch": 0.97, "grad_norm": 1.8168449401855469, "learning_rate": 4.0657430300671264e-07, "loss": 1.2759, "step": 12729 }, { "epoch": 0.97, "grad_norm": 1.5083986520767212, "learning_rate": 4.044160064855751e-07, "loss": 1.5133, "step": 12730 }, { "epoch": 0.97, "grad_norm": 1.208432912826538, "learning_rate": 4.0226344226325095e-07, "loss": 1.4735, "step": 12731 }, { "epoch": 0.97, "grad_norm": 1.7257978916168213, "learning_rate": 4.001166104636078e-07, "loss": 1.4038, "step": 12732 }, { "epoch": 0.97, "grad_norm": 1.2246801853179932, "learning_rate": 3.979755112102357e-07, "loss": 1.5404, "step": 12733 }, { "epoch": 0.97, "grad_norm": 2.775120735168457, "learning_rate": 3.958401446263471e-07, "loss": 1.0596, "step": 12734 }, { "epoch": 0.97, "grad_norm": 1.4223312139511108, "learning_rate": 3.9371051083484377e-07, "loss": 0.7452, "step": 12735 }, { "epoch": 0.97, "grad_norm": 2.0000736713409424, "learning_rate": 3.9158660995830545e-07, "loss": 1.3832, "step": 12736 }, { "epoch": 0.97, "grad_norm": 1.6141575574874878, "learning_rate": 3.894684421189787e-07, "loss": 1.0792, "step": 12737 }, { "epoch": 0.97, "grad_norm": 1.5151458978652954, "learning_rate": 3.873560074387661e-07, "loss": 1.5626, "step": 12738 }, { "epoch": 0.97, "grad_norm": 1.2748602628707886, "learning_rate": 3.8524930603925925e-07, "loss": 1.3519, "step": 12739 }, { "epoch": 0.97, "grad_norm": 1.2633888721466064, "learning_rate": 3.8314833804170557e-07, "loss": 1.5695, "step": 12740 }, { "epoch": 0.97, "grad_norm": 1.0050466060638428, "learning_rate": 3.810531035670306e-07, "loss": 1.2507, "step": 12741 }, { "epoch": 0.97, "grad_norm": 1.3192384243011475, "learning_rate": 3.789636027358268e-07, "loss": 1.6574, "step": 12742 }, { "epoch": 0.97, "grad_norm": 2.7033772468566895, "learning_rate": 3.7687983566835337e-07, "loss": 1.4655, "step": 12743 }, { "epoch": 0.97, 
"grad_norm": 1.191511631011963, "learning_rate": 3.7480180248454786e-07, "loss": 1.0101, "step": 12744 }, { "epoch": 0.97, "grad_norm": 1.3629897832870483, "learning_rate": 3.727295033040035e-07, "loss": 1.3986, "step": 12745 }, { "epoch": 0.97, "grad_norm": 1.3519408702850342, "learning_rate": 3.706629382460136e-07, "loss": 1.2866, "step": 12746 }, { "epoch": 0.97, "grad_norm": 1.5766570568084717, "learning_rate": 3.686021074295054e-07, "loss": 1.1057, "step": 12747 }, { "epoch": 0.97, "grad_norm": 1.4033536911010742, "learning_rate": 3.6654701097308396e-07, "loss": 1.0007, "step": 12748 }, { "epoch": 0.97, "grad_norm": 1.1542420387268066, "learning_rate": 3.644976489950436e-07, "loss": 1.2275, "step": 12749 }, { "epoch": 0.97, "grad_norm": 1.416428565979004, "learning_rate": 3.624540216133454e-07, "loss": 1.1159, "step": 12750 }, { "epoch": 0.97, "grad_norm": 1.8117396831512451, "learning_rate": 3.604161289455954e-07, "loss": 1.3749, "step": 12751 }, { "epoch": 0.97, "grad_norm": 1.1657897233963013, "learning_rate": 3.583839711090997e-07, "loss": 1.3527, "step": 12752 }, { "epoch": 0.97, "grad_norm": 1.0611768960952759, "learning_rate": 3.5635754822079814e-07, "loss": 1.4642, "step": 12753 }, { "epoch": 0.97, "grad_norm": 2.0172266960144043, "learning_rate": 3.543368603973529e-07, "loss": 1.1949, "step": 12754 }, { "epoch": 0.97, "grad_norm": 0.9665239453315735, "learning_rate": 3.523219077550488e-07, "loss": 0.8792, "step": 12755 }, { "epoch": 0.97, "grad_norm": 1.454942226409912, "learning_rate": 3.503126904098597e-07, "loss": 0.8829, "step": 12756 }, { "epoch": 0.97, "grad_norm": 3.3729732036590576, "learning_rate": 3.4830920847742643e-07, "loss": 1.653, "step": 12757 }, { "epoch": 0.97, "grad_norm": 1.4022027254104614, "learning_rate": 3.4631146207305674e-07, "loss": 1.7101, "step": 12758 }, { "epoch": 0.97, "grad_norm": 1.6976242065429688, "learning_rate": 3.443194513117476e-07, "loss": 1.3799, "step": 12759 }, { "epoch": 0.97, "grad_norm": 
1.137021780014038, "learning_rate": 3.4233317630814053e-07, "loss": 1.0288, "step": 12760 }, { "epoch": 0.97, "grad_norm": 3.0514113903045654, "learning_rate": 3.403526371765553e-07, "loss": 1.2549, "step": 12761 }, { "epoch": 0.97, "grad_norm": 1.0150631666183472, "learning_rate": 3.3837783403098957e-07, "loss": 1.0952, "step": 12762 }, { "epoch": 0.97, "grad_norm": 2.6133511066436768, "learning_rate": 3.3640876698510795e-07, "loss": 1.4062, "step": 12763 }, { "epoch": 0.97, "grad_norm": 1.3847298622131348, "learning_rate": 3.344454361522198e-07, "loss": 1.308, "step": 12764 }, { "epoch": 0.97, "grad_norm": 1.5505876541137695, "learning_rate": 3.32487841645357e-07, "loss": 0.9548, "step": 12765 }, { "epoch": 0.97, "grad_norm": 0.8806923627853394, "learning_rate": 3.305359835771626e-07, "loss": 0.544, "step": 12766 }, { "epoch": 0.97, "grad_norm": 1.4568448066711426, "learning_rate": 3.285898620599914e-07, "loss": 2.085, "step": 12767 }, { "epoch": 0.97, "grad_norm": 1.2155917882919312, "learning_rate": 3.266494772058537e-07, "loss": 1.1892, "step": 12768 }, { "epoch": 0.97, "grad_norm": 0.9513688683509827, "learning_rate": 3.247148291264379e-07, "loss": 0.5728, "step": 12769 }, { "epoch": 0.97, "grad_norm": 2.2188634872436523, "learning_rate": 3.2278591793306614e-07, "loss": 2.0563, "step": 12770 }, { "epoch": 0.97, "grad_norm": 2.0211575031280518, "learning_rate": 3.208627437367939e-07, "loss": 1.5891, "step": 12771 }, { "epoch": 0.97, "grad_norm": 1.135416030883789, "learning_rate": 3.1894530664828837e-07, "loss": 0.9575, "step": 12772 }, { "epoch": 0.97, "grad_norm": 1.289207935333252, "learning_rate": 3.170336067779056e-07, "loss": 1.307, "step": 12773 }, { "epoch": 0.97, "grad_norm": 1.6456235647201538, "learning_rate": 3.1512764423569096e-07, "loss": 1.2663, "step": 12774 }, { "epoch": 0.97, "grad_norm": 1.9606289863586426, "learning_rate": 3.132274191313345e-07, "loss": 0.9646, "step": 12775 }, { "epoch": 0.97, "grad_norm": 1.2334758043289185, 
"learning_rate": 3.113329315742153e-07, "loss": 1.1479, "step": 12776 }, { "epoch": 0.97, "grad_norm": 1.8375918865203857, "learning_rate": 3.094441816733462e-07, "loss": 1.2692, "step": 12777 }, { "epoch": 0.97, "grad_norm": 1.877069354057312, "learning_rate": 3.075611695374625e-07, "loss": 1.585, "step": 12778 }, { "epoch": 0.98, "grad_norm": 2.6548986434936523, "learning_rate": 3.0568389527493304e-07, "loss": 1.9541, "step": 12779 }, { "epoch": 0.98, "grad_norm": 4.1052117347717285, "learning_rate": 3.038123589938047e-07, "loss": 2.3469, "step": 12780 }, { "epoch": 0.98, "grad_norm": 1.5611824989318848, "learning_rate": 3.019465608018024e-07, "loss": 1.3196, "step": 12781 }, { "epoch": 0.98, "grad_norm": 1.5108388662338257, "learning_rate": 3.0008650080629584e-07, "loss": 1.6856, "step": 12782 }, { "epoch": 0.98, "grad_norm": 1.4498653411865234, "learning_rate": 2.982321791143661e-07, "loss": 0.8269, "step": 12783 }, { "epoch": 0.98, "grad_norm": 2.323085069656372, "learning_rate": 2.963835958327166e-07, "loss": 1.2817, "step": 12784 }, { "epoch": 0.98, "grad_norm": 1.8389923572540283, "learning_rate": 2.945407510677622e-07, "loss": 1.2373, "step": 12785 }, { "epoch": 0.98, "grad_norm": 1.0969421863555908, "learning_rate": 2.927036449255516e-07, "loss": 0.9538, "step": 12786 }, { "epoch": 0.98, "grad_norm": 4.278913497924805, "learning_rate": 2.9087227751183333e-07, "loss": 1.8364, "step": 12787 }, { "epoch": 0.98, "grad_norm": 1.2199299335479736, "learning_rate": 2.8904664893202315e-07, "loss": 0.8578, "step": 12788 }, { "epoch": 0.98, "grad_norm": 1.4163312911987305, "learning_rate": 2.872267592911704e-07, "loss": 1.0552, "step": 12789 }, { "epoch": 0.98, "grad_norm": 1.1106151342391968, "learning_rate": 2.854126086940356e-07, "loss": 1.2145, "step": 12790 }, { "epoch": 0.98, "grad_norm": 1.3352519273757935, "learning_rate": 2.836041972450465e-07, "loss": 1.266, "step": 12791 }, { "epoch": 0.98, "grad_norm": 6.424253463745117, "learning_rate": 
2.8180152504825305e-07, "loss": 1.4428, "step": 12792 }, { "epoch": 0.98, "grad_norm": 1.1964629888534546, "learning_rate": 2.80004592207439e-07, "loss": 0.5049, "step": 12793 }, { "epoch": 0.98, "grad_norm": 1.1678825616836548, "learning_rate": 2.782133988260216e-07, "loss": 0.934, "step": 12794 }, { "epoch": 0.98, "grad_norm": 1.9109352827072144, "learning_rate": 2.76427945007085e-07, "loss": 1.5792, "step": 12795 }, { "epoch": 0.98, "grad_norm": 1.0881690979003906, "learning_rate": 2.746482308534026e-07, "loss": 1.0399, "step": 12796 }, { "epoch": 0.98, "grad_norm": 3.826489210128784, "learning_rate": 2.728742564674036e-07, "loss": 2.0454, "step": 12797 }, { "epoch": 0.98, "grad_norm": 0.9249575734138489, "learning_rate": 2.711060219511841e-07, "loss": 1.0052, "step": 12798 }, { "epoch": 0.98, "grad_norm": 1.218579888343811, "learning_rate": 2.6934352740652927e-07, "loss": 0.938, "step": 12799 }, { "epoch": 0.98, "grad_norm": 1.4995146989822388, "learning_rate": 2.675867729348691e-07, "loss": 1.5547, "step": 12800 }, { "epoch": 0.98, "grad_norm": 1.1039901971817017, "learning_rate": 2.658357586373228e-07, "loss": 0.9717, "step": 12801 }, { "epoch": 0.98, "grad_norm": 1.633798599243164, "learning_rate": 2.640904846146652e-07, "loss": 2.0811, "step": 12802 }, { "epoch": 0.98, "grad_norm": 2.171786069869995, "learning_rate": 2.623509509673494e-07, "loss": 1.5582, "step": 12803 }, { "epoch": 0.98, "grad_norm": 2.3295164108276367, "learning_rate": 2.6061715779549525e-07, "loss": 2.2361, "step": 12804 }, { "epoch": 0.98, "grad_norm": 1.7054067850112915, "learning_rate": 2.588891051988895e-07, "loss": 1.7949, "step": 12805 }, { "epoch": 0.98, "grad_norm": 1.1723538637161255, "learning_rate": 2.571667932769972e-07, "loss": 1.6443, "step": 12806 }, { "epoch": 0.98, "grad_norm": 3.220219135284424, "learning_rate": 2.554502221289501e-07, "loss": 1.6393, "step": 12807 }, { "epoch": 0.98, "grad_norm": 1.892407774925232, "learning_rate": 2.537393918535358e-07, "loss": 1.0378, 
"step": 12808 }, { "epoch": 0.98, "grad_norm": 1.215900182723999, "learning_rate": 2.520343025492422e-07, "loss": 1.1885, "step": 12809 }, { "epoch": 0.98, "grad_norm": 1.321803331375122, "learning_rate": 2.5033495431417973e-07, "loss": 1.0415, "step": 12810 }, { "epoch": 0.98, "grad_norm": 5.129791259765625, "learning_rate": 2.4864134724617017e-07, "loss": 2.5274, "step": 12811 }, { "epoch": 0.98, "grad_norm": 1.8195880651474, "learning_rate": 2.46953481442691e-07, "loss": 1.4039, "step": 12812 }, { "epoch": 0.98, "grad_norm": 3.8804574012756348, "learning_rate": 2.4527135700088687e-07, "loss": 1.9159, "step": 12813 }, { "epoch": 0.98, "grad_norm": 8.903481483459473, "learning_rate": 2.4359497401758024e-07, "loss": 2.4119, "step": 12814 }, { "epoch": 0.98, "grad_norm": 1.7855799198150635, "learning_rate": 2.419243325892384e-07, "loss": 1.6728, "step": 12815 }, { "epoch": 0.98, "grad_norm": 1.3078745603561401, "learning_rate": 2.402594328120289e-07, "loss": 0.7958, "step": 12816 }, { "epoch": 0.98, "grad_norm": 3.8794970512390137, "learning_rate": 2.386002747817861e-07, "loss": 2.1031, "step": 12817 }, { "epoch": 0.98, "grad_norm": 1.9010103940963745, "learning_rate": 2.3694685859398934e-07, "loss": 0.7978, "step": 12818 }, { "epoch": 0.98, "grad_norm": 1.65750253200531, "learning_rate": 2.352991843438068e-07, "loss": 1.4741, "step": 12819 }, { "epoch": 0.98, "grad_norm": 2.164198875427246, "learning_rate": 2.3365725212607381e-07, "loss": 1.2982, "step": 12820 }, { "epoch": 0.98, "grad_norm": 1.9118053913116455, "learning_rate": 2.320210620352814e-07, "loss": 1.3188, "step": 12821 }, { "epoch": 0.98, "grad_norm": 1.454399824142456, "learning_rate": 2.303906141656098e-07, "loss": 1.5434, "step": 12822 }, { "epoch": 0.98, "grad_norm": 1.2980815172195435, "learning_rate": 2.2876590861091728e-07, "loss": 1.8224, "step": 12823 }, { "epoch": 0.98, "grad_norm": 1.9101951122283936, "learning_rate": 2.2714694546468463e-07, "loss": 0.9984, "step": 12824 }, { "epoch": 0.98, 
"grad_norm": 2.598374843597412, "learning_rate": 2.2553372482010393e-07, "loss": 1.4186, "step": 12825 }, { "epoch": 0.98, "grad_norm": 1.4274874925613403, "learning_rate": 2.2392624677004536e-07, "loss": 1.5667, "step": 12826 }, { "epoch": 0.98, "grad_norm": 1.4455347061157227, "learning_rate": 2.223245114070016e-07, "loss": 1.3435, "step": 12827 }, { "epoch": 0.98, "grad_norm": 1.995953917503357, "learning_rate": 2.2072851882317668e-07, "loss": 1.3622, "step": 12828 }, { "epoch": 0.98, "grad_norm": 1.4417988061904907, "learning_rate": 2.1913826911041936e-07, "loss": 1.656, "step": 12829 }, { "epoch": 0.98, "grad_norm": 1.2023992538452148, "learning_rate": 2.1755376236025637e-07, "loss": 1.2579, "step": 12830 }, { "epoch": 0.98, "grad_norm": 1.857204556465149, "learning_rate": 2.159749986639037e-07, "loss": 1.4034, "step": 12831 }, { "epoch": 0.98, "grad_norm": 1.4077574014663696, "learning_rate": 2.1440197811221086e-07, "loss": 1.2855, "step": 12832 }, { "epoch": 0.98, "grad_norm": 1.0538036823272705, "learning_rate": 2.1283470079571655e-07, "loss": 1.0268, "step": 12833 }, { "epoch": 0.98, "grad_norm": 2.3885960578918457, "learning_rate": 2.112731668046375e-07, "loss": 1.434, "step": 12834 }, { "epoch": 0.98, "grad_norm": 2.5959556102752686, "learning_rate": 2.0971737622883515e-07, "loss": 1.8621, "step": 12835 }, { "epoch": 0.98, "grad_norm": 1.3724353313446045, "learning_rate": 2.081673291578601e-07, "loss": 1.4681, "step": 12836 }, { "epoch": 0.98, "grad_norm": 1.2021219730377197, "learning_rate": 2.0662302568092983e-07, "loss": 1.3177, "step": 12837 }, { "epoch": 0.98, "grad_norm": 3.034318447113037, "learning_rate": 2.0508446588691777e-07, "loss": 1.7483, "step": 12838 }, { "epoch": 0.98, "grad_norm": 1.248673439025879, "learning_rate": 2.0355164986438634e-07, "loss": 1.2746, "step": 12839 }, { "epoch": 0.98, "grad_norm": 1.32051420211792, "learning_rate": 2.020245777015539e-07, "loss": 1.1599, "step": 12840 }, { "epoch": 0.98, "grad_norm": 
1.1248825788497925, "learning_rate": 2.0050324948631683e-07, "loss": 1.3832, "step": 12841 }, { "epoch": 0.98, "grad_norm": 1.2302112579345703, "learning_rate": 1.9898766530623837e-07, "loss": 0.9532, "step": 12842 }, { "epoch": 0.98, "grad_norm": 1.4231088161468506, "learning_rate": 1.9747782524853764e-07, "loss": 1.2557, "step": 12843 }, { "epoch": 0.98, "grad_norm": 6.090037822723389, "learning_rate": 1.9597372940012293e-07, "loss": 1.8118, "step": 12844 }, { "epoch": 0.98, "grad_norm": 1.4175390005111694, "learning_rate": 1.9447537784754722e-07, "loss": 1.7262, "step": 12845 }, { "epoch": 0.98, "grad_norm": 1.174777865409851, "learning_rate": 1.929827706770859e-07, "loss": 1.4549, "step": 12846 }, { "epoch": 0.98, "grad_norm": 1.251690149307251, "learning_rate": 1.914959079746037e-07, "loss": 1.1782, "step": 12847 }, { "epoch": 0.98, "grad_norm": 1.0829081535339355, "learning_rate": 1.9001478982570987e-07, "loss": 0.9158, "step": 12848 }, { "epoch": 0.98, "grad_norm": 1.5285626649856567, "learning_rate": 1.8853941631564732e-07, "loss": 1.4677, "step": 12849 }, { "epoch": 0.98, "grad_norm": 0.9779148101806641, "learning_rate": 1.8706978752931482e-07, "loss": 1.1994, "step": 12850 }, { "epoch": 0.98, "grad_norm": 0.8384947776794434, "learning_rate": 1.8560590355131135e-07, "loss": 0.582, "step": 12851 }, { "epoch": 0.98, "grad_norm": 1.2426730394363403, "learning_rate": 1.8414776446588067e-07, "loss": 1.1813, "step": 12852 }, { "epoch": 0.98, "grad_norm": 2.3564445972442627, "learning_rate": 1.826953703569667e-07, "loss": 1.6348, "step": 12853 }, { "epoch": 0.98, "grad_norm": 1.625251293182373, "learning_rate": 1.8124872130814707e-07, "loss": 1.5961, "step": 12854 }, { "epoch": 0.98, "grad_norm": 1.4111605882644653, "learning_rate": 1.7980781740268848e-07, "loss": 1.7244, "step": 12855 }, { "epoch": 0.98, "grad_norm": 1.7974493503570557, "learning_rate": 1.783726587235357e-07, "loss": 1.3067, "step": 12856 }, { "epoch": 0.98, "grad_norm": 1.4127471446990967, 
"learning_rate": 1.7694324535326712e-07, "loss": 1.4994, "step": 12857 }, { "epoch": 0.98, "grad_norm": 1.7099695205688477, "learning_rate": 1.7551957737417246e-07, "loss": 1.1505, "step": 12858 }, { "epoch": 0.98, "grad_norm": 1.6027848720550537, "learning_rate": 1.741016548681751e-07, "loss": 1.3358, "step": 12859 }, { "epoch": 0.98, "grad_norm": 4.5892791748046875, "learning_rate": 1.726894779168986e-07, "loss": 1.1818, "step": 12860 }, { "epoch": 0.98, "grad_norm": 1.8983854055404663, "learning_rate": 1.7128304660162243e-07, "loss": 1.0211, "step": 12861 }, { "epoch": 0.98, "grad_norm": 2.5142383575439453, "learning_rate": 1.6988236100329292e-07, "loss": 1.1748, "step": 12862 }, { "epoch": 0.98, "grad_norm": 2.2735488414764404, "learning_rate": 1.6848742120251227e-07, "loss": 1.3875, "step": 12863 }, { "epoch": 0.98, "grad_norm": 1.5649776458740234, "learning_rate": 1.670982272795829e-07, "loss": 2.2509, "step": 12864 }, { "epoch": 0.98, "grad_norm": 1.5526072978973389, "learning_rate": 1.6571477931446312e-07, "loss": 0.8066, "step": 12865 }, { "epoch": 0.98, "grad_norm": 3.6135847568511963, "learning_rate": 1.6433707738677807e-07, "loss": 1.1142, "step": 12866 }, { "epoch": 0.98, "grad_norm": 1.71660315990448, "learning_rate": 1.6296512157580878e-07, "loss": 1.4421, "step": 12867 }, { "epoch": 0.98, "grad_norm": 1.6313588619232178, "learning_rate": 1.6159891196053656e-07, "loss": 1.339, "step": 12868 }, { "epoch": 0.98, "grad_norm": 1.3108444213867188, "learning_rate": 1.6023844861957627e-07, "loss": 1.2215, "step": 12869 }, { "epoch": 0.98, "grad_norm": 1.4949973821640015, "learning_rate": 1.5888373163124303e-07, "loss": 1.3397, "step": 12870 }, { "epoch": 0.98, "grad_norm": 1.218509316444397, "learning_rate": 1.5753476107350784e-07, "loss": 0.704, "step": 12871 }, { "epoch": 0.98, "grad_norm": 1.9716659784317017, "learning_rate": 1.5619153702400858e-07, "loss": 1.9818, "step": 12872 }, { "epoch": 0.98, "grad_norm": 1.1045315265655518, "learning_rate": 
1.5485405956005005e-07, "loss": 1.3228, "step": 12873 }, { "epoch": 0.98, "grad_norm": 2.2799649238586426, "learning_rate": 1.5352232875862625e-07, "loss": 1.217, "step": 12874 }, { "epoch": 0.98, "grad_norm": 1.0134366750717163, "learning_rate": 1.5219634469637588e-07, "loss": 1.0097, "step": 12875 }, { "epoch": 0.98, "grad_norm": 0.9358737468719482, "learning_rate": 1.5087610744961567e-07, "loss": 1.3889, "step": 12876 }, { "epoch": 0.98, "grad_norm": 1.9782406091690063, "learning_rate": 1.4956161709434035e-07, "loss": 1.2362, "step": 12877 }, { "epoch": 0.98, "grad_norm": 1.2457475662231445, "learning_rate": 1.4825287370620055e-07, "loss": 1.0229, "step": 12878 }, { "epoch": 0.98, "grad_norm": 1.4456983804702759, "learning_rate": 1.469498773605249e-07, "loss": 1.2969, "step": 12879 }, { "epoch": 0.98, "grad_norm": 1.5174121856689453, "learning_rate": 1.4565262813230894e-07, "loss": 2.1708, "step": 12880 }, { "epoch": 0.98, "grad_norm": 3.039598226547241, "learning_rate": 1.443611260962041e-07, "loss": 1.9405, "step": 12881 }, { "epoch": 0.98, "grad_norm": 1.600274920463562, "learning_rate": 1.43075371326562e-07, "loss": 1.154, "step": 12882 }, { "epoch": 0.98, "grad_norm": 2.101051092147827, "learning_rate": 1.4179536389737902e-07, "loss": 1.8902, "step": 12883 }, { "epoch": 0.98, "grad_norm": 1.6304064989089966, "learning_rate": 1.4052110388232952e-07, "loss": 1.3307, "step": 12884 }, { "epoch": 0.98, "grad_norm": 1.730933666229248, "learning_rate": 1.3925259135474379e-07, "loss": 1.2153, "step": 12885 }, { "epoch": 0.98, "grad_norm": 1.3091894388198853, "learning_rate": 1.3798982638764112e-07, "loss": 1.2427, "step": 12886 }, { "epoch": 0.98, "grad_norm": 3.7304346561431885, "learning_rate": 1.3673280905369679e-07, "loss": 1.2561, "step": 12887 }, { "epoch": 0.98, "grad_norm": 1.6485539674758911, "learning_rate": 1.3548153942526397e-07, "loss": 1.7095, "step": 12888 }, { "epoch": 0.98, "grad_norm": 1.685180425643921, "learning_rate": 1.3423601757436287e-07, 
"loss": 1.6395, "step": 12889 }, { "epoch": 0.98, "grad_norm": 1.5157986879348755, "learning_rate": 1.329962435726695e-07, "loss": 0.5795, "step": 12890 }, { "epoch": 0.98, "grad_norm": 1.5039900541305542, "learning_rate": 1.3176221749153783e-07, "loss": 2.0244, "step": 12891 }, { "epoch": 0.98, "grad_norm": 3.1374149322509766, "learning_rate": 1.305339394020111e-07, "loss": 1.7426, "step": 12892 }, { "epoch": 0.98, "grad_norm": 1.2541252374649048, "learning_rate": 1.2931140937477715e-07, "loss": 1.2873, "step": 12893 }, { "epoch": 0.98, "grad_norm": 1.6185802221298218, "learning_rate": 1.2809462748019085e-07, "loss": 1.3556, "step": 12894 }, { "epoch": 0.98, "grad_norm": 1.264695167541504, "learning_rate": 1.2688359378829618e-07, "loss": 1.3412, "step": 12895 }, { "epoch": 0.98, "grad_norm": 1.7945841550827026, "learning_rate": 1.2567830836878181e-07, "loss": 1.4526, "step": 12896 }, { "epoch": 0.98, "grad_norm": 1.0691999197006226, "learning_rate": 1.244787712910256e-07, "loss": 1.2199, "step": 12897 }, { "epoch": 0.98, "grad_norm": 1.1398634910583496, "learning_rate": 1.232849826240723e-07, "loss": 0.8025, "step": 12898 }, { "epoch": 0.98, "grad_norm": 1.1978858709335327, "learning_rate": 1.220969424366225e-07, "loss": 0.7603, "step": 12899 }, { "epoch": 0.98, "grad_norm": 1.18296480178833, "learning_rate": 1.20914650797066e-07, "loss": 1.7278, "step": 12900 }, { "epoch": 0.98, "grad_norm": 1.1868258714675903, "learning_rate": 1.197381077734372e-07, "loss": 1.239, "step": 12901 }, { "epoch": 0.98, "grad_norm": 1.3048807382583618, "learning_rate": 1.1856731343347083e-07, "loss": 1.3579, "step": 12902 }, { "epoch": 0.98, "grad_norm": 1.718451976776123, "learning_rate": 1.1740226784453523e-07, "loss": 1.0771, "step": 12903 }, { "epoch": 0.98, "grad_norm": 1.8929359912872314, "learning_rate": 1.1624297107369897e-07, "loss": 1.0118, "step": 12904 }, { "epoch": 0.98, "grad_norm": 1.3779752254486084, "learning_rate": 1.1508942318767535e-07, "loss": 1.9847, "step": 
12905 }, { "epoch": 0.98, "grad_norm": 2.2305357456207275, "learning_rate": 1.1394162425285571e-07, "loss": 1.1562, "step": 12906 }, { "epoch": 0.98, "grad_norm": 1.80892813205719, "learning_rate": 1.1279957433532051e-07, "loss": 1.1922, "step": 12907 }, { "epoch": 0.98, "grad_norm": 1.444332480430603, "learning_rate": 1.1166327350078387e-07, "loss": 1.2109, "step": 12908 }, { "epoch": 0.98, "grad_norm": 2.9626941680908203, "learning_rate": 1.1053272181464902e-07, "loss": 1.427, "step": 12909 }, { "epoch": 0.99, "grad_norm": 1.263420581817627, "learning_rate": 1.0940791934198613e-07, "loss": 1.5144, "step": 12910 }, { "epoch": 0.99, "grad_norm": 1.3178726434707642, "learning_rate": 1.0828886614754341e-07, "loss": 1.6557, "step": 12911 }, { "epoch": 0.99, "grad_norm": 0.8990662693977356, "learning_rate": 1.071755622957249e-07, "loss": 1.3612, "step": 12912 }, { "epoch": 0.99, "grad_norm": 1.6793018579483032, "learning_rate": 1.0606800785060156e-07, "loss": 1.6966, "step": 12913 }, { "epoch": 0.99, "grad_norm": 1.8150442838668823, "learning_rate": 1.049662028759224e-07, "loss": 1.554, "step": 12914 }, { "epoch": 0.99, "grad_norm": 2.0147294998168945, "learning_rate": 1.0387014743510338e-07, "loss": 1.3823, "step": 12915 }, { "epoch": 0.99, "grad_norm": 2.9057860374450684, "learning_rate": 1.0277984159122733e-07, "loss": 1.0167, "step": 12916 }, { "epoch": 0.99, "grad_norm": 1.8563729524612427, "learning_rate": 1.0169528540704409e-07, "loss": 1.8807, "step": 12917 }, { "epoch": 0.99, "grad_norm": 2.6245012283325195, "learning_rate": 1.006164789449926e-07, "loss": 1.0912, "step": 12918 }, { "epoch": 0.99, "grad_norm": 1.1901897192001343, "learning_rate": 9.954342226714542e-08, "loss": 0.8854, "step": 12919 }, { "epoch": 0.99, "grad_norm": 2.363689661026001, "learning_rate": 9.847611543526425e-08, "loss": 0.7873, "step": 12920 }, { "epoch": 0.99, "grad_norm": 1.6762940883636475, "learning_rate": 9.741455851078885e-08, "loss": 1.5695, "step": 12921 }, { "epoch": 0.99, 
"grad_norm": 1.3584486246109009, "learning_rate": 9.635875155482588e-08, "loss": 1.12, "step": 12922 }, { "epoch": 0.99, "grad_norm": 1.1295270919799805, "learning_rate": 9.530869462811565e-08, "loss": 1.3177, "step": 12923 }, { "epoch": 0.99, "grad_norm": 1.4553395509719849, "learning_rate": 9.42643877911209e-08, "loss": 1.463, "step": 12924 }, { "epoch": 0.99, "grad_norm": 1.1461683511734009, "learning_rate": 9.32258311039269e-08, "loss": 1.1596, "step": 12925 }, { "epoch": 0.99, "grad_norm": 1.0622315406799316, "learning_rate": 9.219302462631918e-08, "loss": 1.2986, "step": 12926 }, { "epoch": 0.99, "grad_norm": 1.2595628499984741, "learning_rate": 9.116596841775016e-08, "loss": 1.4832, "step": 12927 }, { "epoch": 0.99, "grad_norm": 1.0142486095428467, "learning_rate": 9.014466253731701e-08, "loss": 0.9047, "step": 12928 }, { "epoch": 0.99, "grad_norm": 1.2860453128814697, "learning_rate": 8.912910704381717e-08, "loss": 1.2439, "step": 12929 }, { "epoch": 0.99, "grad_norm": 1.7344337701797485, "learning_rate": 8.811930199568163e-08, "loss": 1.7577, "step": 12930 }, { "epoch": 0.99, "grad_norm": 1.8922234773635864, "learning_rate": 8.711524745105282e-08, "loss": 0.8676, "step": 12931 }, { "epoch": 0.99, "grad_norm": 1.2121728658676147, "learning_rate": 8.611694346769562e-08, "loss": 1.2942, "step": 12932 }, { "epoch": 0.99, "grad_norm": 1.3494877815246582, "learning_rate": 8.512439010309736e-08, "loss": 1.4967, "step": 12933 }, { "epoch": 0.99, "grad_norm": 1.9372782707214355, "learning_rate": 8.413758741435685e-08, "loss": 1.5609, "step": 12934 }, { "epoch": 0.99, "grad_norm": 1.3069089651107788, "learning_rate": 8.315653545828417e-08, "loss": 1.0191, "step": 12935 }, { "epoch": 0.99, "grad_norm": 2.228196144104004, "learning_rate": 8.218123429134528e-08, "loss": 1.5407, "step": 12936 }, { "epoch": 0.99, "grad_norm": 1.329140543937683, "learning_rate": 8.121168396967305e-08, "loss": 1.4512, "step": 12937 }, { "epoch": 0.99, "grad_norm": 1.884031057357788, 
"learning_rate": 8.024788454906729e-08, "loss": 1.2175, "step": 12938 }, { "epoch": 0.99, "grad_norm": 1.9363634586334229, "learning_rate": 7.928983608500584e-08, "loss": 1.294, "step": 12939 }, { "epoch": 0.99, "grad_norm": 3.110454797744751, "learning_rate": 7.833753863263349e-08, "loss": 1.1306, "step": 12940 }, { "epoch": 0.99, "grad_norm": 1.1355233192443848, "learning_rate": 7.739099224673974e-08, "loss": 1.2171, "step": 12941 }, { "epoch": 0.99, "grad_norm": 1.7008346319198608, "learning_rate": 7.645019698182543e-08, "loss": 2.1644, "step": 12942 }, { "epoch": 0.99, "grad_norm": 2.079796552658081, "learning_rate": 7.551515289203615e-08, "loss": 1.3863, "step": 12943 }, { "epoch": 0.99, "grad_norm": 1.456995964050293, "learning_rate": 7.45858600311733e-08, "loss": 1.1837, "step": 12944 }, { "epoch": 0.99, "grad_norm": 1.2507562637329102, "learning_rate": 7.366231845273852e-08, "loss": 0.9695, "step": 12945 }, { "epoch": 0.99, "grad_norm": 2.2294044494628906, "learning_rate": 7.274452820987821e-08, "loss": 1.6152, "step": 12946 }, { "epoch": 0.99, "grad_norm": 1.3240814208984375, "learning_rate": 7.183248935542785e-08, "loss": 1.7787, "step": 12947 }, { "epoch": 0.99, "grad_norm": 2.103482723236084, "learning_rate": 7.092620194186772e-08, "loss": 1.3297, "step": 12948 }, { "epoch": 0.99, "grad_norm": 1.4750185012817383, "learning_rate": 7.002566602136717e-08, "loss": 1.1164, "step": 12949 }, { "epoch": 0.99, "grad_norm": 1.6707359552383423, "learning_rate": 6.913088164575143e-08, "loss": 1.5868, "step": 12950 }, { "epoch": 0.99, "grad_norm": 1.2213276624679565, "learning_rate": 6.824184886653484e-08, "loss": 1.4929, "step": 12951 }, { "epoch": 0.99, "grad_norm": 2.2973508834838867, "learning_rate": 6.735856773486537e-08, "loss": 1.6799, "step": 12952 }, { "epoch": 0.99, "grad_norm": 2.4436392784118652, "learning_rate": 6.648103830160235e-08, "loss": 1.3413, "step": 12953 }, { "epoch": 0.99, "grad_norm": 1.3624699115753174, "learning_rate": 
6.560926061722761e-08, "loss": 1.4831, "step": 12954 }, { "epoch": 0.99, "grad_norm": 1.6634247303009033, "learning_rate": 6.474323473194543e-08, "loss": 1.6795, "step": 12955 }, { "epoch": 0.99, "grad_norm": 2.3298704624176025, "learning_rate": 6.388296069557154e-08, "loss": 2.0174, "step": 12956 }, { "epoch": 0.99, "grad_norm": 1.0088465213775635, "learning_rate": 6.302843855764406e-08, "loss": 1.3351, "step": 12957 }, { "epoch": 0.99, "grad_norm": 1.5182079076766968, "learning_rate": 6.21796683673348e-08, "loss": 0.8328, "step": 12958 }, { "epoch": 0.99, "grad_norm": 3.437302350997925, "learning_rate": 6.133665017349355e-08, "loss": 1.5542, "step": 12959 }, { "epoch": 0.99, "grad_norm": 1.0853683948516846, "learning_rate": 6.049938402464816e-08, "loss": 0.7337, "step": 12960 }, { "epoch": 0.99, "grad_norm": 1.816678524017334, "learning_rate": 5.966786996898232e-08, "loss": 1.5107, "step": 12961 }, { "epoch": 0.99, "grad_norm": 1.5614066123962402, "learning_rate": 5.884210805435775e-08, "loss": 1.708, "step": 12962 }, { "epoch": 0.99, "grad_norm": 1.914127230644226, "learning_rate": 5.802209832829197e-08, "loss": 1.2416, "step": 12963 }, { "epoch": 0.99, "grad_norm": 1.6152700185775757, "learning_rate": 5.720784083799169e-08, "loss": 1.1306, "step": 12964 }, { "epoch": 0.99, "grad_norm": 2.1223409175872803, "learning_rate": 5.6399335630330505e-08, "loss": 1.7141, "step": 12965 }, { "epoch": 0.99, "grad_norm": 1.6458039283752441, "learning_rate": 5.5596582751815675e-08, "loss": 1.232, "step": 12966 }, { "epoch": 0.99, "grad_norm": 1.6326606273651123, "learning_rate": 5.4799582248676876e-08, "loss": 1.5165, "step": 12967 }, { "epoch": 0.99, "grad_norm": 2.082911252975464, "learning_rate": 5.4008334166777416e-08, "loss": 1.2901, "step": 12968 }, { "epoch": 0.99, "grad_norm": 1.4137210845947266, "learning_rate": 5.322283855164756e-08, "loss": 0.8612, "step": 12969 }, { "epoch": 0.99, "grad_norm": 1.8207674026489258, "learning_rate": 5.2443095448506674e-08, "loss": 
1.0344, "step": 12970 }, { "epoch": 0.99, "grad_norm": 1.0848249197006226, "learning_rate": 5.1669104902241083e-08, "loss": 1.0435, "step": 12971 }, { "epoch": 0.99, "grad_norm": 1.2594122886657715, "learning_rate": 5.090086695738183e-08, "loss": 0.8932, "step": 12972 }, { "epoch": 0.99, "grad_norm": 1.2585128545761108, "learning_rate": 5.0138381658160205e-08, "loss": 1.1598, "step": 12973 }, { "epoch": 0.99, "grad_norm": 1.4785106182098389, "learning_rate": 4.938164904845222e-08, "loss": 1.2441, "step": 12974 }, { "epoch": 0.99, "grad_norm": 1.3545690774917603, "learning_rate": 4.8630669171811916e-08, "loss": 1.5658, "step": 12975 }, { "epoch": 0.99, "grad_norm": 1.394580364227295, "learning_rate": 4.788544207146028e-08, "loss": 1.6438, "step": 12976 }, { "epoch": 0.99, "grad_norm": 1.9596846103668213, "learning_rate": 4.7145967790307445e-08, "loss": 1.2552, "step": 12977 }, { "epoch": 0.99, "grad_norm": 0.9612076878547668, "learning_rate": 4.641224637089714e-08, "loss": 0.9828, "step": 12978 }, { "epoch": 0.99, "grad_norm": 1.3590431213378906, "learning_rate": 4.5684277855462255e-08, "loss": 1.3412, "step": 12979 }, { "epoch": 0.99, "grad_norm": 1.3380728960037231, "learning_rate": 4.4962062285902607e-08, "loss": 1.3784, "step": 12980 }, { "epoch": 0.99, "grad_norm": 1.3677524328231812, "learning_rate": 4.4245599703784945e-08, "loss": 1.5574, "step": 12981 }, { "epoch": 0.99, "grad_norm": 1.860938549041748, "learning_rate": 4.353489015034295e-08, "loss": 1.3899, "step": 12982 }, { "epoch": 0.99, "grad_norm": 1.6002098321914673, "learning_rate": 4.282993366649946e-08, "loss": 1.636, "step": 12983 }, { "epoch": 0.99, "grad_norm": 1.8096928596496582, "learning_rate": 4.213073029279979e-08, "loss": 0.9989, "step": 12984 }, { "epoch": 0.99, "grad_norm": 1.699006199836731, "learning_rate": 4.143728006951175e-08, "loss": 1.129, "step": 12985 }, { "epoch": 0.99, "grad_norm": 2.17714524269104, "learning_rate": 4.074958303653675e-08, "loss": 1.0567, "step": 12986 }, { 
"epoch": 0.99, "grad_norm": 1.3944591283798218, "learning_rate": 4.006763923346535e-08, "loss": 1.277, "step": 12987 }, { "epoch": 0.99, "grad_norm": 1.7539170980453491, "learning_rate": 3.9391448699532816e-08, "loss": 2.2947, "step": 12988 }, { "epoch": 0.99, "grad_norm": 1.5038185119628906, "learning_rate": 3.8721011473663584e-08, "loss": 0.9587, "step": 12989 }, { "epoch": 0.99, "grad_norm": 1.2787115573883057, "learning_rate": 3.8056327594448995e-08, "loss": 1.5902, "step": 12990 }, { "epoch": 0.99, "grad_norm": 1.7378813028335571, "learning_rate": 3.739739710013623e-08, "loss": 1.4055, "step": 12991 }, { "epoch": 0.99, "grad_norm": 1.1306419372558594, "learning_rate": 3.674422002866162e-08, "loss": 1.1734, "step": 12992 }, { "epoch": 0.99, "grad_norm": 0.8701344728469849, "learning_rate": 3.609679641760622e-08, "loss": 0.9176, "step": 12993 }, { "epoch": 0.99, "grad_norm": 1.174085021018982, "learning_rate": 3.54551263042513e-08, "loss": 1.2085, "step": 12994 }, { "epoch": 0.99, "grad_norm": 1.4735239744186401, "learning_rate": 3.4819209725511783e-08, "loss": 0.8705, "step": 12995 }, { "epoch": 0.99, "grad_norm": 1.5646594762802124, "learning_rate": 3.418904671800283e-08, "loss": 1.6798, "step": 12996 }, { "epoch": 0.99, "grad_norm": 0.9247677326202393, "learning_rate": 3.356463731798432e-08, "loss": 1.3998, "step": 12997 }, { "epoch": 0.99, "grad_norm": 1.2635899782180786, "learning_rate": 3.294598156139417e-08, "loss": 1.5969, "step": 12998 }, { "epoch": 0.99, "grad_norm": 1.4475375413894653, "learning_rate": 3.2333079483837235e-08, "loss": 1.0538, "step": 12999 }, { "epoch": 0.99, "grad_norm": 1.633872628211975, "learning_rate": 3.1725931120596405e-08, "loss": 1.7678, "step": 13000 }, { "epoch": 0.99, "grad_norm": 1.7793275117874146, "learning_rate": 3.1124536506621503e-08, "loss": 1.72, "step": 13001 }, { "epoch": 0.99, "grad_norm": 3.7511110305786133, "learning_rate": 3.052889567651818e-08, "loss": 1.4299, "step": 13002 }, { "epoch": 0.99, "grad_norm": 
1.4101026058197021, "learning_rate": 2.993900866455901e-08, "loss": 1.8958, "step": 13003 }, { "epoch": 0.99, "grad_norm": 2.673147201538086, "learning_rate": 2.9354875504716828e-08, "loss": 1.855, "step": 13004 }, { "epoch": 0.99, "grad_norm": 2.1517202854156494, "learning_rate": 2.877649623059808e-08, "loss": 1.7474, "step": 13005 }, { "epoch": 0.99, "grad_norm": 1.2464805841445923, "learning_rate": 2.820387087548726e-08, "loss": 0.492, "step": 13006 }, { "epoch": 0.99, "grad_norm": 1.2130409479141235, "learning_rate": 2.7636999472357982e-08, "loss": 0.9248, "step": 13007 }, { "epoch": 0.99, "grad_norm": 2.1287996768951416, "learning_rate": 2.7075882053828605e-08, "loss": 0.9207, "step": 13008 }, { "epoch": 0.99, "grad_norm": 2.0530011653900146, "learning_rate": 2.6520518652195515e-08, "loss": 1.3558, "step": 13009 }, { "epoch": 0.99, "grad_norm": 1.8479949235916138, "learning_rate": 2.597090929942203e-08, "loss": 1.3716, "step": 13010 }, { "epoch": 0.99, "grad_norm": 1.611850619316101, "learning_rate": 2.542705402714951e-08, "loss": 1.7101, "step": 13011 }, { "epoch": 0.99, "grad_norm": 1.795548915863037, "learning_rate": 2.488895286666404e-08, "loss": 1.7576, "step": 13012 }, { "epoch": 0.99, "grad_norm": 2.4366235733032227, "learning_rate": 2.435660584894084e-08, "loss": 1.2274, "step": 13013 }, { "epoch": 0.99, "grad_norm": 1.5327491760253906, "learning_rate": 2.3830013004633167e-08, "loss": 1.6943, "step": 13014 }, { "epoch": 0.99, "grad_norm": 1.474300742149353, "learning_rate": 2.3309174364027907e-08, "loss": 1.6037, "step": 13015 }, { "epoch": 0.99, "grad_norm": 1.2946513891220093, "learning_rate": 2.279408995712329e-08, "loss": 1.5064, "step": 13016 }, { "epoch": 0.99, "grad_norm": 2.6225435733795166, "learning_rate": 2.228475981355116e-08, "loss": 1.6995, "step": 13017 }, { "epoch": 0.99, "grad_norm": 1.7840567827224731, "learning_rate": 2.1781183962632514e-08, "loss": 1.4067, "step": 13018 }, { "epoch": 0.99, "grad_norm": 1.410539150238037, 
"learning_rate": 2.128336243335527e-08, "loss": 1.4323, "step": 13019 }, { "epoch": 0.99, "grad_norm": 1.106014609336853, "learning_rate": 2.0791295254363186e-08, "loss": 1.5362, "step": 13020 }, { "epoch": 0.99, "grad_norm": 1.83736252784729, "learning_rate": 2.030498245397805e-08, "loss": 0.9992, "step": 13021 }, { "epoch": 0.99, "grad_norm": 5.116086483001709, "learning_rate": 1.9824424060199686e-08, "loss": 1.7941, "step": 13022 }, { "epoch": 0.99, "grad_norm": 3.3770225048065186, "learning_rate": 1.934962010067265e-08, "loss": 1.8613, "step": 13023 }, { "epoch": 0.99, "grad_norm": 1.4901096820831299, "learning_rate": 1.888057060274173e-08, "loss": 1.1431, "step": 13024 }, { "epoch": 0.99, "grad_norm": 1.3864434957504272, "learning_rate": 1.841727559338535e-08, "loss": 1.3395, "step": 13025 }, { "epoch": 0.99, "grad_norm": 1.9583486318588257, "learning_rate": 1.7959735099282173e-08, "loss": 1.0968, "step": 13026 }, { "epoch": 0.99, "grad_norm": 1.1463561058044434, "learning_rate": 1.7507949146755576e-08, "loss": 1.5382, "step": 13027 }, { "epoch": 0.99, "grad_norm": 1.1830297708511353, "learning_rate": 1.7061917761818092e-08, "loss": 1.261, "step": 13028 }, { "epoch": 0.99, "grad_norm": 1.5746816396713257, "learning_rate": 1.6621640970138075e-08, "loss": 1.778, "step": 13029 }, { "epoch": 0.99, "grad_norm": 0.932317316532135, "learning_rate": 1.6187118797061917e-08, "loss": 1.2152, "step": 13030 }, { "epoch": 0.99, "grad_norm": 1.3800225257873535, "learning_rate": 1.575835126758074e-08, "loss": 1.2349, "step": 13031 }, { "epoch": 0.99, "grad_norm": 1.4069998264312744, "learning_rate": 1.5335338406397005e-08, "loss": 0.6984, "step": 13032 }, { "epoch": 0.99, "grad_norm": 1.9079850912094116, "learning_rate": 1.4918080237835695e-08, "loss": 1.8946, "step": 13033 }, { "epoch": 0.99, "grad_norm": 0.9941762089729309, "learning_rate": 1.4506576785922044e-08, "loss": 1.0856, "step": 13034 }, { "epoch": 0.99, "grad_norm": 1.7135776281356812, "learning_rate": 
1.4100828074348204e-08, "loss": 1.1819, "step": 13035 }, { "epoch": 0.99, "grad_norm": 2.3731226921081543, "learning_rate": 1.3700834126451067e-08, "loss": 1.6735, "step": 13036 }, { "epoch": 0.99, "grad_norm": 1.4358147382736206, "learning_rate": 1.3306594965256659e-08, "loss": 1.5717, "step": 13037 }, { "epoch": 0.99, "grad_norm": 1.2522157430648804, "learning_rate": 1.2918110613469037e-08, "loss": 1.4952, "step": 13038 }, { "epoch": 0.99, "grad_norm": 1.4626022577285767, "learning_rate": 1.2535381093436994e-08, "loss": 1.2227, "step": 13039 }, { "epoch": 0.99, "grad_norm": 1.2103689908981323, "learning_rate": 1.2158406427187351e-08, "loss": 1.573, "step": 13040 }, { "epoch": 1.0, "grad_norm": 1.7043310403823853, "learning_rate": 1.1787186636424974e-08, "loss": 0.9625, "step": 13041 }, { "epoch": 1.0, "grad_norm": 1.302645206451416, "learning_rate": 1.1421721742499447e-08, "loss": 1.275, "step": 13042 }, { "epoch": 1.0, "grad_norm": 1.5690776109695435, "learning_rate": 1.1062011766471703e-08, "loss": 1.5241, "step": 13043 }, { "epoch": 1.0, "grad_norm": 1.35688316822052, "learning_rate": 1.0708056729014093e-08, "loss": 1.298, "step": 13044 }, { "epoch": 1.0, "grad_norm": 1.6655292510986328, "learning_rate": 1.0359856650532519e-08, "loss": 1.1936, "step": 13045 }, { "epoch": 1.0, "grad_norm": 1.4400992393493652, "learning_rate": 1.0017411551033196e-08, "loss": 0.8813, "step": 13046 }, { "epoch": 1.0, "grad_norm": 1.54033362865448, "learning_rate": 9.680721450255892e-09, "loss": 1.4508, "step": 13047 }, { "epoch": 1.0, "grad_norm": 1.3751689195632935, "learning_rate": 9.349786367562896e-09, "loss": 1.2943, "step": 13048 }, { "epoch": 1.0, "grad_norm": 1.749281406402588, "learning_rate": 9.024606321994534e-09, "loss": 1.8106, "step": 13049 }, { "epoch": 1.0, "grad_norm": 1.8606630563735962, "learning_rate": 8.705181332291368e-09, "loss": 0.9379, "step": 13050 }, { "epoch": 1.0, "grad_norm": 1.365432620048523, "learning_rate": 8.391511416816489e-09, "loss": 1.2651, 
"step": 13051 }, { "epoch": 1.0, "grad_norm": 1.4118916988372803, "learning_rate": 8.083596593633224e-09, "loss": 1.528, "step": 13052 }, { "epoch": 1.0, "grad_norm": 1.440287709236145, "learning_rate": 7.781436880449633e-09, "loss": 1.251, "step": 13053 }, { "epoch": 1.0, "grad_norm": 1.7703996896743774, "learning_rate": 7.485032294685112e-09, "loss": 1.3845, "step": 13054 }, { "epoch": 1.0, "grad_norm": 1.308136224746704, "learning_rate": 7.194382853370485e-09, "loss": 1.1472, "step": 13055 }, { "epoch": 1.0, "grad_norm": 1.9128265380859375, "learning_rate": 6.909488573247913e-09, "loss": 1.1735, "step": 13056 }, { "epoch": 1.0, "grad_norm": 1.5222864151000977, "learning_rate": 6.630349470715391e-09, "loss": 1.6657, "step": 13057 }, { "epoch": 1.0, "grad_norm": 2.2374308109283447, "learning_rate": 6.356965561837846e-09, "loss": 1.7693, "step": 13058 }, { "epoch": 1.0, "grad_norm": 4.3625922203063965, "learning_rate": 6.0893368623471395e-09, "loss": 1.8851, "step": 13059 }, { "epoch": 1.0, "grad_norm": 1.7656152248382568, "learning_rate": 5.827463387653165e-09, "loss": 1.7633, "step": 13060 }, { "epoch": 1.0, "grad_norm": 1.2579177618026733, "learning_rate": 5.5713451528216495e-09, "loss": 0.9799, "step": 13061 }, { "epoch": 1.0, "grad_norm": 1.9340088367462158, "learning_rate": 5.320982172596356e-09, "loss": 1.6121, "step": 13062 }, { "epoch": 1.0, "grad_norm": 2.2279062271118164, "learning_rate": 5.076374461387978e-09, "loss": 1.2816, "step": 13063 }, { "epoch": 1.0, "grad_norm": 2.0960898399353027, "learning_rate": 4.8375220332630424e-09, "loss": 1.3725, "step": 13064 }, { "epoch": 1.0, "grad_norm": 1.3350573778152466, "learning_rate": 4.604424901988313e-09, "loss": 1.6687, "step": 13065 }, { "epoch": 1.0, "grad_norm": 1.542635202407837, "learning_rate": 4.377083080975286e-09, "loss": 1.5129, "step": 13066 }, { "epoch": 1.0, "grad_norm": 1.2901654243469238, "learning_rate": 4.155496583291285e-09, "loss": 1.3297, "step": 13067 }, { "epoch": 1.0, "grad_norm": 
1.587969422340393, "learning_rate": 3.939665421714978e-09, "loss": 1.2915, "step": 13068 }, { "epoch": 1.0, "grad_norm": 1.1753084659576416, "learning_rate": 3.729589608647555e-09, "loss": 0.7129, "step": 13069 }, { "epoch": 1.0, "grad_norm": 1.5924774408340454, "learning_rate": 3.5252691562015496e-09, "loss": 1.5836, "step": 13070 }, { "epoch": 1.0, "grad_norm": 1.67207932472229, "learning_rate": 3.3267040761120196e-09, "loss": 1.238, "step": 13071 }, { "epoch": 1.0, "grad_norm": 1.0933771133422852, "learning_rate": 3.133894379825364e-09, "loss": 1.2126, "step": 13072 }, { "epoch": 1.0, "grad_norm": 1.5563164949417114, "learning_rate": 2.946840078432711e-09, "loss": 1.2895, "step": 13073 }, { "epoch": 1.0, "grad_norm": 1.681457281112671, "learning_rate": 2.765541182703224e-09, "loss": 1.258, "step": 13074 }, { "epoch": 1.0, "grad_norm": 1.8284963369369507, "learning_rate": 2.589997703072999e-09, "loss": 1.4698, "step": 13075 }, { "epoch": 1.0, "grad_norm": 1.0427696704864502, "learning_rate": 2.4202096496339645e-09, "loss": 1.0732, "step": 13076 }, { "epoch": 1.0, "grad_norm": 2.4734878540039062, "learning_rate": 2.256177032178286e-09, "loss": 1.8466, "step": 13077 }, { "epoch": 1.0, "grad_norm": 1.7486759424209595, "learning_rate": 2.0978998601206556e-09, "loss": 1.1659, "step": 13078 }, { "epoch": 1.0, "grad_norm": 1.4944041967391968, "learning_rate": 1.9453781425982088e-09, "loss": 1.6242, "step": 13079 }, { "epoch": 1.0, "grad_norm": 1.3040210008621216, "learning_rate": 1.798611888370605e-09, "loss": 1.2445, "step": 13080 }, { "epoch": 1.0, "grad_norm": 1.8280903100967407, "learning_rate": 1.6576011058866415e-09, "loss": 1.3942, "step": 13081 }, { "epoch": 1.0, "grad_norm": 3.88871693611145, "learning_rate": 1.5223458032731507e-09, "loss": 1.8166, "step": 13082 }, { "epoch": 1.0, "grad_norm": 0.9798934459686279, "learning_rate": 1.3928459883016943e-09, "loss": 0.6453, "step": 13083 }, { "epoch": 1.0, "grad_norm": 1.1397813558578491, "learning_rate": 
1.2691016684440727e-09, "loss": 1.7545, "step": 13084 }, { "epoch": 1.0, "grad_norm": 2.7046303749084473, "learning_rate": 1.1511128508057135e-09, "loss": 1.5912, "step": 13085 }, { "epoch": 1.0, "grad_norm": 1.6774709224700928, "learning_rate": 1.0388795421811814e-09, "loss": 1.0701, "step": 13086 }, { "epoch": 1.0, "grad_norm": 2.5971782207489014, "learning_rate": 9.324017490319747e-10, "loss": 2.2122, "step": 13087 }, { "epoch": 1.0, "grad_norm": 1.318084716796875, "learning_rate": 8.316794774865245e-10, "loss": 1.6291, "step": 13088 }, { "epoch": 1.0, "grad_norm": 2.1688032150268555, "learning_rate": 7.367127333401946e-10, "loss": 1.6302, "step": 13089 }, { "epoch": 1.0, "grad_norm": 1.3487178087234497, "learning_rate": 6.475015220552827e-10, "loss": 1.2021, "step": 13090 }, { "epoch": 1.0, "grad_norm": 1.100914716720581, "learning_rate": 5.640458487832234e-10, "loss": 1.7618, "step": 13091 }, { "epoch": 1.0, "grad_norm": 1.738292932510376, "learning_rate": 4.863457183090781e-10, "loss": 1.6146, "step": 13092 }, { "epoch": 1.0, "grad_norm": 1.5619438886642456, "learning_rate": 4.144011351181476e-10, "loss": 1.5572, "step": 13093 }, { "epoch": 1.0, "grad_norm": 1.6054821014404297, "learning_rate": 3.482121033293595e-10, "loss": 1.3593, "step": 13094 }, { "epoch": 1.0, "grad_norm": 1.8913911581039429, "learning_rate": 2.877786267840854e-10, "loss": 1.2727, "step": 13095 }, { "epoch": 1.0, "grad_norm": 2.551679849624634, "learning_rate": 2.331007089351189e-10, "loss": 1.5972, "step": 13096 }, { "epoch": 1.0, "grad_norm": 1.327900767326355, "learning_rate": 1.841783529465957e-10, "loss": 0.7587, "step": 13097 }, { "epoch": 1.0, "grad_norm": 1.8350539207458496, "learning_rate": 1.410115616162777e-10, "loss": 1.6332, "step": 13098 }, { "epoch": 1.0, "grad_norm": 2.408581495285034, "learning_rate": 1.0360033744216679e-10, "loss": 1.3571, "step": 13099 }, { "epoch": 1.0, "grad_norm": 1.1784712076187134, "learning_rate": 7.194468257809562e-11, "loss": 1.2123, "step": 
13100 }, { "epoch": 1.0, "grad_norm": 1.520948886871338, "learning_rate": 4.604459883372769e-11, "loss": 1.7677, "step": 13101 }, { "epoch": 1.0, "grad_norm": 2.671692371368408, "learning_rate": 2.5900087718966348e-11, "loss": 2.0395, "step": 13102 }, { "epoch": 1.0, "grad_norm": 1.2023794651031494, "learning_rate": 1.1511150366239065e-11, "loss": 1.7194, "step": 13103 }, { "epoch": 1.0, "grad_norm": 1.8334157466888428, "learning_rate": 2.877787630417572e-12, "loss": 1.5866, "step": 13104 }, { "epoch": 1.0, "grad_norm": 3.2907087802886963, "learning_rate": 0.0, "loss": 1.5566, "step": 13105 } ], "logging_steps": 1, "max_steps": 13105, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "total_flos": 1.6997648825843712e+16, "train_batch_size": 1, "trial_name": null, "trial_params": null }