{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.06122556358609606,
  "eval_steps": 500,
  "global_step": 2000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 3.061278179304803e-05,
      "grad_norm": 4.502593994140625,
      "learning_rate": 4.9975e-05,
      "loss": 7.4768,
      "step": 1
    },
    {
      "epoch": 6.122556358609606e-05,
      "grad_norm": 4.490589141845703,
      "learning_rate": 4.995e-05,
      "loss": 6.8265,
      "step": 2
    },
    {
      "epoch": 9.183834537914409e-05,
      "grad_norm": 4.760077953338623,
      "learning_rate": 4.992500000000001e-05,
      "loss": 7.3052,
      "step": 3
    },
    {
      "epoch": 0.0001224511271721921,
      "grad_norm": 4.743495464324951,
      "learning_rate": 4.99e-05,
      "loss": 7.2895,
      "step": 4
    },
    {
      "epoch": 0.00015306390896524015,
      "grad_norm": 4.851467609405518,
      "learning_rate": 4.9875000000000006e-05,
      "loss": 6.7358,
      "step": 5
    },
    {
      "epoch": 0.00018367669075828818,
      "grad_norm": 5.465917110443115,
      "learning_rate": 4.9850000000000006e-05,
      "loss": 6.7871,
      "step": 6
    },
    {
      "epoch": 0.0002142894725513362,
      "grad_norm": 5.8313140869140625,
      "learning_rate": 4.9825000000000005e-05,
      "loss": 6.6599,
      "step": 7
    },
    {
      "epoch": 0.0002449022543443842,
      "grad_norm": 5.76391077041626,
      "learning_rate": 4.9800000000000004e-05,
      "loss": 6.4281,
      "step": 8
    },
    {
      "epoch": 0.00027551503613743223,
      "grad_norm": 5.03156042098999,
      "learning_rate": 4.9775000000000004e-05,
      "loss": 6.1765,
      "step": 9
    },
    {
      "epoch": 0.0003061278179304803,
      "grad_norm": 5.694817543029785,
      "learning_rate": 4.975e-05,
      "loss": 6.2647,
      "step": 10
    },
    {
      "epoch": 0.0003367405997235283,
      "grad_norm": 6.083527088165283,
      "learning_rate": 4.9725e-05,
      "loss": 6.3485,
      "step": 11
    },
    {
      "epoch": 0.00036735338151657637,
      "grad_norm": 5.630711078643799,
      "learning_rate": 4.97e-05,
      "loss": 6.1123,
      "step": 12
    },
    {
      "epoch": 0.0003979661633096244,
      "grad_norm": 5.224775791168213,
      "learning_rate": 4.967500000000001e-05,
      "loss": 6.3865,
      "step": 13
    },
    {
      "epoch": 0.0004285789451026724,
      "grad_norm": 6.528948783874512,
      "learning_rate": 4.965e-05,
      "loss": 6.0597,
      "step": 14
    },
    {
      "epoch": 0.00045919172689572044,
      "grad_norm": 6.53516960144043,
      "learning_rate": 4.962500000000001e-05,
      "loss": 5.6297,
      "step": 15
    },
    {
      "epoch": 0.0004898045086887685,
      "grad_norm": 6.0080132484436035,
      "learning_rate": 4.96e-05,
      "loss": 5.6658,
      "step": 16
    },
    {
      "epoch": 0.0005204172904818165,
      "grad_norm": 6.151450157165527,
      "learning_rate": 4.9575000000000006e-05,
      "loss": 5.638,
      "step": 17
    },
    {
      "epoch": 0.0005510300722748645,
      "grad_norm": 6.599404335021973,
      "learning_rate": 4.9550000000000005e-05,
      "loss": 5.0072,
      "step": 18
    },
    {
      "epoch": 0.0005816428540679126,
      "grad_norm": 6.394167900085449,
      "learning_rate": 4.9525000000000004e-05,
      "loss": 5.159,
      "step": 19
    },
    {
      "epoch": 0.0006122556358609606,
      "grad_norm": 5.833392143249512,
      "learning_rate": 4.9500000000000004e-05,
      "loss": 5.7577,
      "step": 20
    },
    {
      "epoch": 0.0006428684176540086,
      "grad_norm": 6.336911201477051,
      "learning_rate": 4.9475e-05,
      "loss": 4.8549,
      "step": 21
    },
    {
      "epoch": 0.0006734811994470566,
      "grad_norm": 6.07256555557251,
      "learning_rate": 4.945e-05,
      "loss": 4.9603,
      "step": 22
    },
    {
      "epoch": 0.0007040939812401046,
      "grad_norm": 4.828920364379883,
      "learning_rate": 4.9425e-05,
      "loss": 4.8669,
      "step": 23
    },
    {
      "epoch": 0.0007347067630331527,
      "grad_norm": 6.025134563446045,
      "learning_rate": 4.94e-05,
      "loss": 4.5794,
      "step": 24
    },
    {
      "epoch": 0.0007653195448262007,
      "grad_norm": 5.962133407592773,
      "learning_rate": 4.937500000000001e-05,
      "loss": 4.5733,
      "step": 25
    },
    {
      "epoch": 0.0007959323266192487,
      "grad_norm": 6.416510581970215,
      "learning_rate": 4.935e-05,
      "loss": 4.0229,
      "step": 26
    },
    {
      "epoch": 0.0008265451084122968,
      "grad_norm": 4.495734214782715,
      "learning_rate": 4.9325000000000006e-05,
      "loss": 4.323,
      "step": 27
    },
    {
      "epoch": 0.0008571578902053448,
      "grad_norm": 4.390763759613037,
      "learning_rate": 4.93e-05,
      "loss": 3.9899,
      "step": 28
    },
    {
      "epoch": 0.0008877706719983929,
      "grad_norm": 4.727230072021484,
      "learning_rate": 4.9275000000000005e-05,
      "loss": 3.6239,
      "step": 29
    },
    {
      "epoch": 0.0009183834537914409,
      "grad_norm": 4.3478875160217285,
      "learning_rate": 4.9250000000000004e-05,
      "loss": 3.4207,
      "step": 30
    },
    {
      "epoch": 0.0009489962355844889,
      "grad_norm": 4.034118175506592,
      "learning_rate": 4.9225000000000004e-05,
      "loss": 3.3825,
      "step": 31
    },
    {
      "epoch": 0.000979609017377537,
      "grad_norm": 3.424221992492676,
      "learning_rate": 4.92e-05,
      "loss": 3.3884,
      "step": 32
    },
    {
      "epoch": 0.001010221799170585,
      "grad_norm": 3.406214475631714,
      "learning_rate": 4.9175e-05,
      "loss": 3.7135,
      "step": 33
    },
    {
      "epoch": 0.001040834580963633,
      "grad_norm": 2.6027398109436035,
      "learning_rate": 4.915e-05,
      "loss": 3.3285,
      "step": 34
    },
    {
      "epoch": 0.001071447362756681,
      "grad_norm": 2.971949338912964,
      "learning_rate": 4.9125e-05,
      "loss": 3.3592,
      "step": 35
    },
    {
      "epoch": 0.001102060144549729,
      "grad_norm": 2.5305731296539307,
      "learning_rate": 4.91e-05,
      "loss": 3.1143,
      "step": 36
    },
    {
      "epoch": 0.0011326729263427772,
      "grad_norm": 2.9374747276306152,
      "learning_rate": 4.907500000000001e-05,
      "loss": 3.5158,
      "step": 37
    },
    {
      "epoch": 0.0011632857081358252,
      "grad_norm": 2.7213475704193115,
      "learning_rate": 4.905e-05,
      "loss": 3.004,
      "step": 38
    },
    {
      "epoch": 0.0011938984899288732,
      "grad_norm": 2.8888635635375977,
      "learning_rate": 4.9025000000000006e-05,
      "loss": 3.4957,
      "step": 39
    },
    {
      "epoch": 0.0012245112717219212,
      "grad_norm": 2.8989744186401367,
      "learning_rate": 4.9e-05,
      "loss": 3.3717,
      "step": 40
    },
    {
      "epoch": 0.0012551240535149692,
      "grad_norm": 3.2072436809539795,
      "learning_rate": 4.8975000000000005e-05,
      "loss": 3.3126,
      "step": 41
    },
    {
      "epoch": 0.0012857368353080172,
      "grad_norm": 2.974712371826172,
      "learning_rate": 4.8950000000000004e-05,
      "loss": 3.1451,
      "step": 42
    },
    {
      "epoch": 0.0013163496171010652,
      "grad_norm": 1.8562570810317993,
      "learning_rate": 4.8925e-05,
      "loss": 2.1743,
      "step": 43
    },
    {
      "epoch": 0.0013469623988941132,
      "grad_norm": 2.7891759872436523,
      "learning_rate": 4.89e-05,
      "loss": 3.0415,
      "step": 44
    },
    {
      "epoch": 0.0013775751806871612,
      "grad_norm": 2.055044412612915,
      "learning_rate": 4.8875e-05,
      "loss": 2.5616,
      "step": 45
    },
    {
      "epoch": 0.0014081879624802092,
      "grad_norm": 3.0145769119262695,
      "learning_rate": 4.885e-05,
      "loss": 3.1295,
      "step": 46
    },
    {
      "epoch": 0.0014388007442732572,
      "grad_norm": 2.0373644828796387,
      "learning_rate": 4.8825e-05,
      "loss": 2.5397,
      "step": 47
    },
    {
      "epoch": 0.0014694135260663055,
      "grad_norm": 2.8690342903137207,
      "learning_rate": 4.88e-05,
      "loss": 2.8776,
      "step": 48
    },
    {
      "epoch": 0.0015000263078593535,
      "grad_norm": 2.048023223876953,
      "learning_rate": 4.8775000000000007e-05,
      "loss": 2.359,
      "step": 49
    },
    {
      "epoch": 0.0015306390896524015,
      "grad_norm": 2.1275787353515625,
      "learning_rate": 4.875e-05,
      "loss": 2.4965,
      "step": 50
    },
    {
      "epoch": 0.0015612518714454495,
      "grad_norm": 1.6037003993988037,
      "learning_rate": 4.8725000000000005e-05,
      "loss": 2.221,
      "step": 51
    },
    {
      "epoch": 0.0015918646532384975,
      "grad_norm": 1.7947163581848145,
      "learning_rate": 4.87e-05,
      "loss": 2.2378,
      "step": 52
    },
    {
      "epoch": 0.0016224774350315455,
      "grad_norm": 2.0701205730438232,
      "learning_rate": 4.8675000000000004e-05,
      "loss": 2.4962,
      "step": 53
    },
    {
      "epoch": 0.0016530902168245935,
      "grad_norm": 1.9781723022460938,
      "learning_rate": 4.8650000000000003e-05,
      "loss": 2.5546,
      "step": 54
    },
    {
      "epoch": 0.0016837029986176415,
      "grad_norm": 1.663872480392456,
      "learning_rate": 4.8625e-05,
      "loss": 2.1797,
      "step": 55
    },
    {
      "epoch": 0.0017143157804106895,
      "grad_norm": 2.946748733520508,
      "learning_rate": 4.86e-05,
      "loss": 2.9821,
      "step": 56
    },
    {
      "epoch": 0.0017449285622037375,
      "grad_norm": 1.7021986246109009,
      "learning_rate": 4.8575e-05,
      "loss": 2.1976,
      "step": 57
    },
    {
      "epoch": 0.0017755413439967858,
      "grad_norm": 1.921453833580017,
      "learning_rate": 4.855e-05,
      "loss": 2.5441,
      "step": 58
    },
    {
      "epoch": 0.0018061541257898338,
      "grad_norm": 2.11322283744812,
      "learning_rate": 4.8525e-05,
      "loss": 2.0595,
      "step": 59
    },
    {
      "epoch": 0.0018367669075828818,
      "grad_norm": 1.3385632038116455,
      "learning_rate": 4.85e-05,
      "loss": 2.0619,
      "step": 60
    },
    {
      "epoch": 0.0018673796893759298,
      "grad_norm": 1.4987257719039917,
      "learning_rate": 4.8475000000000006e-05,
      "loss": 1.9736,
      "step": 61
    },
    {
      "epoch": 0.0018979924711689778,
      "grad_norm": 1.8409435749053955,
      "learning_rate": 4.845e-05,
      "loss": 2.38,
      "step": 62
    },
    {
      "epoch": 0.0019286052529620258,
      "grad_norm": 1.4724944829940796,
      "learning_rate": 4.8425000000000005e-05,
      "loss": 2.0932,
      "step": 63
    },
    {
      "epoch": 0.001959218034755074,
      "grad_norm": 1.2271215915679932,
      "learning_rate": 4.8400000000000004e-05,
      "loss": 1.9901,
      "step": 64
    },
    {
      "epoch": 0.001989830816548122,
      "grad_norm": 1.380914330482483,
      "learning_rate": 4.8375000000000004e-05,
      "loss": 1.9075,
      "step": 65
    },
    {
      "epoch": 0.00202044359834117,
      "grad_norm": 1.2022541761398315,
      "learning_rate": 4.835e-05,
      "loss": 2.0176,
      "step": 66
    },
    {
      "epoch": 0.002051056380134218,
      "grad_norm": 1.8821959495544434,
      "learning_rate": 4.8325e-05,
      "loss": 1.758,
      "step": 67
    },
    {
      "epoch": 0.002081669161927266,
      "grad_norm": 1.74000883102417,
      "learning_rate": 4.83e-05,
      "loss": 1.8636,
      "step": 68
    },
    {
      "epoch": 0.002112281943720314,
      "grad_norm": 1.075508952140808,
      "learning_rate": 4.8275e-05,
      "loss": 1.8982,
      "step": 69
    },
    {
      "epoch": 0.002142894725513362,
      "grad_norm": 0.9934259057044983,
      "learning_rate": 4.825e-05,
      "loss": 1.736,
      "step": 70
    },
    {
      "epoch": 0.00217350750730641,
      "grad_norm": 1.313206672668457,
      "learning_rate": 4.822500000000001e-05,
      "loss": 2.1259,
      "step": 71
    },
    {
      "epoch": 0.002204120289099458,
      "grad_norm": 0.861015260219574,
      "learning_rate": 4.82e-05,
      "loss": 1.8048,
      "step": 72
    },
    {
      "epoch": 0.002234733070892506,
      "grad_norm": 0.9260530471801758,
      "learning_rate": 4.8175000000000005e-05,
      "loss": 1.6339,
      "step": 73
    },
    {
      "epoch": 0.0022653458526855543,
      "grad_norm": 0.8771083354949951,
      "learning_rate": 4.815e-05,
      "loss": 1.6521,
      "step": 74
    },
    {
      "epoch": 0.0022959586344786023,
      "grad_norm": 0.8178094029426575,
      "learning_rate": 4.8125000000000004e-05,
      "loss": 1.6692,
      "step": 75
    },
    {
      "epoch": 0.0023265714162716503,
      "grad_norm": 0.8703726530075073,
      "learning_rate": 4.8100000000000004e-05,
      "loss": 1.6073,
      "step": 76
    },
    {
      "epoch": 0.0023571841980646983,
      "grad_norm": 0.8438004851341248,
      "learning_rate": 4.8075e-05,
      "loss": 1.8676,
      "step": 77
    },
    {
      "epoch": 0.0023877969798577463,
      "grad_norm": 0.8871971368789673,
      "learning_rate": 4.805e-05,
      "loss": 1.567,
      "step": 78
    },
    {
      "epoch": 0.0024184097616507944,
      "grad_norm": 1.357043743133545,
      "learning_rate": 4.8025e-05,
      "loss": 2.1667,
      "step": 79
    },
    {
      "epoch": 0.0024490225434438424,
      "grad_norm": 0.9974429607391357,
      "learning_rate": 4.8e-05,
      "loss": 1.6534,
      "step": 80
    },
    {
      "epoch": 0.0024796353252368904,
      "grad_norm": 0.9397422671318054,
      "learning_rate": 4.7975e-05,
      "loss": 1.7893,
      "step": 81
    },
    {
      "epoch": 0.0025102481070299384,
      "grad_norm": 1.093368411064148,
      "learning_rate": 4.795e-05,
      "loss": 1.7839,
      "step": 82
    },
    {
      "epoch": 0.0025408608888229864,
      "grad_norm": 0.8677024245262146,
      "learning_rate": 4.7925000000000006e-05,
      "loss": 1.5609,
      "step": 83
    },
    {
      "epoch": 0.0025714736706160344,
      "grad_norm": 0.7751038670539856,
      "learning_rate": 4.79e-05,
      "loss": 1.6786,
      "step": 84
    },
    {
      "epoch": 0.0026020864524090824,
      "grad_norm": 0.7199622988700867,
      "learning_rate": 4.7875000000000005e-05,
      "loss": 1.7733,
      "step": 85
    },
    {
      "epoch": 0.0026326992342021304,
      "grad_norm": 0.8470961451530457,
      "learning_rate": 4.785e-05,
      "loss": 1.6982,
      "step": 86
    },
    {
      "epoch": 0.0026633120159951784,
      "grad_norm": 0.7690158486366272,
      "learning_rate": 4.7825000000000004e-05,
      "loss": 1.6506,
      "step": 87
    },
    {
      "epoch": 0.0026939247977882264,
      "grad_norm": 0.6343263387680054,
      "learning_rate": 4.78e-05,
      "loss": 1.4935,
      "step": 88
    },
    {
      "epoch": 0.0027245375795812744,
      "grad_norm": 0.6943231821060181,
      "learning_rate": 4.7775e-05,
      "loss": 1.6566,
      "step": 89
    },
    {
      "epoch": 0.0027551503613743224,
      "grad_norm": 0.8234266638755798,
      "learning_rate": 4.775e-05,
      "loss": 1.5748,
      "step": 90
    },
    {
      "epoch": 0.0027857631431673704,
      "grad_norm": 0.867545485496521,
      "learning_rate": 4.7725e-05,
      "loss": 1.7692,
      "step": 91
    },
    {
      "epoch": 0.0028163759249604185,
      "grad_norm": 0.7488975524902344,
      "learning_rate": 4.77e-05,
      "loss": 1.8474,
      "step": 92
    },
    {
      "epoch": 0.0028469887067534665,
      "grad_norm": 0.7941266894340515,
      "learning_rate": 4.7675e-05,
      "loss": 1.3889,
      "step": 93
    },
    {
      "epoch": 0.0028776014885465145,
      "grad_norm": 1.142926812171936,
      "learning_rate": 4.765e-05,
      "loss": 1.71,
      "step": 94
    },
    {
      "epoch": 0.002908214270339563,
      "grad_norm": 0.7892361283302307,
      "learning_rate": 4.7625000000000006e-05,
      "loss": 1.6848,
      "step": 95
    },
    {
      "epoch": 0.002938827052132611,
      "grad_norm": 0.846000075340271,
      "learning_rate": 4.76e-05,
      "loss": 1.5253,
      "step": 96
    },
    {
      "epoch": 0.002969439833925659,
      "grad_norm": 0.7543118000030518,
      "learning_rate": 4.7575000000000004e-05,
      "loss": 1.5782,
      "step": 97
    },
    {
      "epoch": 0.003000052615718707,
      "grad_norm": 0.6432281732559204,
      "learning_rate": 4.755e-05,
      "loss": 1.5025,
      "step": 98
    },
    {
      "epoch": 0.003030665397511755,
      "grad_norm": 0.84007328748703,
      "learning_rate": 4.7525e-05,
      "loss": 1.6096,
      "step": 99
    },
    {
      "epoch": 0.003061278179304803,
      "grad_norm": 0.6275733709335327,
      "learning_rate": 4.75e-05,
      "loss": 1.7116,
      "step": 100
    },
    {
      "epoch": 0.003091890961097851,
      "grad_norm": 0.8915499448776245,
      "learning_rate": 4.7475e-05,
      "loss": 1.8146,
      "step": 101
    },
    {
      "epoch": 0.003122503742890899,
      "grad_norm": 0.6665530204772949,
      "learning_rate": 4.745e-05,
      "loss": 1.5385,
      "step": 102
    },
    {
      "epoch": 0.003153116524683947,
      "grad_norm": 1.0894556045532227,
      "learning_rate": 4.7425e-05,
      "loss": 1.3557,
      "step": 103
    },
    {
      "epoch": 0.003183729306476995,
      "grad_norm": 1.0716320276260376,
      "learning_rate": 4.74e-05,
      "loss": 1.7137,
      "step": 104
    },
    {
      "epoch": 0.003214342088270043,
      "grad_norm": 0.698582649230957,
      "learning_rate": 4.7375e-05,
      "loss": 1.5723,
      "step": 105
    },
    {
      "epoch": 0.003244954870063091,
      "grad_norm": 0.851190447807312,
      "learning_rate": 4.735e-05,
      "loss": 1.6534,
      "step": 106
    },
    {
      "epoch": 0.003275567651856139,
      "grad_norm": 0.6703295111656189,
      "learning_rate": 4.7325000000000005e-05,
      "loss": 1.4634,
      "step": 107
    },
    {
      "epoch": 0.003306180433649187,
      "grad_norm": 0.7606415152549744,
      "learning_rate": 4.73e-05,
      "loss": 1.6406,
      "step": 108
    },
    {
      "epoch": 0.003336793215442235,
      "grad_norm": 0.5245091915130615,
      "learning_rate": 4.7275000000000004e-05,
      "loss": 1.247,
      "step": 109
    },
    {
      "epoch": 0.003367405997235283,
      "grad_norm": 0.8049989938735962,
      "learning_rate": 4.7249999999999997e-05,
      "loss": 1.542,
      "step": 110
    },
    {
      "epoch": 0.003398018779028331,
      "grad_norm": 0.8165659308433533,
      "learning_rate": 4.7225e-05,
      "loss": 1.6173,
      "step": 111
    },
    {
      "epoch": 0.003428631560821379,
      "grad_norm": 0.9165499210357666,
      "learning_rate": 4.72e-05,
      "loss": 1.709,
      "step": 112
    },
    {
      "epoch": 0.003459244342614427,
      "grad_norm": 0.735424816608429,
      "learning_rate": 4.7175e-05,
      "loss": 1.6076,
      "step": 113
    },
    {
      "epoch": 0.003489857124407475,
      "grad_norm": 1.1733359098434448,
      "learning_rate": 4.715e-05,
      "loss": 1.7905,
      "step": 114
    },
    {
      "epoch": 0.0035204699062005235,
      "grad_norm": 0.5497770309448242,
      "learning_rate": 4.7125e-05,
      "loss": 1.4097,
      "step": 115
    },
    {
      "epoch": 0.0035510826879935715,
      "grad_norm": 0.7151444554328918,
      "learning_rate": 4.71e-05,
      "loss": 1.3717,
      "step": 116
    },
    {
      "epoch": 0.0035816954697866195,
      "grad_norm": 0.6513221859931946,
      "learning_rate": 4.7075e-05,
      "loss": 1.6882,
      "step": 117
    },
    {
      "epoch": 0.0036123082515796675,
      "grad_norm": 0.6310427188873291,
      "learning_rate": 4.705e-05,
      "loss": 1.5786,
      "step": 118
    },
    {
      "epoch": 0.0036429210333727155,
      "grad_norm": 0.7217684388160706,
      "learning_rate": 4.7025000000000005e-05,
      "loss": 1.4635,
      "step": 119
    },
    {
      "epoch": 0.0036735338151657635,
      "grad_norm": 0.9914249181747437,
      "learning_rate": 4.7e-05,
      "loss": 1.5259,
      "step": 120
    },
    {
      "epoch": 0.0037041465969588116,
      "grad_norm": 0.5706192255020142,
      "learning_rate": 4.6975000000000003e-05,
      "loss": 1.3961,
      "step": 121
    },
    {
      "epoch": 0.0037347593787518596,
      "grad_norm": 0.5370204448699951,
      "learning_rate": 4.695e-05,
      "loss": 1.3465,
      "step": 122
    },
    {
      "epoch": 0.0037653721605449076,
      "grad_norm": 0.7165305614471436,
      "learning_rate": 4.6925e-05,
      "loss": 1.8627,
      "step": 123
    },
    {
      "epoch": 0.0037959849423379556,
      "grad_norm": 0.6781850457191467,
      "learning_rate": 4.69e-05,
      "loss": 1.7014,
      "step": 124
    },
    {
      "epoch": 0.0038265977241310036,
      "grad_norm": 0.6935871839523315,
      "learning_rate": 4.6875e-05,
      "loss": 1.5602,
      "step": 125
    },
    {
      "epoch": 0.0038572105059240516,
      "grad_norm": 0.7030614614486694,
      "learning_rate": 4.685000000000001e-05,
      "loss": 1.6883,
      "step": 126
    },
    {
      "epoch": 0.0038878232877170996,
      "grad_norm": 0.6355715394020081,
      "learning_rate": 4.6825e-05,
      "loss": 1.4341,
      "step": 127
    },
    {
      "epoch": 0.003918436069510148,
      "grad_norm": 0.6512605547904968,
      "learning_rate": 4.6800000000000006e-05,
      "loss": 1.4183,
      "step": 128
    },
    {
      "epoch": 0.003949048851303196,
      "grad_norm": 0.6355776190757751,
      "learning_rate": 4.6775000000000005e-05,
      "loss": 1.2918,
      "step": 129
    },
    {
      "epoch": 0.003979661633096244,
      "grad_norm": 0.552229106426239,
      "learning_rate": 4.6750000000000005e-05,
      "loss": 1.2731,
      "step": 130
    },
    {
      "epoch": 0.004010274414889292,
      "grad_norm": 0.9546644687652588,
      "learning_rate": 4.6725000000000004e-05,
      "loss": 1.8082,
      "step": 131
    },
    {
      "epoch": 0.00404088719668234,
      "grad_norm": 0.5696132183074951,
      "learning_rate": 4.6700000000000003e-05,
      "loss": 1.5704,
      "step": 132
    },
    {
      "epoch": 0.004071499978475388,
      "grad_norm": 0.49629083275794983,
      "learning_rate": 4.6675e-05,
      "loss": 1.2108,
      "step": 133
    },
    {
      "epoch": 0.004102112760268436,
      "grad_norm": 0.6076759696006775,
      "learning_rate": 4.665e-05,
      "loss": 1.3235,
      "step": 134
    },
    {
      "epoch": 0.004132725542061484,
      "grad_norm": 0.6745683550834656,
      "learning_rate": 4.6625e-05,
      "loss": 1.4622,
      "step": 135
    },
    {
      "epoch": 0.004163338323854532,
      "grad_norm": 0.601839542388916,
      "learning_rate": 4.660000000000001e-05,
      "loss": 1.441,
      "step": 136
    },
    {
      "epoch": 0.00419395110564758,
      "grad_norm": 0.700334370136261,
      "learning_rate": 4.6575e-05,
      "loss": 1.3807,
      "step": 137
    },
    {
      "epoch": 0.004224563887440628,
      "grad_norm": 0.6876362562179565,
      "learning_rate": 4.655000000000001e-05,
      "loss": 1.392,
      "step": 138
    },
    {
      "epoch": 0.004255176669233676,
      "grad_norm": 0.5674521923065186,
      "learning_rate": 4.6525e-05,
      "loss": 1.3895,
      "step": 139
    },
    {
      "epoch": 0.004285789451026724,
      "grad_norm": 0.4558236598968506,
      "learning_rate": 4.6500000000000005e-05,
      "loss": 1.21,
      "step": 140
    },
    {
      "epoch": 0.004316402232819772,
      "grad_norm": 0.4896508753299713,
      "learning_rate": 4.6475000000000005e-05,
      "loss": 1.4345,
      "step": 141
    },
    {
      "epoch": 0.00434701501461282,
      "grad_norm": 0.5911825895309448,
      "learning_rate": 4.6450000000000004e-05,
      "loss": 1.3865,
      "step": 142
    },
    {
      "epoch": 0.004377627796405868,
      "grad_norm": 0.6147511005401611,
      "learning_rate": 4.6425000000000004e-05,
      "loss": 1.5006,
      "step": 143
    },
    {
      "epoch": 0.004408240578198916,
      "grad_norm": 0.4972354769706726,
      "learning_rate": 4.64e-05,
      "loss": 1.3357,
      "step": 144
    },
    {
      "epoch": 0.004438853359991964,
      "grad_norm": 0.7004488706588745,
      "learning_rate": 4.6375e-05,
      "loss": 1.5131,
      "step": 145
    },
    {
      "epoch": 0.004469466141785012,
      "grad_norm": 0.5502020120620728,
      "learning_rate": 4.635e-05,
      "loss": 1.374,
      "step": 146
    },
    {
      "epoch": 0.004500078923578061,
      "grad_norm": 0.9149038195610046,
      "learning_rate": 4.6325e-05,
      "loss": 1.1544,
      "step": 147
    },
    {
      "epoch": 0.004530691705371109,
      "grad_norm": 0.45347583293914795,
      "learning_rate": 4.630000000000001e-05,
      "loss": 1.2775,
      "step": 148
    },
    {
      "epoch": 0.004561304487164157,
      "grad_norm": 0.6348723769187927,
      "learning_rate": 4.6275e-05,
      "loss": 1.2563,
      "step": 149
    },
    {
      "epoch": 0.004591917268957205,
      "grad_norm": 0.4720090627670288,
      "learning_rate": 4.6250000000000006e-05,
      "loss": 1.3891,
      "step": 150
    },
    {
      "epoch": 0.004622530050750253,
      "grad_norm": 0.6165446639060974,
      "learning_rate": 4.6225e-05,
      "loss": 1.5441,
      "step": 151
    },
    {
      "epoch": 0.004653142832543301,
      "grad_norm": 0.4597586393356323,
      "learning_rate": 4.6200000000000005e-05,
      "loss": 1.3519,
      "step": 152
    },
    {
      "epoch": 0.004683755614336349,
      "grad_norm": 0.46697214245796204,
      "learning_rate": 4.6175000000000004e-05,
      "loss": 1.3465,
      "step": 153
    },
    {
      "epoch": 0.004714368396129397,
      "grad_norm": 0.7597264647483826,
      "learning_rate": 4.6150000000000004e-05,
      "loss": 1.337,
      "step": 154
    },
    {
      "epoch": 0.004744981177922445,
      "grad_norm": 0.7377416491508484,
      "learning_rate": 4.6125e-05,
      "loss": 1.687,
      "step": 155
    },
    {
      "epoch": 0.004775593959715493,
      "grad_norm": 0.587668776512146,
      "learning_rate": 4.61e-05,
      "loss": 1.4847,
      "step": 156
    },
    {
      "epoch": 0.004806206741508541,
      "grad_norm": 0.6438502669334412,
      "learning_rate": 4.6075e-05,
      "loss": 1.1997,
      "step": 157
    },
    {
      "epoch": 0.004836819523301589,
      "grad_norm": 0.6206082701683044,
      "learning_rate": 4.605e-05,
      "loss": 1.5131,
      "step": 158
    },
    {
      "epoch": 0.004867432305094637,
      "grad_norm": 0.5365675091743469,
      "learning_rate": 4.6025e-05,
      "loss": 1.4782,
      "step": 159
    },
    {
      "epoch": 0.004898045086887685,
      "grad_norm": 0.7455544471740723,
      "learning_rate": 4.600000000000001e-05,
      "loss": 1.3798,
      "step": 160
    },
    {
      "epoch": 0.004928657868680733,
      "grad_norm": 0.48169374465942383,
      "learning_rate": 4.5975e-05,
      "loss": 1.2732,
      "step": 161
    },
    {
      "epoch": 0.004959270650473781,
      "grad_norm": 0.44339457154273987,
      "learning_rate": 4.5950000000000006e-05,
      "loss": 1.081,
      "step": 162
    },
    {
      "epoch": 0.004989883432266829,
      "grad_norm": 0.6556842923164368,
      "learning_rate": 4.5925e-05,
      "loss": 1.3415,
      "step": 163
    },
    {
      "epoch": 0.005020496214059877,
      "grad_norm": 0.4652569890022278,
      "learning_rate": 4.5900000000000004e-05,
      "loss": 1.2523,
      "step": 164
    },
    {
      "epoch": 0.005051108995852925,
      "grad_norm": 0.6618898510932922,
      "learning_rate": 4.5875000000000004e-05,
      "loss": 1.485,
      "step": 165
    },
    {
      "epoch": 0.005081721777645973,
      "grad_norm": 0.4309523403644562,
      "learning_rate": 4.585e-05,
      "loss": 1.182,
      "step": 166
    },
    {
      "epoch": 0.005112334559439021,
      "grad_norm": 0.6413261890411377,
      "learning_rate": 4.5825e-05,
      "loss": 1.3664,
      "step": 167
    },
    {
      "epoch": 0.005142947341232069,
      "grad_norm": 0.487090140581131,
      "learning_rate": 4.58e-05,
      "loss": 1.3207,
      "step": 168
    },
    {
      "epoch": 0.005173560123025117,
      "grad_norm": 0.47292882204055786,
      "learning_rate": 4.5775e-05,
      "loss": 1.1418,
      "step": 169
    },
    {
      "epoch": 0.005204172904818165,
      "grad_norm": 0.4816010892391205,
      "learning_rate": 4.575e-05,
      "loss": 1.296,
      "step": 170
    },
    {
      "epoch": 0.005234785686611213,
      "grad_norm": 0.46606647968292236,
      "learning_rate": 4.5725e-05,
      "loss": 1.0349,
      "step": 171
    },
    {
      "epoch": 0.005265398468404261,
      "grad_norm": 0.3764216899871826,
      "learning_rate": 4.5700000000000006e-05,
      "loss": 1.2529,
      "step": 172
    },
    {
      "epoch": 0.005296011250197309,
      "grad_norm": 0.4803175628185272,
      "learning_rate": 4.5675e-05,
      "loss": 1.259,
      "step": 173
    },
    {
      "epoch": 0.005326624031990357,
      "grad_norm": 0.3913346230983734,
      "learning_rate": 4.5650000000000005e-05,
      "loss": 1.1876,
      "step": 174
    },
    {
      "epoch": 0.005357236813783405,
      "grad_norm": 0.6495143175125122,
      "learning_rate": 4.5625e-05,
      "loss": 1.4597,
      "step": 175
    },
    {
      "epoch": 0.005387849595576453,
      "grad_norm": 0.7035049796104431,
      "learning_rate": 4.5600000000000004e-05,
      "loss": 1.3313,
      "step": 176
    },
    {
      "epoch": 0.005418462377369501,
      "grad_norm": 0.38885799050331116,
      "learning_rate": 4.5575e-05,
      "loss": 1.0694,
      "step": 177
    },
    {
      "epoch": 0.005449075159162549,
      "grad_norm": 0.4177185893058777,
      "learning_rate": 4.555e-05,
      "loss": 1.3548,
      "step": 178
    },
    {
      "epoch": 0.005479687940955597,
      "grad_norm": 0.3824990391731262,
      "learning_rate": 4.5525e-05,
      "loss": 1.1421,
      "step": 179
    },
    {
      "epoch": 0.005510300722748645,
      "grad_norm": 0.4456152319908142,
      "learning_rate": 4.55e-05,
      "loss": 1.3902,
      "step": 180
    },
    {
      "epoch": 0.005540913504541693,
      "grad_norm": 0.4500466585159302,
      "learning_rate": 4.5475e-05,
      "loss": 1.2778,
      "step": 181
    },
    {
      "epoch": 0.005571526286334741,
      "grad_norm": 0.3600282669067383,
      "learning_rate": 4.545000000000001e-05,
      "loss": 1.1838,
      "step": 182
    },
    {
      "epoch": 0.005602139068127789,
      "grad_norm": 0.3778589069843292,
      "learning_rate": 4.5425e-05,
      "loss": 1.1702,
      "step": 183
    },
    {
      "epoch": 0.005632751849920837,
      "grad_norm": 0.3688429594039917,
      "learning_rate": 4.5400000000000006e-05,
      "loss": 1.1849,
      "step": 184
    },
    {
      "epoch": 0.005663364631713885,
      "grad_norm": 0.28500115871429443,
      "learning_rate": 4.5375e-05,
      "loss": 1.0787,
      "step": 185
    },
    {
      "epoch": 0.005693977413506933,
      "grad_norm": 0.6923081278800964,
      "learning_rate": 4.5350000000000005e-05,
      "loss": 1.2256,
      "step": 186
    },
    {
      "epoch": 0.005724590195299981,
      "grad_norm": 0.3610539734363556,
      "learning_rate": 4.5325000000000004e-05,
      "loss": 1.1187,
      "step": 187
    },
    {
      "epoch": 0.005755202977093029,
      "grad_norm": 0.4832324683666229,
      "learning_rate": 4.53e-05,
      "loss": 1.2071,
      "step": 188
    },
    {
      "epoch": 0.005785815758886078,
      "grad_norm": 0.3430784344673157,
      "learning_rate": 4.5275e-05,
      "loss": 1.2734,
      "step": 189
    },
    {
      "epoch": 0.005816428540679126,
      "grad_norm": 0.36265236139297485,
      "learning_rate": 4.525e-05,
      "loss": 1.2756,
      "step": 190
    },
    {
      "epoch": 0.005847041322472174,
      "grad_norm": 0.4810546338558197,
      "learning_rate": 4.5225e-05,
      "loss": 1.3209,
      "step": 191
    },
    {
      "epoch": 0.005877654104265222,
      "grad_norm": 0.48843711614608765,
      "learning_rate": 4.52e-05,
      "loss": 1.4811,
      "step": 192
    },
    {
      "epoch": 0.00590826688605827,
      "grad_norm": 0.4827427566051483,
      "learning_rate": 4.5175e-05,
      "loss": 1.3953,
      "step": 193
    },
    {
      "epoch": 0.005938879667851318,
      "grad_norm": 0.5305744409561157,
      "learning_rate": 4.5150000000000006e-05,
      "loss": 1.5882,
      "step": 194
    },
    {
      "epoch": 0.005969492449644366,
      "grad_norm": 0.38677212595939636,
      "learning_rate": 4.5125e-05,
      "loss": 1.0727,
      "step": 195
    },
    {
      "epoch": 0.006000105231437414,
      "grad_norm": 0.39753592014312744,
      "learning_rate": 4.5100000000000005e-05,
      "loss": 1.4645,
      "step": 196
    },
    {
      "epoch": 0.006030718013230462,
      "grad_norm": 0.522423505783081,
      "learning_rate": 4.5075e-05,
      "loss": 0.9498,
      "step": 197
    },
    {
      "epoch": 0.00606133079502351,
      "grad_norm": 0.47243109345436096,
      "learning_rate": 4.5050000000000004e-05,
      "loss": 1.0428,
      "step": 198
    },
    {
      "epoch": 0.006091943576816558,
      "grad_norm": 0.3697422444820404,
      "learning_rate": 4.5025000000000003e-05,
      "loss": 1.2859,
      "step": 199
    },
    {
      "epoch": 0.006122556358609606,
      "grad_norm": 0.3098140358924866,
      "learning_rate": 4.5e-05,
      "loss": 1.1047,
      "step": 200
    },
    {
      "epoch": 0.006153169140402654,
      "grad_norm": 0.38746726512908936,
      "learning_rate": 4.4975e-05,
      "loss": 1.2299,
      "step": 201
    },
    {
      "epoch": 0.006183781922195702,
      "grad_norm": 0.4374942481517792,
      "learning_rate": 4.495e-05,
      "loss": 1.2585,
      "step": 202
    },
    {
      "epoch": 0.00621439470398875,
      "grad_norm": 0.4549533426761627,
      "learning_rate": 4.4925e-05,
      "loss": 1.2989,
      "step": 203
    },
    {
      "epoch": 0.006245007485781798,
      "grad_norm": 0.5059826970100403,
      "learning_rate": 4.49e-05,
      "loss": 1.2002,
      "step": 204
    },
    {
      "epoch": 0.006275620267574846,
      "grad_norm": 0.3983885645866394,
      "learning_rate": 4.4875e-05,
      "loss": 1.299,
      "step": 205
    },
    {
      "epoch": 0.006306233049367894,
      "grad_norm": 1.0123388767242432,
      "learning_rate": 4.4850000000000006e-05,
      "loss": 1.1865,
      "step": 206
    },
    {
      "epoch": 0.006336845831160942,
      "grad_norm": 0.41973376274108887,
      "learning_rate": 4.4825e-05,
      "loss": 1.322,
      "step": 207
    },
    {
      "epoch": 0.00636745861295399,
      "grad_norm": 0.30086344480514526,
      "learning_rate": 4.4800000000000005e-05,
      "loss": 1.0516,
      "step": 208
    },
    {
      "epoch": 0.006398071394747038,
      "grad_norm": 0.5842503309249878,
      "learning_rate": 4.4775e-05,
      "loss": 1.1424,
      "step": 209
    },
    {
      "epoch": 0.006428684176540086,
      "grad_norm": 0.46193867921829224,
      "learning_rate": 4.4750000000000004e-05,
      "loss": 1.3607,
      "step": 210
    },
    {
      "epoch": 0.006459296958333134,
      "grad_norm": 0.38243287801742554,
      "learning_rate": 4.4725e-05,
      "loss": 1.3181,
      "step": 211
    },
    {
      "epoch": 0.006489909740126182,
      "grad_norm": 0.37284815311431885,
      "learning_rate": 4.47e-05,
      "loss": 1.2067,
      "step": 212
    },
    {
      "epoch": 0.00652052252191923,
      "grad_norm": 0.31831157207489014,
      "learning_rate": 4.4675e-05,
      "loss": 1.201,
      "step": 213
    },
    {
      "epoch": 0.006551135303712278,
      "grad_norm": 0.5502720475196838,
      "learning_rate": 4.465e-05,
      "loss": 1.4088,
      "step": 214
    },
    {
      "epoch": 0.006581748085505326,
      "grad_norm": 0.33159926533699036,
      "learning_rate": 4.4625e-05,
      "loss": 1.2081,
      "step": 215
    },
    {
      "epoch": 0.006612360867298374,
      "grad_norm": 0.36955511569976807,
      "learning_rate": 4.46e-05,
      "loss": 1.3067,
      "step": 216
    },
    {
      "epoch": 0.006642973649091422,
      "grad_norm": 0.47687390446662903,
      "learning_rate": 4.4575e-05,
      "loss": 1.2447,
      "step": 217
    },
    {
      "epoch": 0.00667358643088447,
      "grad_norm": 0.28809696435928345,
      "learning_rate": 4.4550000000000005e-05,
      "loss": 1.0808,
      "step": 218
    },
    {
      "epoch": 0.006704199212677518,
      "grad_norm": 0.38485610485076904,
      "learning_rate": 4.4525e-05,
      "loss": 1.303,
      "step": 219
    },
    {
      "epoch": 0.006734811994470566,
      "grad_norm": 0.40298861265182495,
      "learning_rate": 4.4500000000000004e-05,
      "loss": 1.3083,
      "step": 220
    },
    {
      "epoch": 0.006765424776263614,
      "grad_norm": 0.3364523947238922,
      "learning_rate": 4.4475e-05,
      "loss": 1.1547,
      "step": 221
    },
    {
      "epoch": 0.006796037558056662,
      "grad_norm": 0.31758078932762146,
      "learning_rate": 4.445e-05,
      "loss": 1.1696,
      "step": 222
    },
    {
      "epoch": 0.00682665033984971,
      "grad_norm": 0.7932029366493225,
      "learning_rate": 4.4425e-05,
      "loss": 1.0809,
      "step": 223
    },
    {
      "epoch": 0.006857263121642758,
      "grad_norm": 0.3669879138469696,
      "learning_rate": 4.44e-05,
      "loss": 1.1551,
      "step": 224
    },
    {
      "epoch": 0.006887875903435806,
      "grad_norm": 0.2542504668235779,
      "learning_rate": 4.4375e-05,
      "loss": 0.918,
      "step": 225
    },
    {
      "epoch": 0.006918488685228854,
      "grad_norm": 0.6452179551124573,
      "learning_rate": 4.435e-05,
      "loss": 1.019,
      "step": 226
    },
    {
      "epoch": 0.006949101467021902,
      "grad_norm": 0.3735548257827759,
      "learning_rate": 4.4325e-05,
      "loss": 1.1951,
      "step": 227
    },
    {
      "epoch": 0.00697971424881495,
      "grad_norm": 0.5885425209999084,
      "learning_rate": 4.43e-05,
      "loss": 1.2562,
      "step": 228
    },
    {
      "epoch": 0.007010327030607998,
      "grad_norm": 0.29807329177856445,
      "learning_rate": 4.4275e-05,
      "loss": 1.1336,
      "step": 229
    },
    {
      "epoch": 0.007040939812401047,
      "grad_norm": 0.29619550704956055,
      "learning_rate": 4.4250000000000005e-05,
      "loss": 1.044,
      "step": 230
    },
    {
      "epoch": 0.007071552594194095,
      "grad_norm": 0.3926753103733063,
      "learning_rate": 4.4225e-05,
      "loss": 1.3405,
      "step": 231
    },
    {
      "epoch": 0.007102165375987143,
      "grad_norm": 0.2869264483451843,
      "learning_rate": 4.4200000000000004e-05,
      "loss": 1.052,
      "step": 232
    },
    {
      "epoch": 0.007132778157780191,
      "grad_norm": 0.29142582416534424,
      "learning_rate": 4.4174999999999996e-05,
      "loss": 1.1612,
      "step": 233
    },
    {
      "epoch": 0.007163390939573239,
      "grad_norm": 0.34663820266723633,
      "learning_rate": 4.415e-05,
      "loss": 1.2379,
      "step": 234
    },
    {
      "epoch": 0.007194003721366287,
      "grad_norm": 0.27558591961860657,
      "learning_rate": 4.4125e-05,
      "loss": 1.1827,
      "step": 235
    },
    {
      "epoch": 0.007224616503159335,
      "grad_norm": 0.318002313375473,
      "learning_rate": 4.41e-05,
      "loss": 1.1468,
      "step": 236
    },
    {
      "epoch": 0.007255229284952383,
      "grad_norm": 0.4704686403274536,
      "learning_rate": 4.4075e-05,
      "loss": 1.3175,
      "step": 237
    },
    {
      "epoch": 0.007285842066745431,
      "grad_norm": 0.2914208769798279,
      "learning_rate": 4.405e-05,
      "loss": 1.0301,
      "step": 238
    },
    {
      "epoch": 0.007316454848538479,
      "grad_norm": 0.42913445830345154,
      "learning_rate": 4.4025e-05,
      "loss": 1.1734,
      "step": 239
    },
    {
      "epoch": 0.007347067630331527,
      "grad_norm": 0.3586747646331787,
      "learning_rate": 4.4000000000000006e-05,
      "loss": 1.1646,
      "step": 240
    },
    {
      "epoch": 0.007377680412124575,
      "grad_norm": 0.27179181575775146,
      "learning_rate": 4.3975e-05,
      "loss": 1.0642,
      "step": 241
    },
    {
      "epoch": 0.007408293193917623,
      "grad_norm": 0.28132525086402893,
      "learning_rate": 4.3950000000000004e-05,
      "loss": 1.0653,
      "step": 242
    },
    {
      "epoch": 0.007438905975710671,
      "grad_norm": 0.39449411630630493,
      "learning_rate": 4.3925e-05,
      "loss": 1.2665,
      "step": 243
    },
    {
      "epoch": 0.007469518757503719,
      "grad_norm": 0.7181042432785034,
      "learning_rate": 4.39e-05,
      "loss": 1.0158,
      "step": 244
    },
    {
      "epoch": 0.007500131539296767,
      "grad_norm": 0.3817023038864136,
      "learning_rate": 4.3875e-05,
      "loss": 1.1011,
      "step": 245
    },
    {
      "epoch": 0.007530744321089815,
      "grad_norm": 0.4603152275085449,
      "learning_rate": 4.385e-05,
      "loss": 1.1116,
      "step": 246
    },
    {
      "epoch": 0.007561357102882863,
      "grad_norm": 0.23188698291778564,
      "learning_rate": 4.3825e-05,
      "loss": 0.8222,
      "step": 247
    },
    {
      "epoch": 0.007591969884675911,
      "grad_norm": 0.30980584025382996,
      "learning_rate": 4.38e-05,
      "loss": 1.1244,
      "step": 248
    },
    {
      "epoch": 0.007622582666468959,
      "grad_norm": 0.30840006470680237,
      "learning_rate": 4.3775e-05,
      "loss": 1.047,
      "step": 249
    },
    {
      "epoch": 0.007653195448262007,
      "grad_norm": 0.34617090225219727,
      "learning_rate": 4.375e-05,
      "loss": 1.093,
      "step": 250
    },
    {
      "epoch": 0.007683808230055055,
      "grad_norm": 0.282256156206131,
      "learning_rate": 4.3725000000000006e-05,
      "loss": 1.0414,
      "step": 251
    },
    {
      "epoch": 0.007714421011848103,
      "grad_norm": 0.29317784309387207,
      "learning_rate": 4.3700000000000005e-05,
      "loss": 1.1664,
      "step": 252
    },
    {
      "epoch": 0.007745033793641151,
      "grad_norm": 0.33734336495399475,
      "learning_rate": 4.3675000000000005e-05,
      "loss": 1.2725,
      "step": 253
    },
    {
      "epoch": 0.007775646575434199,
      "grad_norm": 0.2963639199733734,
      "learning_rate": 4.3650000000000004e-05,
      "loss": 1.1177,
      "step": 254
    },
    {
      "epoch": 0.007806259357227247,
      "grad_norm": 0.3168213665485382,
      "learning_rate": 4.3625e-05,
      "loss": 1.2415,
      "step": 255
    },
    {
      "epoch": 0.007836872139020295,
      "grad_norm": 0.3703044354915619,
      "learning_rate": 4.36e-05,
      "loss": 1.0274,
      "step": 256
    },
    {
      "epoch": 0.007867484920813343,
      "grad_norm": 0.2970989942550659,
      "learning_rate": 4.3575e-05,
      "loss": 1.1473,
      "step": 257
    },
    {
      "epoch": 0.007898097702606391,
      "grad_norm": 0.2614826560020447,
      "learning_rate": 4.355e-05,
      "loss": 0.9474,
      "step": 258
    },
    {
      "epoch": 0.00792871048439944,
      "grad_norm": 0.3015413284301758,
      "learning_rate": 4.352500000000001e-05,
      "loss": 1.13,
      "step": 259
    },
    {
      "epoch": 0.007959323266192487,
      "grad_norm": 0.23016412556171417,
      "learning_rate": 4.35e-05,
      "loss": 0.9721,
      "step": 260
    },
    {
      "epoch": 0.007989936047985535,
      "grad_norm": 0.30088579654693604,
      "learning_rate": 4.3475000000000006e-05,
      "loss": 1.0868,
      "step": 261
    },
    {
      "epoch": 0.008020548829778583,
      "grad_norm": 0.279689759016037,
      "learning_rate": 4.345e-05,
      "loss": 1.0155,
      "step": 262
    },
    {
      "epoch": 0.008051161611571631,
      "grad_norm": 0.38018348813056946,
      "learning_rate": 4.3425000000000005e-05,
      "loss": 1.0624,
      "step": 263
    },
    {
      "epoch": 0.00808177439336468,
      "grad_norm": 0.3055655062198639,
      "learning_rate": 4.3400000000000005e-05,
      "loss": 1.08,
      "step": 264
    },
    {
      "epoch": 0.008112387175157727,
      "grad_norm": 0.3127528727054596,
      "learning_rate": 4.3375000000000004e-05,
      "loss": 0.9651,
      "step": 265
    },
    {
      "epoch": 0.008142999956950775,
      "grad_norm": 0.28754016757011414,
      "learning_rate": 4.335e-05,
      "loss": 1.0942,
      "step": 266
    },
    {
      "epoch": 0.008173612738743823,
      "grad_norm": 0.29250219464302063,
      "learning_rate": 4.3325e-05,
      "loss": 1.1359,
      "step": 267
    },
    {
      "epoch": 0.008204225520536871,
      "grad_norm": 0.30653080344200134,
      "learning_rate": 4.33e-05,
      "loss": 1.1618,
      "step": 268
    },
    {
      "epoch": 0.00823483830232992,
      "grad_norm": 0.35127562284469604,
      "learning_rate": 4.3275e-05,
      "loss": 1.099,
      "step": 269
    },
    {
      "epoch": 0.008265451084122967,
      "grad_norm": 0.6432350873947144,
      "learning_rate": 4.325e-05,
      "loss": 0.9423,
      "step": 270
    },
    {
      "epoch": 0.008296063865916015,
      "grad_norm": 0.286377876996994,
      "learning_rate": 4.322500000000001e-05,
      "loss": 1.1373,
      "step": 271
    },
    {
      "epoch": 0.008326676647709063,
      "grad_norm": 0.26014837622642517,
      "learning_rate": 4.32e-05,
      "loss": 1.0286,
      "step": 272
    },
    {
      "epoch": 0.008357289429502111,
      "grad_norm": 0.24751794338226318,
      "learning_rate": 4.3175000000000006e-05,
      "loss": 1.1019,
      "step": 273
    },
    {
      "epoch": 0.00838790221129516,
      "grad_norm": 0.6227604746818542,
      "learning_rate": 4.315e-05,
      "loss": 0.9121,
      "step": 274
    },
    {
      "epoch": 0.008418514993088207,
      "grad_norm": 0.32431575655937195,
      "learning_rate": 4.3125000000000005e-05,
      "loss": 1.2016,
      "step": 275
    },
    {
      "epoch": 0.008449127774881255,
      "grad_norm": 0.24957340955734253,
      "learning_rate": 4.3100000000000004e-05,
      "loss": 1.0418,
      "step": 276
    },
    {
      "epoch": 0.008479740556674303,
      "grad_norm": 0.30254772305488586,
      "learning_rate": 4.3075000000000003e-05,
      "loss": 1.1357,
      "step": 277
    },
    {
      "epoch": 0.008510353338467351,
      "grad_norm": 0.33814331889152527,
      "learning_rate": 4.305e-05,
      "loss": 1.1195,
      "step": 278
    },
    {
      "epoch": 0.0085409661202604,
      "grad_norm": 0.3048022985458374,
      "learning_rate": 4.3025e-05,
      "loss": 1.0546,
      "step": 279
    },
    {
      "epoch": 0.008571578902053447,
      "grad_norm": 0.32221147418022156,
      "learning_rate": 4.3e-05,
      "loss": 1.1297,
      "step": 280
    },
    {
      "epoch": 0.008602191683846495,
      "grad_norm": 0.4116046130657196,
      "learning_rate": 4.2975e-05,
      "loss": 1.1954,
      "step": 281
    },
    {
      "epoch": 0.008632804465639543,
      "grad_norm": 0.33012160658836365,
      "learning_rate": 4.295e-05,
      "loss": 1.0805,
      "step": 282
    },
    {
      "epoch": 0.008663417247432591,
      "grad_norm": 0.22367143630981445,
      "learning_rate": 4.2925000000000007e-05,
      "loss": 0.9125,
      "step": 283
    },
    {
      "epoch": 0.00869403002922564,
      "grad_norm": 0.2819913923740387,
      "learning_rate": 4.29e-05,
      "loss": 1.1961,
      "step": 284
    },
    {
      "epoch": 0.008724642811018687,
      "grad_norm": 0.23264425992965698,
      "learning_rate": 4.2875000000000005e-05,
      "loss": 1.0629,
      "step": 285
    },
    {
      "epoch": 0.008755255592811735,
      "grad_norm": 0.2281986027956009,
      "learning_rate": 4.285e-05,
      "loss": 0.9394,
      "step": 286
    },
    {
      "epoch": 0.008785868374604783,
      "grad_norm": 0.27514564990997314,
      "learning_rate": 4.2825000000000004e-05,
      "loss": 1.1203,
      "step": 287
    },
    {
      "epoch": 0.008816481156397831,
      "grad_norm": 0.281759649515152,
      "learning_rate": 4.2800000000000004e-05,
      "loss": 1.0542,
      "step": 288
    },
    {
      "epoch": 0.00884709393819088,
      "grad_norm": 0.23493672907352448,
      "learning_rate": 4.2775e-05,
      "loss": 1.0645,
      "step": 289
    },
    {
      "epoch": 0.008877706719983927,
      "grad_norm": 0.21564684808254242,
      "learning_rate": 4.275e-05,
      "loss": 0.9777,
      "step": 290
    },
    {
      "epoch": 0.008908319501776975,
      "grad_norm": 0.3044259548187256,
      "learning_rate": 4.2725e-05,
      "loss": 0.9508,
      "step": 291
    },
    {
      "epoch": 0.008938932283570023,
      "grad_norm": 0.43652230501174927,
      "learning_rate": 4.27e-05,
      "loss": 1.1147,
      "step": 292
    },
    {
      "epoch": 0.008969545065363073,
      "grad_norm": 0.3009858727455139,
      "learning_rate": 4.2675e-05,
      "loss": 1.0028,
      "step": 293
    },
    {
      "epoch": 0.009000157847156121,
      "grad_norm": 0.30155524611473083,
      "learning_rate": 4.265e-05,
      "loss": 1.0876,
      "step": 294
    },
    {
      "epoch": 0.00903077062894917,
      "grad_norm": 0.2550355792045593,
      "learning_rate": 4.2625000000000006e-05,
      "loss": 1.0466,
      "step": 295
    },
    {
      "epoch": 0.009061383410742217,
      "grad_norm": 0.30369213223457336,
      "learning_rate": 4.26e-05,
      "loss": 1.0555,
      "step": 296
    },
    {
      "epoch": 0.009091996192535265,
      "grad_norm": 0.2777206003665924,
      "learning_rate": 4.2575000000000005e-05,
      "loss": 0.9675,
      "step": 297
    },
    {
      "epoch": 0.009122608974328313,
      "grad_norm": 0.2704721689224243,
      "learning_rate": 4.2550000000000004e-05,
      "loss": 1.108,
      "step": 298
    },
    {
      "epoch": 0.009153221756121361,
      "grad_norm": 0.28492510318756104,
      "learning_rate": 4.2525000000000004e-05,
      "loss": 1.126,
      "step": 299
    },
    {
      "epoch": 0.00918383453791441,
      "grad_norm": 0.20624636113643646,
      "learning_rate": 4.25e-05,
      "loss": 0.8756,
      "step": 300
    },
    {
      "epoch": 0.009214447319707457,
      "grad_norm": 0.25993070006370544,
      "learning_rate": 4.2475e-05,
      "loss": 1.0429,
      "step": 301
    },
    {
      "epoch": 0.009245060101500505,
      "grad_norm": 0.3287903666496277,
      "learning_rate": 4.245e-05,
      "loss": 1.2229,
      "step": 302
    },
    {
      "epoch": 0.009275672883293553,
      "grad_norm": 0.3433060348033905,
      "learning_rate": 4.2425e-05,
      "loss": 1.1401,
      "step": 303
    },
    {
      "epoch": 0.009306285665086601,
      "grad_norm": 0.25451621413230896,
      "learning_rate": 4.24e-05,
      "loss": 0.9511,
      "step": 304
    },
    {
      "epoch": 0.00933689844687965,
      "grad_norm": 0.26404258608818054,
      "learning_rate": 4.237500000000001e-05,
      "loss": 1.0965,
      "step": 305
    },
    {
      "epoch": 0.009367511228672697,
      "grad_norm": 0.3250214159488678,
      "learning_rate": 4.235e-05,
      "loss": 1.2837,
      "step": 306
    },
    {
      "epoch": 0.009398124010465745,
      "grad_norm": 0.26017168164253235,
      "learning_rate": 4.2325000000000006e-05,
      "loss": 1.0189,
      "step": 307
    },
    {
      "epoch": 0.009428736792258793,
      "grad_norm": 0.3346084654331207,
      "learning_rate": 4.23e-05,
      "loss": 1.1779,
      "step": 308
    },
    {
      "epoch": 0.009459349574051841,
      "grad_norm": 0.2283959984779358,
      "learning_rate": 4.2275000000000004e-05,
      "loss": 0.9156,
      "step": 309
    },
    {
      "epoch": 0.00948996235584489,
      "grad_norm": 0.24887420237064362,
      "learning_rate": 4.2250000000000004e-05,
      "loss": 1.1497,
      "step": 310
    },
    {
      "epoch": 0.009520575137637937,
      "grad_norm": 0.27041831612586975,
      "learning_rate": 4.2225e-05,
      "loss": 1.1373,
      "step": 311
    },
    {
      "epoch": 0.009551187919430985,
      "grad_norm": 0.2696216404438019,
      "learning_rate": 4.22e-05,
      "loss": 0.9968,
      "step": 312
    },
    {
      "epoch": 0.009581800701224033,
      "grad_norm": 0.24443794786930084,
      "learning_rate": 4.2175e-05,
      "loss": 1.0381,
      "step": 313
    },
    {
      "epoch": 0.009612413483017081,
      "grad_norm": 0.23978783190250397,
      "learning_rate": 4.215e-05,
      "loss": 1.0948,
      "step": 314
    },
    {
      "epoch": 0.00964302626481013,
      "grad_norm": 0.2490464448928833,
      "learning_rate": 4.2125e-05,
      "loss": 1.0182,
      "step": 315
    },
    {
      "epoch": 0.009673639046603177,
      "grad_norm": 0.23054780066013336,
      "learning_rate": 4.21e-05,
      "loss": 0.9366,
      "step": 316
    },
    {
      "epoch": 0.009704251828396225,
      "grad_norm": 0.5445377826690674,
      "learning_rate": 4.2075000000000006e-05,
      "loss": 0.996,
      "step": 317
    },
    {
      "epoch": 0.009734864610189273,
      "grad_norm": 0.24003981053829193,
      "learning_rate": 4.205e-05,
      "loss": 1.0148,
      "step": 318
    },
    {
      "epoch": 0.009765477391982321,
      "grad_norm": 0.3355776071548462,
      "learning_rate": 4.2025000000000005e-05,
      "loss": 1.1296,
      "step": 319
    },
    {
      "epoch": 0.00979609017377537,
      "grad_norm": 0.23706026375293732,
      "learning_rate": 4.2e-05,
      "loss": 1.1352,
      "step": 320
    },
    {
      "epoch": 0.009826702955568417,
      "grad_norm": 0.26666516065597534,
      "learning_rate": 4.1975000000000004e-05,
      "loss": 0.935,
      "step": 321
    },
    {
      "epoch": 0.009857315737361465,
      "grad_norm": 0.28790584206581116,
      "learning_rate": 4.195e-05,
      "loss": 1.0929,
      "step": 322
    },
    {
      "epoch": 0.009887928519154513,
      "grad_norm": 0.24307578802108765,
      "learning_rate": 4.1925e-05,
      "loss": 0.9137,
      "step": 323
    },
    {
      "epoch": 0.009918541300947561,
      "grad_norm": 0.2743045687675476,
      "learning_rate": 4.19e-05,
      "loss": 1.1207,
      "step": 324
    },
    {
      "epoch": 0.00994915408274061,
      "grad_norm": 0.27031800150871277,
      "learning_rate": 4.1875e-05,
      "loss": 1.0931,
      "step": 325
    },
    {
      "epoch": 0.009979766864533657,
      "grad_norm": 0.3189777135848999,
      "learning_rate": 4.185e-05,
      "loss": 1.1267,
      "step": 326
    },
    {
      "epoch": 0.010010379646326706,
      "grad_norm": 0.36340323090553284,
      "learning_rate": 4.1825e-05,
      "loss": 1.2711,
      "step": 327
    },
    {
      "epoch": 0.010040992428119754,
      "grad_norm": 0.31963106989860535,
      "learning_rate": 4.18e-05,
      "loss": 1.0644,
      "step": 328
    },
    {
      "epoch": 0.010071605209912802,
      "grad_norm": 0.3205423951148987,
      "learning_rate": 4.1775000000000006e-05,
      "loss": 1.1508,
      "step": 329
    },
    {
      "epoch": 0.01010221799170585,
      "grad_norm": 0.6373870968818665,
      "learning_rate": 4.175e-05,
      "loss": 1.0019,
      "step": 330
    },
    {
      "epoch": 0.010132830773498898,
      "grad_norm": 0.27367815375328064,
      "learning_rate": 4.1725000000000005e-05,
      "loss": 1.0039,
      "step": 331
    },
    {
      "epoch": 0.010163443555291946,
      "grad_norm": 0.25735175609588623,
      "learning_rate": 4.17e-05,
      "loss": 1.0507,
      "step": 332
    },
    {
      "epoch": 0.010194056337084994,
      "grad_norm": 0.3167512118816376,
      "learning_rate": 4.1675e-05,
      "loss": 0.9917,
      "step": 333
    },
    {
      "epoch": 0.010224669118878042,
      "grad_norm": 0.2874915301799774,
      "learning_rate": 4.165e-05,
      "loss": 1.0603,
      "step": 334
    },
    {
      "epoch": 0.01025528190067109,
      "grad_norm": 0.2256983071565628,
      "learning_rate": 4.1625e-05,
      "loss": 0.9352,
      "step": 335
    },
    {
      "epoch": 0.010285894682464138,
      "grad_norm": 0.30917415022850037,
      "learning_rate": 4.16e-05,
      "loss": 1.1568,
      "step": 336
    },
    {
      "epoch": 0.010316507464257186,
      "grad_norm": 0.274726003408432,
      "learning_rate": 4.1575e-05,
      "loss": 1.0085,
      "step": 337
    },
    {
      "epoch": 0.010347120246050234,
      "grad_norm": 0.2276756316423416,
      "learning_rate": 4.155e-05,
      "loss": 0.9877,
      "step": 338
    },
    {
      "epoch": 0.010377733027843282,
      "grad_norm": 0.252692848443985,
      "learning_rate": 4.1525e-05,
      "loss": 1.108,
      "step": 339
    },
    {
      "epoch": 0.01040834580963633,
      "grad_norm": 0.5832369327545166,
      "learning_rate": 4.15e-05,
      "loss": 1.0425,
      "step": 340
    },
    {
      "epoch": 0.010438958591429378,
      "grad_norm": 0.3051837384700775,
      "learning_rate": 4.1475000000000005e-05,
      "loss": 1.1144,
      "step": 341
    },
    {
      "epoch": 0.010469571373222426,
      "grad_norm": 0.2877141535282135,
      "learning_rate": 4.145e-05,
      "loss": 1.0702,
      "step": 342
    },
    {
      "epoch": 0.010500184155015474,
      "grad_norm": 0.258938193321228,
      "learning_rate": 4.1425000000000004e-05,
      "loss": 1.0,
      "step": 343
    },
    {
      "epoch": 0.010530796936808522,
      "grad_norm": 0.3507537543773651,
      "learning_rate": 4.14e-05,
      "loss": 1.2649,
      "step": 344
    },
    {
      "epoch": 0.01056140971860157,
      "grad_norm": 0.264544278383255,
      "learning_rate": 4.1375e-05,
      "loss": 0.9883,
      "step": 345
    },
    {
      "epoch": 0.010592022500394618,
      "grad_norm": 0.29684978723526,
      "learning_rate": 4.135e-05,
      "loss": 0.9828,
      "step": 346
    },
    {
      "epoch": 0.010622635282187666,
      "grad_norm": 0.2987184524536133,
      "learning_rate": 4.1325e-05,
      "loss": 1.1564,
      "step": 347
    },
    {
      "epoch": 0.010653248063980714,
      "grad_norm": 0.21890373528003693,
      "learning_rate": 4.13e-05,
      "loss": 0.897,
      "step": 348
    },
    {
      "epoch": 0.010683860845773762,
      "grad_norm": 0.38717278838157654,
      "learning_rate": 4.1275e-05,
      "loss": 0.9153,
      "step": 349
    },
    {
      "epoch": 0.01071447362756681,
      "grad_norm": 0.3077659010887146,
      "learning_rate": 4.125e-05,
      "loss": 0.9971,
      "step": 350
    },
    {
      "epoch": 0.010745086409359858,
      "grad_norm": 0.232235386967659,
      "learning_rate": 4.1225e-05,
      "loss": 0.9472,
      "step": 351
    },
    {
      "epoch": 0.010775699191152906,
      "grad_norm": 0.40856805443763733,
      "learning_rate": 4.12e-05,
      "loss": 1.083,
      "step": 352
    },
    {
      "epoch": 0.010806311972945954,
      "grad_norm": 0.3207026422023773,
      "learning_rate": 4.1175000000000005e-05,
      "loss": 1.0246,
      "step": 353
    },
    {
      "epoch": 0.010836924754739002,
      "grad_norm": 0.4737738072872162,
      "learning_rate": 4.115e-05,
      "loss": 0.9719,
      "step": 354
    },
    {
      "epoch": 0.01086753753653205,
      "grad_norm": 0.2919338643550873,
      "learning_rate": 4.1125000000000004e-05,
      "loss": 1.1687,
      "step": 355
    },
    {
      "epoch": 0.010898150318325098,
      "grad_norm": 0.21928595006465912,
      "learning_rate": 4.11e-05,
      "loss": 0.8446,
      "step": 356
    },
    {
      "epoch": 0.010928763100118146,
      "grad_norm": 0.3204161822795868,
      "learning_rate": 4.1075e-05,
      "loss": 1.0173,
      "step": 357
    },
    {
      "epoch": 0.010959375881911194,
      "grad_norm": 0.3021047115325928,
      "learning_rate": 4.105e-05,
      "loss": 1.0748,
      "step": 358
    },
    {
      "epoch": 0.010989988663704242,
      "grad_norm": 0.6114902496337891,
      "learning_rate": 4.1025e-05,
      "loss": 1.1048,
      "step": 359
    },
    {
      "epoch": 0.01102060144549729,
      "grad_norm": 0.2648085653781891,
      "learning_rate": 4.1e-05,
      "loss": 0.8526,
      "step": 360
    },
    {
      "epoch": 0.011051214227290338,
      "grad_norm": 0.21445344388484955,
      "learning_rate": 4.0975e-05,
      "loss": 0.954,
      "step": 361
    },
    {
      "epoch": 0.011081827009083386,
      "grad_norm": 0.35280972719192505,
      "learning_rate": 4.095e-05,
      "loss": 0.9796,
      "step": 362
    },
    {
      "epoch": 0.011112439790876434,
      "grad_norm": 0.44290852546691895,
      "learning_rate": 4.0925000000000005e-05,
      "loss": 0.9592,
      "step": 363
    },
    {
      "epoch": 0.011143052572669482,
      "grad_norm": 0.2127533107995987,
      "learning_rate": 4.09e-05,
      "loss": 0.9467,
      "step": 364
    },
    {
      "epoch": 0.01117366535446253,
      "grad_norm": 0.3234565556049347,
      "learning_rate": 4.0875000000000004e-05,
      "loss": 0.9247,
      "step": 365
    },
    {
      "epoch": 0.011204278136255578,
      "grad_norm": 0.2790416181087494,
      "learning_rate": 4.085e-05,
      "loss": 1.0419,
      "step": 366
    },
    {
      "epoch": 0.011234890918048626,
      "grad_norm": 0.23793195188045502,
      "learning_rate": 4.0825e-05,
      "loss": 0.9674,
      "step": 367
    },
    {
      "epoch": 0.011265503699841674,
      "grad_norm": 0.35149502754211426,
      "learning_rate": 4.08e-05,
      "loss": 1.0184,
      "step": 368
    },
    {
      "epoch": 0.011296116481634722,
      "grad_norm": 0.6346878409385681,
      "learning_rate": 4.0775e-05,
      "loss": 0.9318,
      "step": 369
    },
    {
      "epoch": 0.01132672926342777,
      "grad_norm": 0.31394344568252563,
      "learning_rate": 4.075e-05,
      "loss": 1.0438,
      "step": 370
    },
    {
      "epoch": 0.011357342045220818,
      "grad_norm": 0.28430941700935364,
      "learning_rate": 4.0725e-05,
      "loss": 0.8807,
      "step": 371
    },
    {
      "epoch": 0.011387954827013866,
      "grad_norm": 0.3925749957561493,
      "learning_rate": 4.07e-05,
      "loss": 0.9129,
      "step": 372
    },
    {
      "epoch": 0.011418567608806914,
      "grad_norm": 0.30308136343955994,
      "learning_rate": 4.0675e-05,
      "loss": 1.0872,
      "step": 373
    },
    {
      "epoch": 0.011449180390599962,
      "grad_norm": 0.2732880115509033,
      "learning_rate": 4.065e-05,
      "loss": 1.015,
      "step": 374
    },
    {
      "epoch": 0.01147979317239301,
      "grad_norm": 0.6180379986763,
      "learning_rate": 4.0625000000000005e-05,
      "loss": 0.989,
      "step": 375
    },
    {
      "epoch": 0.011510405954186058,
      "grad_norm": 0.2653963565826416,
      "learning_rate": 4.0600000000000004e-05,
      "loss": 1.0753,
      "step": 376
    },
    {
      "epoch": 0.011541018735979108,
      "grad_norm": 0.27965572476387024,
      "learning_rate": 4.0575000000000004e-05,
      "loss": 0.8819,
      "step": 377
    },
    {
      "epoch": 0.011571631517772156,
      "grad_norm": 0.40261954069137573,
      "learning_rate": 4.055e-05,
      "loss": 1.1389,
      "step": 378
    },
    {
      "epoch": 0.011602244299565204,
      "grad_norm": 0.36262914538383484,
      "learning_rate": 4.0525e-05,
      "loss": 1.0394,
      "step": 379
    },
    {
      "epoch": 0.011632857081358252,
      "grad_norm": 0.41014599800109863,
      "learning_rate": 4.05e-05,
      "loss": 0.9309,
      "step": 380
    },
    {
      "epoch": 0.0116634698631513,
      "grad_norm": 0.355220764875412,
      "learning_rate": 4.0475e-05,
      "loss": 1.1612,
      "step": 381
    },
    {
      "epoch": 0.011694082644944348,
      "grad_norm": 0.29249659180641174,
      "learning_rate": 4.045000000000001e-05,
      "loss": 1.0615,
      "step": 382
    },
    {
      "epoch": 0.011724695426737396,
      "grad_norm": 0.33440911769866943,
      "learning_rate": 4.0425e-05,
      "loss": 0.8783,
      "step": 383
    },
    {
      "epoch": 0.011755308208530444,
      "grad_norm": 0.3766249716281891,
      "learning_rate": 4.0400000000000006e-05,
      "loss": 0.849,
      "step": 384
    },
    {
      "epoch": 0.011785920990323492,
      "grad_norm": 0.32901686429977417,
      "learning_rate": 4.0375e-05,
      "loss": 0.7778,
      "step": 385
    },
    {
      "epoch": 0.01181653377211654,
      "grad_norm": 0.2320515364408493,
      "learning_rate": 4.0350000000000005e-05,
      "loss": 0.9762,
      "step": 386
    },
    {
      "epoch": 0.011847146553909588,
      "grad_norm": 0.6319522857666016,
      "learning_rate": 4.0325000000000004e-05,
      "loss": 1.0107,
      "step": 387
    },
    {
      "epoch": 0.011877759335702636,
      "grad_norm": 0.39943617582321167,
      "learning_rate": 4.0300000000000004e-05,
      "loss": 0.9612,
      "step": 388
    },
    {
      "epoch": 0.011908372117495684,
      "grad_norm": 0.4257621169090271,
      "learning_rate": 4.0275e-05,
      "loss": 1.0245,
      "step": 389
    },
    {
      "epoch": 0.011938984899288732,
      "grad_norm": 0.2492171972990036,
      "learning_rate": 4.025e-05,
      "loss": 1.0031,
      "step": 390
    },
    {
      "epoch": 0.01196959768108178,
      "grad_norm": 0.4312019944190979,
      "learning_rate": 4.0225e-05,
      "loss": 1.2716,
      "step": 391
    },
    {
      "epoch": 0.012000210462874828,
      "grad_norm": 0.3257039189338684,
      "learning_rate": 4.02e-05,
      "loss": 0.9451,
      "step": 392
    },
    {
      "epoch": 0.012030823244667876,
      "grad_norm": 0.35243457555770874,
      "learning_rate": 4.0175e-05,
      "loss": 1.0165,
      "step": 393
    },
    {
      "epoch": 0.012061436026460924,
      "grad_norm": 0.2617169916629791,
      "learning_rate": 4.015000000000001e-05,
      "loss": 0.8439,
      "step": 394
    },
    {
      "epoch": 0.012092048808253972,
      "grad_norm": 0.5033875107765198,
      "learning_rate": 4.0125e-05,
      "loss": 0.7625,
      "step": 395
    },
    {
      "epoch": 0.01212266159004702,
      "grad_norm": 1.3827236890792847,
      "learning_rate": 4.0100000000000006e-05,
      "loss": 0.9822,
      "step": 396
    },
    {
      "epoch": 0.012153274371840068,
      "grad_norm": 0.4073053002357483,
      "learning_rate": 4.0075e-05,
      "loss": 0.8972,
      "step": 397
    },
    {
      "epoch": 0.012183887153633116,
      "grad_norm": 0.27170345187187195,
      "learning_rate": 4.0050000000000004e-05,
      "loss": 0.9558,
      "step": 398
    },
    {
      "epoch": 0.012214499935426164,
      "grad_norm": 0.3167583644390106,
      "learning_rate": 4.0025000000000004e-05,
      "loss": 1.0337,
      "step": 399
    },
    {
      "epoch": 0.012245112717219212,
      "grad_norm": 0.544268012046814,
      "learning_rate": 4e-05,
      "loss": 1.0785,
      "step": 400
    },
    {
      "epoch": 0.01227572549901226,
      "grad_norm": 0.28161484003067017,
      "learning_rate": 3.9975e-05,
      "loss": 1.0543,
      "step": 401
    },
    {
      "epoch": 0.012306338280805308,
      "grad_norm": 0.4505917727947235,
      "learning_rate": 3.995e-05,
      "loss": 0.9525,
      "step": 402
    },
    {
      "epoch": 0.012336951062598356,
      "grad_norm": 0.4082527756690979,
      "learning_rate": 3.9925e-05,
      "loss": 0.8735,
      "step": 403
    },
    {
      "epoch": 0.012367563844391404,
      "grad_norm": 0.3509488105773926,
      "learning_rate": 3.99e-05,
      "loss": 0.8528,
      "step": 404
    },
    {
      "epoch": 0.012398176626184452,
      "grad_norm": 0.38037213683128357,
      "learning_rate": 3.9875e-05,
      "loss": 1.0697,
      "step": 405
    },
    {
      "epoch": 0.0124287894079775,
      "grad_norm": 0.21663141250610352,
      "learning_rate": 3.9850000000000006e-05,
      "loss": 0.9596,
      "step": 406
    },
    {
      "epoch": 0.012459402189770548,
      "grad_norm": 0.48131683468818665,
      "learning_rate": 3.9825e-05,
      "loss": 1.1302,
      "step": 407
    },
    {
      "epoch": 0.012490014971563596,
      "grad_norm": 0.348501056432724,
      "learning_rate": 3.9800000000000005e-05,
      "loss": 0.9674,
      "step": 408
    },
    {
      "epoch": 0.012520627753356644,
      "grad_norm": 0.3035363256931305,
      "learning_rate": 3.9775e-05,
      "loss": 0.9371,
      "step": 409
    },
    {
      "epoch": 0.012551240535149692,
      "grad_norm": 0.3224133849143982,
      "learning_rate": 3.9750000000000004e-05,
      "loss": 0.929,
      "step": 410
    },
    {
      "epoch": 0.01258185331694274,
      "grad_norm": 0.2708964943885803,
      "learning_rate": 3.9725e-05,
      "loss": 0.9643,
      "step": 411
    },
    {
      "epoch": 0.012612466098735788,
      "grad_norm": 0.27509114146232605,
      "learning_rate": 3.97e-05,
      "loss": 0.8899,
      "step": 412
    },
    {
      "epoch": 0.012643078880528836,
      "grad_norm": 0.34297844767570496,
      "learning_rate": 3.9675e-05,
      "loss": 0.9002,
      "step": 413
    },
    {
      "epoch": 0.012673691662321884,
      "grad_norm": 0.24827967584133148,
      "learning_rate": 3.965e-05,
      "loss": 0.9636,
      "step": 414
    },
    {
      "epoch": 0.012704304444114932,
      "grad_norm": 0.3533557653427124,
      "learning_rate": 3.9625e-05,
      "loss": 0.9839,
      "step": 415
    },
    {
      "epoch": 0.01273491722590798,
      "grad_norm": 0.5620437860488892,
      "learning_rate": 3.960000000000001e-05,
      "loss": 1.108,
      "step": 416
    },
    {
      "epoch": 0.012765530007701028,
      "grad_norm": 0.36419570446014404,
      "learning_rate": 3.9575e-05,
      "loss": 0.8566,
      "step": 417
    },
    {
      "epoch": 0.012796142789494076,
      "grad_norm": 0.41746291518211365,
      "learning_rate": 3.9550000000000006e-05,
      "loss": 0.8512,
      "step": 418
    },
    {
      "epoch": 0.012826755571287124,
      "grad_norm": 0.4195747971534729,
      "learning_rate": 3.9525e-05,
      "loss": 1.033,
      "step": 419
    },
    {
      "epoch": 0.012857368353080172,
      "grad_norm": 0.2939501702785492,
      "learning_rate": 3.9500000000000005e-05,
      "loss": 0.9093,
      "step": 420
    },
    {
      "epoch": 0.01288798113487322,
      "grad_norm": 0.33540424704551697,
      "learning_rate": 3.9475000000000004e-05,
      "loss": 0.9786,
      "step": 421
    },
    {
      "epoch": 0.012918593916666268,
      "grad_norm": 0.1995973140001297,
      "learning_rate": 3.9450000000000003e-05,
      "loss": 0.8087,
      "step": 422
    },
    {
      "epoch": 0.012949206698459316,
      "grad_norm": 0.29084113240242004,
      "learning_rate": 3.9425e-05,
      "loss": 0.9437,
      "step": 423
    },
    {
      "epoch": 0.012979819480252364,
      "grad_norm": 0.641983687877655,
      "learning_rate": 3.94e-05,
      "loss": 0.961,
      "step": 424
    },
    {
      "epoch": 0.013010432262045412,
      "grad_norm": 0.297233909368515,
      "learning_rate": 3.9375e-05,
      "loss": 0.9369,
      "step": 425
    },
    {
      "epoch": 0.01304104504383846,
      "grad_norm": 0.33953672647476196,
      "learning_rate": 3.935e-05,
      "loss": 1.0655,
      "step": 426
    },
    {
      "epoch": 0.013071657825631508,
      "grad_norm": 0.4369617700576782,
      "learning_rate": 3.9325e-05,
      "loss": 0.9956,
      "step": 427
    },
    {
      "epoch": 0.013102270607424556,
      "grad_norm": 0.35427936911582947,
      "learning_rate": 3.9300000000000007e-05,
      "loss": 0.9056,
      "step": 428
    },
    {
      "epoch": 0.013132883389217604,
      "grad_norm": 0.4321453273296356,
      "learning_rate": 3.9275e-05,
      "loss": 1.1412,
      "step": 429
    },
    {
      "epoch": 0.013163496171010652,
      "grad_norm": 0.35322269797325134,
      "learning_rate": 3.9250000000000005e-05,
      "loss": 0.9263,
      "step": 430
    },
    {
      "epoch": 0.0131941089528037,
      "grad_norm": 0.39910754561424255,
      "learning_rate": 3.9225e-05,
      "loss": 0.9147,
      "step": 431
    },
    {
      "epoch": 0.013224721734596748,
      "grad_norm": 0.2874854505062103,
      "learning_rate": 3.9200000000000004e-05,
      "loss": 0.8875,
      "step": 432
    },
    {
      "epoch": 0.013255334516389796,
      "grad_norm": 0.4301827847957611,
      "learning_rate": 3.9175000000000004e-05,
      "loss": 0.9276,
      "step": 433
    },
    {
      "epoch": 0.013285947298182844,
      "grad_norm": 0.5097507238388062,
      "learning_rate": 3.915e-05,
      "loss": 0.9903,
      "step": 434
    },
    {
      "epoch": 0.013316560079975892,
      "grad_norm": 0.3851584792137146,
      "learning_rate": 3.9125e-05,
      "loss": 0.9988,
      "step": 435
    },
    {
      "epoch": 0.01334717286176894,
      "grad_norm": 0.2661309540271759,
      "learning_rate": 3.91e-05,
      "loss": 0.9416,
      "step": 436
    },
    {
      "epoch": 0.013377785643561988,
      "grad_norm": 0.26433101296424866,
      "learning_rate": 3.9075e-05,
      "loss": 0.8406,
      "step": 437
    },
    {
      "epoch": 0.013408398425355036,
      "grad_norm": 0.2906126379966736,
      "learning_rate": 3.905e-05,
      "loss": 0.8064,
      "step": 438
    },
    {
      "epoch": 0.013439011207148084,
      "grad_norm": 0.40845009684562683,
      "learning_rate": 3.9025e-05,
      "loss": 0.8792,
      "step": 439
    },
    {
      "epoch": 0.013469623988941132,
      "grad_norm": 0.4975122809410095,
      "learning_rate": 3.9000000000000006e-05,
      "loss": 1.1226,
      "step": 440
    },
    {
      "epoch": 0.01350023677073418,
      "grad_norm": 0.302641361951828,
      "learning_rate": 3.8975e-05,
      "loss": 0.8366,
      "step": 441
    },
    {
      "epoch": 0.013530849552527228,
      "grad_norm": 0.31614184379577637,
      "learning_rate": 3.8950000000000005e-05,
      "loss": 0.8903,
      "step": 442
    },
    {
      "epoch": 0.013561462334320276,
      "grad_norm": 0.3621780276298523,
      "learning_rate": 3.8925e-05,
      "loss": 0.8711,
      "step": 443
    },
    {
      "epoch": 0.013592075116113324,
      "grad_norm": 0.382834792137146,
      "learning_rate": 3.8900000000000004e-05,
      "loss": 0.9935,
      "step": 444
    },
    {
      "epoch": 0.013622687897906372,
      "grad_norm": 0.27569636702537537,
      "learning_rate": 3.8875e-05,
      "loss": 0.9469,
      "step": 445
    },
    {
      "epoch": 0.01365330067969942,
      "grad_norm": 0.36804407835006714,
      "learning_rate": 3.885e-05,
      "loss": 0.7932,
      "step": 446
    },
    {
      "epoch": 0.013683913461492468,
      "grad_norm": 0.3369120657444,
      "learning_rate": 3.8825e-05,
      "loss": 1.0091,
      "step": 447
    },
    {
      "epoch": 0.013714526243285516,
      "grad_norm": 1.1327863931655884,
      "learning_rate": 3.88e-05,
      "loss": 0.8878,
      "step": 448
    },
    {
      "epoch": 0.013745139025078564,
      "grad_norm": 0.2913079559803009,
      "learning_rate": 3.8775e-05,
      "loss": 0.9454,
      "step": 449
    },
    {
      "epoch": 0.013775751806871612,
      "grad_norm": 0.4744071364402771,
      "learning_rate": 3.875e-05,
      "loss": 1.0158,
      "step": 450
    },
    {
      "epoch": 0.01380636458866466,
      "grad_norm": 0.2775246202945709,
      "learning_rate": 3.8725e-05,
      "loss": 0.8923,
      "step": 451
    },
    {
      "epoch": 0.013836977370457708,
      "grad_norm": 0.5471243858337402,
      "learning_rate": 3.8700000000000006e-05,
      "loss": 0.9794,
      "step": 452
    },
    {
      "epoch": 0.013867590152250756,
      "grad_norm": 0.3521736264228821,
      "learning_rate": 3.8675e-05,
      "loss": 0.9239,
      "step": 453
    },
    {
      "epoch": 0.013898202934043804,
      "grad_norm": 0.41156408190727234,
      "learning_rate": 3.8650000000000004e-05,
      "loss": 1.0331,
      "step": 454
    },
    {
      "epoch": 0.013928815715836852,
      "grad_norm": 0.28897494077682495,
      "learning_rate": 3.8625e-05,
      "loss": 0.8079,
      "step": 455
    },
    {
      "epoch": 0.0139594284976299,
      "grad_norm": 0.31795135140419006,
      "learning_rate": 3.86e-05,
      "loss": 0.9521,
      "step": 456
    },
    {
      "epoch": 0.013990041279422948,
      "grad_norm": 0.324773371219635,
      "learning_rate": 3.8575e-05,
      "loss": 0.883,
      "step": 457
    },
    {
      "epoch": 0.014020654061215996,
      "grad_norm": 0.3187577724456787,
      "learning_rate": 3.855e-05,
      "loss": 0.8336,
      "step": 458
    },
    {
      "epoch": 0.014051266843009044,
      "grad_norm": 0.31265607476234436,
      "learning_rate": 3.8525e-05,
      "loss": 0.9799,
      "step": 459
    },
    {
      "epoch": 0.014081879624802094,
      "grad_norm": 0.2519436478614807,
      "learning_rate": 3.85e-05,
      "loss": 0.8373,
      "step": 460
    },
    {
      "epoch": 0.014112492406595142,
      "grad_norm": 0.29184338450431824,
      "learning_rate": 3.8475e-05,
      "loss": 0.7517,
      "step": 461
    },
    {
      "epoch": 0.01414310518838819,
      "grad_norm": 0.3224530518054962,
      "learning_rate": 3.845e-05,
      "loss": 1.0285,
      "step": 462
    },
    {
      "epoch": 0.014173717970181238,
      "grad_norm": 0.4983194172382355,
      "learning_rate": 3.8425e-05,
      "loss": 1.0185,
      "step": 463
    },
    {
      "epoch": 0.014204330751974286,
      "grad_norm": 0.2955765426158905,
      "learning_rate": 3.8400000000000005e-05,
      "loss": 0.8965,
      "step": 464
    },
    {
      "epoch": 0.014234943533767334,
      "grad_norm": 0.45806610584259033,
      "learning_rate": 3.8375e-05,
      "loss": 1.0827,
      "step": 465
    },
    {
      "epoch": 0.014265556315560382,
      "grad_norm": 0.25348609685897827,
      "learning_rate": 3.8350000000000004e-05,
      "loss": 0.8858,
      "step": 466
    },
    {
      "epoch": 0.01429616909735343,
      "grad_norm": 0.5292565822601318,
      "learning_rate": 3.8324999999999996e-05,
      "loss": 0.9476,
      "step": 467
    },
    {
      "epoch": 0.014326781879146478,
      "grad_norm": 0.315755158662796,
      "learning_rate": 3.83e-05,
      "loss": 0.9681,
      "step": 468
    },
    {
      "epoch": 0.014357394660939526,
      "grad_norm": 0.6082099676132202,
      "learning_rate": 3.8275e-05,
      "loss": 0.9584,
      "step": 469
    },
    {
      "epoch": 0.014388007442732574,
      "grad_norm": 0.29147374629974365,
      "learning_rate": 3.825e-05,
      "loss": 0.944,
      "step": 470
    },
    {
      "epoch": 0.014418620224525622,
      "grad_norm": 0.47573500871658325,
      "learning_rate": 3.8225e-05,
      "loss": 0.856,
      "step": 471
    },
    {
      "epoch": 0.01444923300631867,
      "grad_norm": 1.0226668119430542,
      "learning_rate": 3.82e-05,
      "loss": 0.9995,
      "step": 472
    },
    {
      "epoch": 0.014479845788111718,
      "grad_norm": 0.4918708801269531,
      "learning_rate": 3.8175e-05,
      "loss": 1.0321,
      "step": 473
    },
    {
      "epoch": 0.014510458569904766,
      "grad_norm": 0.27392539381980896,
      "learning_rate": 3.8150000000000006e-05,
      "loss": 0.7431,
      "step": 474
    },
    {
      "epoch": 0.014541071351697814,
      "grad_norm": 0.21749022603034973,
      "learning_rate": 3.8125e-05,
      "loss": 0.8502,
      "step": 475
    },
    {
      "epoch": 0.014571684133490862,
      "grad_norm": 0.31534332036972046,
      "learning_rate": 3.8100000000000005e-05,
      "loss": 0.8597,
      "step": 476
    },
    {
      "epoch": 0.01460229691528391,
      "grad_norm": 0.2787284255027771,
      "learning_rate": 3.8075e-05,
      "loss": 0.7889,
      "step": 477
    },
    {
      "epoch": 0.014632909697076958,
      "grad_norm": 0.7034900188446045,
      "learning_rate": 3.805e-05,
      "loss": 0.7455,
      "step": 478
    },
    {
      "epoch": 0.014663522478870006,
      "grad_norm": 0.3725343644618988,
      "learning_rate": 3.8025e-05,
      "loss": 0.8285,
      "step": 479
    },
    {
      "epoch": 0.014694135260663054,
      "grad_norm": 0.36987271904945374,
      "learning_rate": 3.8e-05,
      "loss": 0.8445,
      "step": 480
    },
    {
      "epoch": 0.014724748042456102,
      "grad_norm": 0.42520707845687866,
      "learning_rate": 3.7975e-05,
      "loss": 0.9946,
      "step": 481
    },
    {
      "epoch": 0.01475536082424915,
      "grad_norm": 0.4494650959968567,
      "learning_rate": 3.795e-05,
      "loss": 0.9314,
      "step": 482
    },
    {
      "epoch": 0.014785973606042198,
      "grad_norm": 0.3106038272380829,
      "learning_rate": 3.7925e-05,
      "loss": 0.9488,
      "step": 483
    },
    {
      "epoch": 0.014816586387835246,
      "grad_norm": 0.4342615008354187,
      "learning_rate": 3.79e-05,
      "loss": 0.69,
      "step": 484
    },
    {
      "epoch": 0.014847199169628294,
      "grad_norm": 0.3828924298286438,
      "learning_rate": 3.7875e-05,
      "loss": 0.9077,
      "step": 485
    },
    {
      "epoch": 0.014877811951421342,
      "grad_norm": 0.47601813077926636,
      "learning_rate": 3.7850000000000005e-05,
      "loss": 0.9473,
      "step": 486
    },
    {
      "epoch": 0.01490842473321439,
      "grad_norm": 0.2929406464099884,
      "learning_rate": 3.7825e-05,
      "loss": 0.798,
      "step": 487
    },
    {
      "epoch": 0.014939037515007438,
      "grad_norm": 0.3211841285228729,
      "learning_rate": 3.7800000000000004e-05,
      "loss": 0.8013,
      "step": 488
    },
    {
      "epoch": 0.014969650296800486,
      "grad_norm": 0.2880224585533142,
      "learning_rate": 3.7775e-05,
      "loss": 0.7617,
      "step": 489
    },
    {
      "epoch": 0.015000263078593534,
      "grad_norm": 0.45974427461624146,
      "learning_rate": 3.775e-05,
      "loss": 0.815,
      "step": 490
    },
    {
      "epoch": 0.015030875860386582,
      "grad_norm": 0.3897387683391571,
      "learning_rate": 3.7725e-05,
      "loss": 0.9047,
      "step": 491
    },
    {
      "epoch": 0.01506148864217963,
      "grad_norm": 0.44781258702278137,
      "learning_rate": 3.77e-05,
      "loss": 1.0626,
      "step": 492
    },
    {
      "epoch": 0.015092101423972678,
      "grad_norm": 0.33484575152397156,
      "learning_rate": 3.7675e-05,
      "loss": 0.9962,
      "step": 493
    },
    {
      "epoch": 0.015122714205765726,
      "grad_norm": 0.4724823832511902,
      "learning_rate": 3.765e-05,
      "loss": 0.7623,
      "step": 494
    },
    {
      "epoch": 0.015153326987558774,
      "grad_norm": 0.41446638107299805,
      "learning_rate": 3.7625e-05,
      "loss": 0.9142,
      "step": 495
    },
    {
      "epoch": 0.015183939769351822,
      "grad_norm": 0.3486267924308777,
      "learning_rate": 3.76e-05,
      "loss": 0.828,
      "step": 496
    },
    {
      "epoch": 0.01521455255114487,
      "grad_norm": 0.9551460146903992,
      "learning_rate": 3.7575e-05,
      "loss": 1.0092,
      "step": 497
    },
    {
      "epoch": 0.015245165332937918,
      "grad_norm": 0.27103862166404724,
      "learning_rate": 3.7550000000000005e-05,
      "loss": 0.8388,
      "step": 498
    },
    {
      "epoch": 0.015275778114730966,
      "grad_norm": 0.368041068315506,
      "learning_rate": 3.7525e-05,
      "loss": 0.8794,
      "step": 499
    },
    {
      "epoch": 0.015306390896524014,
      "grad_norm": 0.4358421564102173,
      "learning_rate": 3.7500000000000003e-05,
      "loss": 1.0584,
      "step": 500
    },
    {
      "epoch": 0.015337003678317062,
      "grad_norm": 0.2618415653705597,
      "learning_rate": 3.7475e-05,
      "loss": 0.8781,
      "step": 501
    },
    {
      "epoch": 0.01536761646011011,
      "grad_norm": 0.37172091007232666,
      "learning_rate": 3.745e-05,
      "loss": 0.878,
      "step": 502
    },
    {
      "epoch": 0.015398229241903158,
      "grad_norm": 0.3034621477127075,
      "learning_rate": 3.7425e-05,
      "loss": 0.9033,
      "step": 503
    },
    {
      "epoch": 0.015428842023696206,
      "grad_norm": 0.7021600008010864,
      "learning_rate": 3.74e-05,
      "loss": 0.9087,
      "step": 504
    },
    {
      "epoch": 0.015459454805489254,
      "grad_norm": 0.33707287907600403,
      "learning_rate": 3.737500000000001e-05,
      "loss": 0.7835,
      "step": 505
    },
    {
      "epoch": 0.015490067587282302,
      "grad_norm": 4.638749599456787,
      "learning_rate": 3.735e-05,
      "loss": 0.8894,
      "step": 506
    },
    {
      "epoch": 0.01552068036907535,
      "grad_norm": 0.2884310781955719,
      "learning_rate": 3.7325000000000006e-05,
      "loss": 0.9444,
      "step": 507
    },
    {
      "epoch": 0.015551293150868398,
      "grad_norm": 0.31513479351997375,
      "learning_rate": 3.73e-05,
      "loss": 0.7662,
      "step": 508
    },
    {
      "epoch": 0.015581905932661446,
      "grad_norm": 0.31248098611831665,
      "learning_rate": 3.7275000000000005e-05,
      "loss": 0.7991,
      "step": 509
    },
    {
      "epoch": 0.015612518714454494,
      "grad_norm": 0.23449936509132385,
      "learning_rate": 3.7250000000000004e-05,
      "loss": 0.7318,
      "step": 510
    },
    {
      "epoch": 0.015643131496247544,
      "grad_norm": 0.767121434211731,
      "learning_rate": 3.7225000000000004e-05,
      "loss": 0.9087,
      "step": 511
    },
    {
      "epoch": 0.01567374427804059,
      "grad_norm": 0.5392833352088928,
      "learning_rate": 3.72e-05,
      "loss": 0.8572,
      "step": 512
    },
    {
      "epoch": 0.01570435705983364,
      "grad_norm": 1.024505376815796,
      "learning_rate": 3.7175e-05,
      "loss": 0.7917,
      "step": 513
    },
    {
      "epoch": 0.015734969841626686,
      "grad_norm": 0.23068782687187195,
      "learning_rate": 3.715e-05,
      "loss": 0.8422,
      "step": 514
    },
    {
      "epoch": 0.015765582623419736,
      "grad_norm": 0.3498382270336151,
      "learning_rate": 3.7125e-05,
      "loss": 0.9405,
      "step": 515
    },
    {
      "epoch": 0.015796195405212782,
      "grad_norm": 0.8079524040222168,
      "learning_rate": 3.71e-05,
      "loss": 0.8007,
      "step": 516
    },
    {
      "epoch": 0.015826808187005832,
      "grad_norm": 0.4474254250526428,
      "learning_rate": 3.707500000000001e-05,
      "loss": 0.8638,
      "step": 517
    },
    {
      "epoch": 0.01585742096879888,
      "grad_norm": 0.2972578704357147,
      "learning_rate": 3.705e-05,
      "loss": 0.8811,
      "step": 518
    },
    {
      "epoch": 0.015888033750591928,
      "grad_norm": 0.3927738070487976,
      "learning_rate": 3.7025000000000005e-05,
      "loss": 0.8872,
      "step": 519
    },
    {
      "epoch": 0.015918646532384974,
      "grad_norm": 0.2890996038913727,
      "learning_rate": 3.7e-05,
      "loss": 0.7789,
      "step": 520
    },
    {
      "epoch": 0.015949259314178024,
      "grad_norm": 0.27529260516166687,
      "learning_rate": 3.6975000000000004e-05,
      "loss": 0.845,
      "step": 521
    },
    {
      "epoch": 0.01597987209597107,
      "grad_norm": 0.3284979462623596,
      "learning_rate": 3.6950000000000004e-05,
      "loss": 0.9146,
      "step": 522
    },
    {
      "epoch": 0.01601048487776412,
      "grad_norm": 0.3008323609828949,
      "learning_rate": 3.6925e-05,
      "loss": 0.8513,
      "step": 523
    },
    {
      "epoch": 0.016041097659557167,
      "grad_norm": 0.39038926362991333,
      "learning_rate": 3.69e-05,
      "loss": 0.9117,
      "step": 524
    },
    {
      "epoch": 0.016071710441350216,
      "grad_norm": 0.4720897376537323,
      "learning_rate": 3.6875e-05,
      "loss": 0.8568,
      "step": 525
    },
    {
      "epoch": 0.016102323223143263,
      "grad_norm": 0.27326008677482605,
      "learning_rate": 3.685e-05,
      "loss": 0.9808,
      "step": 526
    },
    {
      "epoch": 0.016132936004936312,
      "grad_norm": 0.31973448395729065,
      "learning_rate": 3.6825e-05,
      "loss": 0.9068,
      "step": 527
    },
    {
      "epoch": 0.01616354878672936,
      "grad_norm": 0.4367961585521698,
      "learning_rate": 3.68e-05,
      "loss": 0.8865,
      "step": 528
    },
    {
      "epoch": 0.01619416156852241,
      "grad_norm": 0.3773605525493622,
      "learning_rate": 3.6775000000000006e-05,
      "loss": 0.8952,
      "step": 529
    },
    {
      "epoch": 0.016224774350315455,
      "grad_norm": 0.25222456455230713,
      "learning_rate": 3.675e-05,
      "loss": 0.7594,
      "step": 530
    },
    {
      "epoch": 0.016255387132108504,
      "grad_norm": 0.3681707978248596,
      "learning_rate": 3.6725000000000005e-05,
      "loss": 1.0498,
      "step": 531
    },
    {
      "epoch": 0.01628599991390155,
      "grad_norm": 0.3991866111755371,
      "learning_rate": 3.6700000000000004e-05,
      "loss": 0.9273,
      "step": 532
    },
    {
      "epoch": 0.0163166126956946,
      "grad_norm": 0.2792300283908844,
      "learning_rate": 3.6675000000000004e-05,
      "loss": 0.8248,
      "step": 533
    },
    {
      "epoch": 0.016347225477487647,
      "grad_norm": 0.23072956502437592,
      "learning_rate": 3.665e-05,
      "loss": 0.8122,
      "step": 534
    },
    {
      "epoch": 0.016377838259280696,
      "grad_norm": 0.28638577461242676,
      "learning_rate": 3.6625e-05,
      "loss": 0.9414,
      "step": 535
    },
    {
      "epoch": 0.016408451041073743,
      "grad_norm": 0.438772052526474,
      "learning_rate": 3.66e-05,
      "loss": 0.9112,
      "step": 536
    },
    {
      "epoch": 0.016439063822866792,
      "grad_norm": 0.4045664966106415,
      "learning_rate": 3.6575e-05,
      "loss": 0.9539,
      "step": 537
    },
    {
      "epoch": 0.01646967660465984,
      "grad_norm": 0.34289786219596863,
      "learning_rate": 3.655e-05,
      "loss": 0.8361,
      "step": 538
    },
    {
      "epoch": 0.01650028938645289,
      "grad_norm": 0.40648511052131653,
      "learning_rate": 3.652500000000001e-05,
      "loss": 1.0726,
      "step": 539
    },
    {
      "epoch": 0.016530902168245935,
      "grad_norm": 0.4268176555633545,
      "learning_rate": 3.65e-05,
      "loss": 1.1008,
      "step": 540
    },
    {
      "epoch": 0.016561514950038984,
      "grad_norm": 0.34644895792007446,
      "learning_rate": 3.6475000000000006e-05,
      "loss": 0.7326,
      "step": 541
    },
    {
      "epoch": 0.01659212773183203,
      "grad_norm": 0.668674111366272,
      "learning_rate": 3.645e-05,
      "loss": 0.9912,
      "step": 542
    },
    {
      "epoch": 0.01662274051362508,
      "grad_norm": 0.47912219166755676,
      "learning_rate": 3.6425000000000004e-05,
      "loss": 1.0621,
      "step": 543
    },
    {
      "epoch": 0.016653353295418127,
      "grad_norm": 0.38176435232162476,
      "learning_rate": 3.6400000000000004e-05,
      "loss": 0.8036,
      "step": 544
    },
    {
      "epoch": 0.016683966077211176,
      "grad_norm": 0.2719891369342804,
      "learning_rate": 3.6375e-05,
      "loss": 0.7551,
      "step": 545
    },
    {
      "epoch": 0.016714578859004223,
      "grad_norm": 0.3830476403236389,
      "learning_rate": 3.635e-05,
      "loss": 0.7851,
      "step": 546
    },
    {
      "epoch": 0.016745191640797272,
      "grad_norm": 0.4239504933357239,
      "learning_rate": 3.6325e-05,
      "loss": 0.9947,
      "step": 547
    },
    {
      "epoch": 0.01677580442259032,
      "grad_norm": 1.0161861181259155,
      "learning_rate": 3.63e-05,
      "loss": 0.8239,
      "step": 548
    },
    {
      "epoch": 0.01680641720438337,
      "grad_norm": 0.24958951771259308,
      "learning_rate": 3.6275e-05,
      "loss": 0.8049,
      "step": 549
    },
    {
      "epoch": 0.016837029986176415,
      "grad_norm": 0.8752604722976685,
      "learning_rate": 3.625e-05,
      "loss": 0.9053,
      "step": 550
    },
    {
      "epoch": 0.016867642767969464,
      "grad_norm": 0.3758391737937927,
      "learning_rate": 3.6225000000000006e-05,
      "loss": 0.877,
      "step": 551
    },
    {
      "epoch": 0.01689825554976251,
      "grad_norm": 0.34654733538627625,
      "learning_rate": 3.62e-05,
      "loss": 0.9319,
      "step": 552
    },
    {
      "epoch": 0.01692886833155556,
      "grad_norm": 0.5001850724220276,
      "learning_rate": 3.6175000000000005e-05,
      "loss": 0.8768,
      "step": 553
    },
    {
      "epoch": 0.016959481113348607,
      "grad_norm": 0.2659253478050232,
      "learning_rate": 3.615e-05,
      "loss": 0.8194,
      "step": 554
    },
    {
      "epoch": 0.016990093895141656,
      "grad_norm": 0.3131043314933777,
      "learning_rate": 3.6125000000000004e-05,
      "loss": 0.8688,
      "step": 555
    },
    {
      "epoch": 0.017020706676934703,
      "grad_norm": 0.26013121008872986,
      "learning_rate": 3.61e-05,
      "loss": 0.8478,
      "step": 556
    },
    {
      "epoch": 0.017051319458727753,
      "grad_norm": 0.26441270112991333,
      "learning_rate": 3.6075e-05,
      "loss": 0.9225,
      "step": 557
    },
    {
      "epoch": 0.0170819322405208,
      "grad_norm": 0.28429439663887024,
      "learning_rate": 3.605e-05,
      "loss": 0.9211,
      "step": 558
    },
    {
      "epoch": 0.01711254502231385,
      "grad_norm": 0.4862866997718811,
      "learning_rate": 3.6025e-05,
      "loss": 0.8638,
      "step": 559
    },
    {
      "epoch": 0.017143157804106895,
      "grad_norm": 0.39234083890914917,
      "learning_rate": 3.6e-05,
      "loss": 0.8373,
      "step": 560
    },
    {
      "epoch": 0.017173770585899945,
      "grad_norm": 0.3691679835319519,
      "learning_rate": 3.5975e-05,
      "loss": 0.729,
      "step": 561
    },
    {
      "epoch": 0.01720438336769299,
      "grad_norm": 0.4773855209350586,
      "learning_rate": 3.595e-05,
      "loss": 0.7425,
      "step": 562
    },
    {
      "epoch": 0.01723499614948604,
      "grad_norm": 0.3226189613342285,
      "learning_rate": 3.5925000000000006e-05,
      "loss": 0.9762,
      "step": 563
    },
    {
      "epoch": 0.017265608931279087,
      "grad_norm": 0.38786566257476807,
      "learning_rate": 3.59e-05,
      "loss": 0.8073,
      "step": 564
    },
    {
      "epoch": 0.017296221713072137,
      "grad_norm": 0.2688257396221161,
      "learning_rate": 3.5875000000000005e-05,
      "loss": 0.6922,
      "step": 565
    },
    {
      "epoch": 0.017326834494865183,
      "grad_norm": 0.25467291474342346,
      "learning_rate": 3.585e-05,
      "loss": 0.8482,
      "step": 566
    },
    {
      "epoch": 0.017357447276658233,
      "grad_norm": 0.4827530086040497,
      "learning_rate": 3.5825000000000003e-05,
      "loss": 0.653,
      "step": 567
    },
    {
      "epoch": 0.01738806005845128,
      "grad_norm": 0.480927973985672,
      "learning_rate": 3.58e-05,
      "loss": 0.9425,
      "step": 568
    },
    {
      "epoch": 0.01741867284024433,
      "grad_norm": 0.3428303897380829,
      "learning_rate": 3.5775e-05,
      "loss": 0.9557,
      "step": 569
    },
    {
      "epoch": 0.017449285622037375,
      "grad_norm": 0.37214699387550354,
      "learning_rate": 3.575e-05,
      "loss": 0.8703,
      "step": 570
    },
    {
      "epoch": 0.017479898403830425,
      "grad_norm": 0.5661745071411133,
      "learning_rate": 3.5725e-05,
      "loss": 0.7014,
      "step": 571
    },
    {
      "epoch": 0.01751051118562347,
      "grad_norm": 0.49920737743377686,
      "learning_rate": 3.57e-05,
      "loss": 0.8206,
      "step": 572
    },
    {
      "epoch": 0.01754112396741652,
      "grad_norm": 0.29271331429481506,
      "learning_rate": 3.5675e-05,
      "loss": 0.8664,
      "step": 573
    },
    {
      "epoch": 0.017571736749209567,
      "grad_norm": 0.26470863819122314,
      "learning_rate": 3.565e-05,
      "loss": 0.9198,
      "step": 574
    },
    {
      "epoch": 0.017602349531002617,
      "grad_norm": 0.2486710101366043,
      "learning_rate": 3.5625000000000005e-05,
      "loss": 0.7273,
      "step": 575
    },
    {
      "epoch": 0.017632962312795663,
      "grad_norm": 0.21507400274276733,
      "learning_rate": 3.56e-05,
      "loss": 0.8161,
      "step": 576
    },
    {
      "epoch": 0.017663575094588713,
      "grad_norm": 0.34656915068626404,
      "learning_rate": 3.5575000000000004e-05,
      "loss": 0.9396,
      "step": 577
    },
    {
      "epoch": 0.01769418787638176,
      "grad_norm": 0.2706884741783142,
      "learning_rate": 3.555e-05,
      "loss": 0.7918,
      "step": 578
    },
    {
      "epoch": 0.01772480065817481,
      "grad_norm": 0.2701553702354431,
      "learning_rate": 3.5525e-05,
      "loss": 0.861,
      "step": 579
    },
    {
      "epoch": 0.017755413439967855,
      "grad_norm": 0.583931565284729,
      "learning_rate": 3.55e-05,
      "loss": 0.8764,
      "step": 580
    },
    {
      "epoch": 0.017786026221760905,
      "grad_norm": 0.1912572830915451,
      "learning_rate": 3.5475e-05,
      "loss": 0.7195,
      "step": 581
    },
    {
      "epoch": 0.01781663900355395,
      "grad_norm": 0.5616427063941956,
      "learning_rate": 3.545e-05,
      "loss": 0.8549,
      "step": 582
    },
    {
      "epoch": 0.017847251785347,
      "grad_norm": 0.3819722533226013,
      "learning_rate": 3.5425e-05,
      "loss": 0.93,
      "step": 583
    },
    {
      "epoch": 0.017877864567140047,
      "grad_norm": 0.36002546548843384,
      "learning_rate": 3.54e-05,
      "loss": 0.9775,
      "step": 584
    },
    {
      "epoch": 0.017908477348933097,
      "grad_norm": 0.4954698979854584,
      "learning_rate": 3.5375e-05,
      "loss": 0.9408,
      "step": 585
    },
    {
      "epoch": 0.017939090130726146,
      "grad_norm": 0.463946133852005,
      "learning_rate": 3.535e-05,
      "loss": 0.9196,
      "step": 586
    },
    {
      "epoch": 0.017969702912519193,
      "grad_norm": 0.4038791060447693,
      "learning_rate": 3.5325000000000005e-05,
      "loss": 0.9381,
      "step": 587
    },
    {
      "epoch": 0.018000315694312242,
      "grad_norm": 0.31586113572120667,
      "learning_rate": 3.53e-05,
      "loss": 0.8686,
      "step": 588
    },
    {
      "epoch": 0.01803092847610529,
      "grad_norm": 0.2986752688884735,
      "learning_rate": 3.5275000000000004e-05,
      "loss": 0.8434,
      "step": 589
    },
    {
      "epoch": 0.01806154125789834,
      "grad_norm": 0.3509804606437683,
      "learning_rate": 3.525e-05,
      "loss": 0.7944,
      "step": 590
    },
    {
      "epoch": 0.018092154039691385,
      "grad_norm": 0.2890898883342743,
      "learning_rate": 3.5225e-05,
      "loss": 0.774,
      "step": 591
    },
    {
      "epoch": 0.018122766821484435,
      "grad_norm": 0.31984376907348633,
      "learning_rate": 3.52e-05,
      "loss": 0.8466,
      "step": 592
    },
    {
      "epoch": 0.01815337960327748,
      "grad_norm": 0.3651171326637268,
      "learning_rate": 3.5175e-05,
      "loss": 0.919,
      "step": 593
    },
    {
      "epoch": 0.01818399238507053,
      "grad_norm": 0.2846416234970093,
      "learning_rate": 3.515e-05,
      "loss": 0.9245,
      "step": 594
    },
    {
      "epoch": 0.018214605166863577,
      "grad_norm": 0.26934918761253357,
      "learning_rate": 3.5125e-05,
      "loss": 0.8597,
      "step": 595
    },
    {
      "epoch": 0.018245217948656627,
      "grad_norm": 0.3009042739868164,
      "learning_rate": 3.51e-05,
      "loss": 0.8345,
      "step": 596
    },
    {
      "epoch": 0.018275830730449673,
      "grad_norm": 0.4045560956001282,
      "learning_rate": 3.5075000000000006e-05,
      "loss": 0.9014,
      "step": 597
    },
    {
      "epoch": 0.018306443512242723,
      "grad_norm": 0.32996201515197754,
      "learning_rate": 3.505e-05,
      "loss": 0.8844,
      "step": 598
    },
    {
      "epoch": 0.01833705629403577,
      "grad_norm": 0.4058746099472046,
      "learning_rate": 3.5025000000000004e-05,
      "loss": 0.9189,
      "step": 599
    },
    {
      "epoch": 0.01836766907582882,
      "grad_norm": 0.2129165232181549,
      "learning_rate": 3.5e-05,
      "loss": 0.87,
      "step": 600
    },
    {
      "epoch": 0.018398281857621865,
      "grad_norm": 0.4430253207683563,
      "learning_rate": 3.4975e-05,
      "loss": 0.8657,
      "step": 601
    },
    {
      "epoch": 0.018428894639414915,
      "grad_norm": 0.3259618878364563,
      "learning_rate": 3.495e-05,
      "loss": 0.8186,
      "step": 602
    },
    {
      "epoch": 0.01845950742120796,
      "grad_norm": 0.2879594564437866,
      "learning_rate": 3.4925e-05,
      "loss": 0.7099,
      "step": 603
    },
    {
      "epoch": 0.01849012020300101,
      "grad_norm": 0.6667692065238953,
      "learning_rate": 3.49e-05,
      "loss": 0.926,
      "step": 604
    },
    {
      "epoch": 0.018520732984794057,
      "grad_norm": 0.2307976484298706,
      "learning_rate": 3.4875e-05,
      "loss": 0.7976,
      "step": 605
    },
    {
      "epoch": 0.018551345766587107,
      "grad_norm": 1.4800783395767212,
      "learning_rate": 3.485e-05,
      "loss": 1.0168,
      "step": 606
    },
    {
      "epoch": 0.018581958548380153,
      "grad_norm": 0.2238824963569641,
      "learning_rate": 3.4825e-05,
      "loss": 0.7631,
      "step": 607
    },
    {
      "epoch": 0.018612571330173203,
      "grad_norm": 0.2500225901603699,
      "learning_rate": 3.48e-05,
      "loss": 0.7313,
      "step": 608
    },
    {
      "epoch": 0.01864318411196625,
      "grad_norm": 1.5721186399459839,
      "learning_rate": 3.4775000000000005e-05,
      "loss": 0.794,
      "step": 609
    },
    {
      "epoch": 0.0186737968937593,
      "grad_norm": 0.40846699476242065,
      "learning_rate": 3.475e-05,
      "loss": 0.9054,
      "step": 610
    },
    {
      "epoch": 0.018704409675552345,
      "grad_norm": 0.2577510178089142,
      "learning_rate": 3.4725000000000004e-05,
      "loss": 0.743,
      "step": 611
    },
    {
      "epoch": 0.018735022457345395,
      "grad_norm": 0.344911128282547,
      "learning_rate": 3.4699999999999996e-05,
      "loss": 0.8211,
      "step": 612
    },
    {
      "epoch": 0.01876563523913844,
      "grad_norm": 0.36572185158729553,
      "learning_rate": 3.4675e-05,
      "loss": 0.8432,
      "step": 613
    },
    {
      "epoch": 0.01879624802093149,
      "grad_norm": 0.34270673990249634,
      "learning_rate": 3.465e-05,
      "loss": 0.9061,
      "step": 614
    },
    {
      "epoch": 0.018826860802724537,
      "grad_norm": 0.3147833049297333,
      "learning_rate": 3.4625e-05,
      "loss": 0.8584,
      "step": 615
    },
    {
      "epoch": 0.018857473584517587,
      "grad_norm": 0.3632817566394806,
      "learning_rate": 3.46e-05,
      "loss": 0.9312,
      "step": 616
    },
    {
      "epoch": 0.018888086366310633,
      "grad_norm": 0.2850439250469208,
      "learning_rate": 3.4575e-05,
      "loss": 0.8784,
      "step": 617
    },
    {
      "epoch": 0.018918699148103683,
      "grad_norm": 0.6859099268913269,
      "learning_rate": 3.455e-05,
      "loss": 0.8974,
      "step": 618
    },
    {
      "epoch": 0.01894931192989673,
      "grad_norm": 0.25477033853530884,
      "learning_rate": 3.4525e-05,
      "loss": 0.7955,
      "step": 619
    },
    {
      "epoch": 0.01897992471168978,
      "grad_norm": 2.632477283477783,
      "learning_rate": 3.45e-05,
      "loss": 0.8219,
      "step": 620
    },
    {
      "epoch": 0.019010537493482825,
      "grad_norm": 0.4404035210609436,
      "learning_rate": 3.4475000000000005e-05,
      "loss": 0.8415,
      "step": 621
    },
    {
      "epoch": 0.019041150275275875,
      "grad_norm": 0.38690224289894104,
      "learning_rate": 3.445e-05,
      "loss": 0.8456,
      "step": 622
    },
    {
      "epoch": 0.01907176305706892,
      "grad_norm": 0.5020187497138977,
      "learning_rate": 3.4425e-05,
      "loss": 0.9359,
      "step": 623
    },
    {
      "epoch": 0.01910237583886197,
      "grad_norm": 0.3279241621494293,
      "learning_rate": 3.4399999999999996e-05,
      "loss": 0.8769,
      "step": 624
    },
    {
      "epoch": 0.019132988620655017,
      "grad_norm": 0.627220630645752,
      "learning_rate": 3.4375e-05,
      "loss": 0.9002,
      "step": 625
    },
    {
      "epoch": 0.019163601402448067,
      "grad_norm": 0.3282918632030487,
      "learning_rate": 3.435e-05,
      "loss": 1.0085,
      "step": 626
    },
    {
      "epoch": 0.019194214184241113,
      "grad_norm": 0.2484091967344284,
      "learning_rate": 3.4325e-05,
      "loss": 0.8351,
      "step": 627
    },
    {
      "epoch": 0.019224826966034163,
      "grad_norm": 0.2688825726509094,
      "learning_rate": 3.430000000000001e-05,
      "loss": 0.7895,
      "step": 628
    },
    {
      "epoch": 0.01925543974782721,
      "grad_norm": 0.5772212743759155,
      "learning_rate": 3.4275e-05,
      "loss": 0.8499,
      "step": 629
    },
    {
      "epoch": 0.01928605252962026,
      "grad_norm": 0.3061743974685669,
      "learning_rate": 3.4250000000000006e-05,
      "loss": 0.8477,
      "step": 630
    },
    {
      "epoch": 0.019316665311413305,
      "grad_norm": 0.2991320788860321,
      "learning_rate": 3.4225e-05,
      "loss": 0.7652,
      "step": 631
    },
    {
      "epoch": 0.019347278093206355,
      "grad_norm": 0.317810982465744,
      "learning_rate": 3.4200000000000005e-05,
      "loss": 1.0683,
      "step": 632
    },
    {
      "epoch": 0.0193778908749994,
      "grad_norm": 0.35662874579429626,
      "learning_rate": 3.4175000000000004e-05,
      "loss": 0.8481,
      "step": 633
    },
    {
      "epoch": 0.01940850365679245,
      "grad_norm": 0.3270532786846161,
      "learning_rate": 3.415e-05,
      "loss": 0.5951,
      "step": 634
    },
    {
      "epoch": 0.019439116438585497,
      "grad_norm": 0.4919185936450958,
      "learning_rate": 3.4125e-05,
      "loss": 0.9823,
      "step": 635
    },
    {
      "epoch": 0.019469729220378547,
      "grad_norm": 0.19817084074020386,
      "learning_rate": 3.41e-05,
      "loss": 0.6874,
      "step": 636
    },
    {
      "epoch": 0.019500342002171593,
      "grad_norm": 0.2106780856847763,
      "learning_rate": 3.4075e-05,
      "loss": 0.8035,
      "step": 637
    },
    {
      "epoch": 0.019530954783964643,
      "grad_norm": 0.26843005418777466,
      "learning_rate": 3.405e-05,
      "loss": 0.7261,
      "step": 638
    },
    {
      "epoch": 0.01956156756575769,
      "grad_norm": 0.36957550048828125,
      "learning_rate": 3.4025e-05,
      "loss": 0.8018,
      "step": 639
    },
    {
      "epoch": 0.01959218034755074,
      "grad_norm": 0.24422922730445862,
      "learning_rate": 3.4000000000000007e-05,
      "loss": 0.7871,
      "step": 640
    },
    {
      "epoch": 0.019622793129343785,
      "grad_norm": 0.2228461652994156,
      "learning_rate": 3.3975e-05,
      "loss": 0.7705,
      "step": 641
    },
    {
      "epoch": 0.019653405911136835,
      "grad_norm": 0.24270862340927124,
      "learning_rate": 3.3950000000000005e-05,
      "loss": 0.8143,
      "step": 642
    },
    {
      "epoch": 0.01968401869292988,
      "grad_norm": 0.23259024322032928,
      "learning_rate": 3.3925e-05,
      "loss": 0.8723,
      "step": 643
    },
    {
      "epoch": 0.01971463147472293,
      "grad_norm": 0.2720611095428467,
      "learning_rate": 3.3900000000000004e-05,
      "loss": 0.7758,
      "step": 644
    },
    {
      "epoch": 0.019745244256515977,
      "grad_norm": 0.25908133387565613,
      "learning_rate": 3.3875000000000003e-05,
      "loss": 0.7107,
      "step": 645
    },
    {
      "epoch": 0.019775857038309027,
      "grad_norm": 0.37921658158302307,
      "learning_rate": 3.385e-05,
      "loss": 0.7974,
      "step": 646
    },
    {
      "epoch": 0.019806469820102073,
      "grad_norm": 0.33871352672576904,
      "learning_rate": 3.3825e-05,
      "loss": 0.7902,
      "step": 647
    },
    {
      "epoch": 0.019837082601895123,
      "grad_norm": 0.35260137915611267,
      "learning_rate": 3.38e-05,
      "loss": 0.9127,
      "step": 648
    },
    {
      "epoch": 0.01986769538368817,
      "grad_norm": 0.26341089606285095,
      "learning_rate": 3.3775e-05,
      "loss": 0.7844,
      "step": 649
    },
    {
      "epoch": 0.01989830816548122,
      "grad_norm": 0.24807937443256378,
      "learning_rate": 3.375000000000001e-05,
      "loss": 0.7732,
      "step": 650
    },
    {
      "epoch": 0.019928920947274265,
      "grad_norm": 0.33860963582992554,
      "learning_rate": 3.3725e-05,
      "loss": 0.8426,
      "step": 651
    },
    {
      "epoch": 0.019959533729067315,
      "grad_norm": 0.2957013249397278,
      "learning_rate": 3.3700000000000006e-05,
      "loss": 0.9702,
      "step": 652
    },
    {
      "epoch": 0.01999014651086036,
      "grad_norm": 0.24360673129558563,
      "learning_rate": 3.3675e-05,
      "loss": 0.8597,
      "step": 653
    },
    {
      "epoch": 0.02002075929265341,
      "grad_norm": 0.5375427007675171,
      "learning_rate": 3.3650000000000005e-05,
      "loss": 0.9312,
      "step": 654
    },
    {
      "epoch": 0.020051372074446457,
      "grad_norm": 0.33342239260673523,
      "learning_rate": 3.3625000000000004e-05,
      "loss": 0.966,
      "step": 655
    },
    {
      "epoch": 0.020081984856239507,
      "grad_norm": 0.26135486364364624,
      "learning_rate": 3.3600000000000004e-05,
      "loss": 0.9123,
      "step": 656
    },
    {
      "epoch": 0.020112597638032553,
      "grad_norm": 0.3569001257419586,
      "learning_rate": 3.3575e-05,
      "loss": 0.7723,
      "step": 657
    },
    {
      "epoch": 0.020143210419825603,
      "grad_norm": 0.22794514894485474,
      "learning_rate": 3.355e-05,
      "loss": 0.8611,
      "step": 658
    },
    {
      "epoch": 0.02017382320161865,
      "grad_norm": 0.235270157456398,
      "learning_rate": 3.3525e-05,
      "loss": 0.7918,
      "step": 659
    },
    {
      "epoch": 0.0202044359834117,
      "grad_norm": 0.22864991426467896,
      "learning_rate": 3.35e-05,
      "loss": 0.9322,
      "step": 660
    },
    {
      "epoch": 0.020235048765204745,
      "grad_norm": 0.48451244831085205,
      "learning_rate": 3.3475e-05,
      "loss": 0.8038,
      "step": 661
    },
    {
      "epoch": 0.020265661546997795,
      "grad_norm": 0.45683759450912476,
      "learning_rate": 3.345000000000001e-05,
      "loss": 0.8445,
      "step": 662
    },
    {
      "epoch": 0.02029627432879084,
      "grad_norm": 0.4464922547340393,
      "learning_rate": 3.3425e-05,
      "loss": 0.8633,
      "step": 663
    },
    {
      "epoch": 0.02032688711058389,
      "grad_norm": 0.23868532478809357,
      "learning_rate": 3.3400000000000005e-05,
      "loss": 0.78,
      "step": 664
    },
    {
      "epoch": 0.020357499892376937,
      "grad_norm": 0.24545443058013916,
      "learning_rate": 3.3375e-05,
      "loss": 0.6389,
      "step": 665
    },
    {
      "epoch": 0.020388112674169987,
      "grad_norm": 0.4667823314666748,
      "learning_rate": 3.3350000000000004e-05,
      "loss": 0.7948,
      "step": 666
    },
    {
      "epoch": 0.020418725455963033,
      "grad_norm": 0.27301958203315735,
      "learning_rate": 3.3325000000000004e-05,
      "loss": 0.8955,
      "step": 667
    },
    {
      "epoch": 0.020449338237756083,
      "grad_norm": 0.4060043394565582,
      "learning_rate": 3.33e-05,
      "loss": 1.0506,
      "step": 668
    },
    {
      "epoch": 0.020479951019549133,
      "grad_norm": 0.31966814398765564,
      "learning_rate": 3.3275e-05,
      "loss": 0.9239,
      "step": 669
    },
    {
      "epoch": 0.02051056380134218,
      "grad_norm": 0.2404809296131134,
      "learning_rate": 3.325e-05,
      "loss": 0.788,
      "step": 670
    },
    {
      "epoch": 0.02054117658313523,
      "grad_norm": 0.24925701320171356,
      "learning_rate": 3.3225e-05,
      "loss": 0.881,
      "step": 671
    },
    {
      "epoch": 0.020571789364928275,
      "grad_norm": 0.1997763067483902,
      "learning_rate": 3.32e-05,
      "loss": 0.7449,
      "step": 672
    },
    {
      "epoch": 0.020602402146721325,
      "grad_norm": 0.25853338837623596,
      "learning_rate": 3.3175e-05,
      "loss": 0.6971,
      "step": 673
    },
    {
      "epoch": 0.02063301492851437,
      "grad_norm": 0.4804621636867523,
      "learning_rate": 3.3150000000000006e-05,
      "loss": 0.9918,
      "step": 674
    },
    {
      "epoch": 0.02066362771030742,
      "grad_norm": 0.21898382902145386,
      "learning_rate": 3.3125e-05,
      "loss": 0.8263,
      "step": 675
    },
    {
      "epoch": 0.020694240492100467,
      "grad_norm": 0.30123060941696167,
      "learning_rate": 3.3100000000000005e-05,
      "loss": 0.9395,
      "step": 676
    },
    {
      "epoch": 0.020724853273893517,
      "grad_norm": 0.5006719827651978,
      "learning_rate": 3.3075e-05,
      "loss": 0.8282,
      "step": 677
    },
    {
      "epoch": 0.020755466055686563,
      "grad_norm": 0.29548969864845276,
      "learning_rate": 3.3050000000000004e-05,
      "loss": 0.6688,
      "step": 678
    },
    {
      "epoch": 0.020786078837479613,
      "grad_norm": 0.22635269165039062,
      "learning_rate": 3.3025e-05,
      "loss": 0.6097,
      "step": 679
    },
    {
      "epoch": 0.02081669161927266,
      "grad_norm": 0.23651093244552612,
      "learning_rate": 3.3e-05,
      "loss": 0.8173,
      "step": 680
    },
    {
      "epoch": 0.02084730440106571,
      "grad_norm": 0.23334072530269623,
      "learning_rate": 3.2975e-05,
      "loss": 0.868,
      "step": 681
    },
    {
      "epoch": 0.020877917182858755,
      "grad_norm": 0.24444061517715454,
      "learning_rate": 3.295e-05,
      "loss": 0.9635,
      "step": 682
    },
    {
      "epoch": 0.020908529964651805,
      "grad_norm": 0.3132634162902832,
      "learning_rate": 3.2925e-05,
      "loss": 0.832,
      "step": 683
    },
    {
      "epoch": 0.02093914274644485,
      "grad_norm": 0.4759562313556671,
      "learning_rate": 3.29e-05,
      "loss": 0.8461,
      "step": 684
    },
    {
      "epoch": 0.0209697555282379,
      "grad_norm": 0.22263555228710175,
      "learning_rate": 3.2875e-05,
      "loss": 0.7327,
      "step": 685
    },
    {
      "epoch": 0.021000368310030947,
      "grad_norm": 0.22245599329471588,
      "learning_rate": 3.2850000000000006e-05,
      "loss": 0.6417,
      "step": 686
    },
    {
      "epoch": 0.021030981091823997,
      "grad_norm": 0.26056239008903503,
      "learning_rate": 3.2825e-05,
      "loss": 0.9546,
      "step": 687
    },
    {
      "epoch": 0.021061593873617043,
      "grad_norm": 0.3715822994709015,
      "learning_rate": 3.2800000000000004e-05,
      "loss": 0.9541,
      "step": 688
    },
    {
      "epoch": 0.021092206655410093,
      "grad_norm": 1.6285955905914307,
      "learning_rate": 3.2775e-05,
      "loss": 0.9319,
      "step": 689
    },
    {
      "epoch": 0.02112281943720314,
      "grad_norm": 0.2274431586265564,
      "learning_rate": 3.275e-05,
      "loss": 0.7943,
      "step": 690
    },
    {
      "epoch": 0.02115343221899619,
      "grad_norm": 0.2846313416957855,
      "learning_rate": 3.2725e-05,
      "loss": 0.7512,
      "step": 691
    },
    {
      "epoch": 0.021184045000789235,
      "grad_norm": 0.300325870513916,
      "learning_rate": 3.27e-05,
      "loss": 0.7753,
      "step": 692
    },
    {
      "epoch": 0.021214657782582285,
      "grad_norm": 0.24972262978553772,
      "learning_rate": 3.2675e-05,
      "loss": 0.7846,
      "step": 693
    },
    {
      "epoch": 0.02124527056437533,
      "grad_norm": 0.3442905843257904,
      "learning_rate": 3.265e-05,
      "loss": 0.6865,
      "step": 694
    },
    {
      "epoch": 0.02127588334616838,
      "grad_norm": 0.25781068205833435,
      "learning_rate": 3.2625e-05,
      "loss": 0.8046,
      "step": 695
    },
    {
      "epoch": 0.021306496127961427,
      "grad_norm": 0.20101618766784668,
      "learning_rate": 3.26e-05,
      "loss": 0.679,
      "step": 696
    },
    {
      "epoch": 0.021337108909754477,
      "grad_norm": 0.2829378545284271,
      "learning_rate": 3.2575e-05,
      "loss": 0.7059,
      "step": 697
    },
    {
      "epoch": 0.021367721691547523,
      "grad_norm": 0.3690490126609802,
      "learning_rate": 3.2550000000000005e-05,
      "loss": 0.9481,
      "step": 698
    },
    {
      "epoch": 0.021398334473340573,
      "grad_norm": 0.2816748321056366,
      "learning_rate": 3.2525e-05,
      "loss": 0.8702,
      "step": 699
    },
    {
      "epoch": 0.02142894725513362,
      "grad_norm": 0.19525286555290222,
      "learning_rate": 3.2500000000000004e-05,
      "loss": 0.8203,
      "step": 700
    },
    {
      "epoch": 0.02145956003692667,
      "grad_norm": 0.5064207315444946,
      "learning_rate": 3.2474999999999997e-05,
      "loss": 0.9983,
      "step": 701
    },
    {
      "epoch": 0.021490172818719715,
      "grad_norm": 0.281988263130188,
      "learning_rate": 3.245e-05,
      "loss": 0.9919,
      "step": 702
    },
    {
      "epoch": 0.021520785600512765,
      "grad_norm": 0.38635537028312683,
      "learning_rate": 3.2425e-05,
      "loss": 0.7907,
      "step": 703
    },
    {
      "epoch": 0.02155139838230581,
      "grad_norm": 0.25867390632629395,
      "learning_rate": 3.24e-05,
      "loss": 0.6644,
      "step": 704
    },
    {
      "epoch": 0.02158201116409886,
      "grad_norm": 0.5822469592094421,
      "learning_rate": 3.2375e-05,
      "loss": 0.966,
      "step": 705
    },
    {
      "epoch": 0.021612623945891907,
      "grad_norm": 0.3561427891254425,
      "learning_rate": 3.235e-05,
      "loss": 0.7946,
      "step": 706
    },
    {
      "epoch": 0.021643236727684957,
      "grad_norm": 0.6144790053367615,
      "learning_rate": 3.2325e-05,
      "loss": 0.7853,
      "step": 707
    },
    {
      "epoch": 0.021673849509478003,
      "grad_norm": 0.2598865032196045,
      "learning_rate": 3.2300000000000006e-05,
      "loss": 0.7355,
      "step": 708
    },
    {
      "epoch": 0.021704462291271053,
      "grad_norm": 0.2407061904668808,
      "learning_rate": 3.2275e-05,
      "loss": 0.7681,
      "step": 709
    },
    {
      "epoch": 0.0217350750730641,
      "grad_norm": 0.2833166718482971,
      "learning_rate": 3.2250000000000005e-05,
      "loss": 0.7105,
      "step": 710
    },
    {
      "epoch": 0.02176568785485715,
      "grad_norm": 0.31923380494117737,
      "learning_rate": 3.2225e-05,
      "loss": 0.8198,
      "step": 711
    },
    {
      "epoch": 0.021796300636650195,
      "grad_norm": 0.19643419981002808,
      "learning_rate": 3.2200000000000003e-05,
      "loss": 0.7997,
      "step": 712
    },
    {
      "epoch": 0.021826913418443245,
      "grad_norm": 0.3478236794471741,
      "learning_rate": 3.2175e-05,
      "loss": 0.8043,
      "step": 713
    },
    {
      "epoch": 0.02185752620023629,
      "grad_norm": 0.16406095027923584,
      "learning_rate": 3.215e-05,
      "loss": 0.7189,
      "step": 714
    },
    {
      "epoch": 0.02188813898202934,
      "grad_norm": 0.2533716857433319,
      "learning_rate": 3.2125e-05,
      "loss": 0.859,
      "step": 715
    },
    {
      "epoch": 0.021918751763822387,
      "grad_norm": 0.3155074119567871,
      "learning_rate": 3.21e-05,
      "loss": 0.8564,
      "step": 716
    },
    {
      "epoch": 0.021949364545615437,
      "grad_norm": 0.306972861289978,
      "learning_rate": 3.2075e-05,
      "loss": 0.8233,
      "step": 717
    },
    {
      "epoch": 0.021979977327408484,
      "grad_norm": 0.3417617082595825,
      "learning_rate": 3.205e-05,
      "loss": 0.87,
      "step": 718
    },
    {
      "epoch": 0.022010590109201533,
      "grad_norm": 0.2689690589904785,
      "learning_rate": 3.2025e-05,
      "loss": 0.9532,
      "step": 719
    },
    {
      "epoch": 0.02204120289099458,
      "grad_norm": 0.29407599568367004,
      "learning_rate": 3.2000000000000005e-05,
      "loss": 0.8745,
      "step": 720
    },
    {
      "epoch": 0.02207181567278763,
      "grad_norm": 0.23472920060157776,
      "learning_rate": 3.1975e-05,
      "loss": 0.7489,
      "step": 721
    },
    {
      "epoch": 0.022102428454580676,
      "grad_norm": 0.2847742736339569,
      "learning_rate": 3.1950000000000004e-05,
      "loss": 0.8242,
      "step": 722
    },
    {
      "epoch": 0.022133041236373725,
      "grad_norm": 0.3023678660392761,
      "learning_rate": 3.1925e-05,
      "loss": 0.8881,
      "step": 723
    },
    {
      "epoch": 0.02216365401816677,
      "grad_norm": 0.32522132992744446,
      "learning_rate": 3.19e-05,
      "loss": 0.7238,
      "step": 724
    },
    {
      "epoch": 0.02219426679995982,
      "grad_norm": 0.3445587158203125,
      "learning_rate": 3.1875e-05,
      "loss": 0.8984,
      "step": 725
    },
    {
      "epoch": 0.022224879581752868,
      "grad_norm": 0.3725389540195465,
      "learning_rate": 3.185e-05,
      "loss": 0.9133,
      "step": 726
    },
    {
      "epoch": 0.022255492363545917,
      "grad_norm": 0.2710118889808655,
      "learning_rate": 3.1825e-05,
      "loss": 0.6846,
      "step": 727
    },
    {
      "epoch": 0.022286105145338964,
      "grad_norm": 0.242015078663826,
      "learning_rate": 3.18e-05,
      "loss": 0.8476,
      "step": 728
    },
    {
      "epoch": 0.022316717927132013,
      "grad_norm": 0.3446301519870758,
      "learning_rate": 3.1775e-05,
      "loss": 0.9087,
      "step": 729
    },
    {
      "epoch": 0.02234733070892506,
      "grad_norm": 0.23716330528259277,
      "learning_rate": 3.175e-05,
      "loss": 0.7564,
      "step": 730
    },
    {
      "epoch": 0.02237794349071811,
      "grad_norm": 0.2898913025856018,
      "learning_rate": 3.1725e-05,
      "loss": 0.9085,
      "step": 731
    },
    {
      "epoch": 0.022408556272511156,
      "grad_norm": 0.417550265789032,
      "learning_rate": 3.1700000000000005e-05,
      "loss": 0.7401,
      "step": 732
    },
    {
      "epoch": 0.022439169054304205,
      "grad_norm": 0.37556731700897217,
      "learning_rate": 3.1675e-05,
      "loss": 0.7357,
      "step": 733
    },
    {
      "epoch": 0.02246978183609725,
      "grad_norm": 0.3006756901741028,
      "learning_rate": 3.1650000000000004e-05,
      "loss": 0.8256,
      "step": 734
    },
    {
      "epoch": 0.0225003946178903,
      "grad_norm": 0.20967251062393188,
      "learning_rate": 3.1624999999999996e-05,
      "loss": 0.9108,
      "step": 735
    },
    {
      "epoch": 0.022531007399683348,
      "grad_norm": 0.25987106561660767,
      "learning_rate": 3.16e-05,
      "loss": 0.7176,
      "step": 736
    },
    {
      "epoch": 0.022561620181476397,
      "grad_norm": 0.21960951387882233,
      "learning_rate": 3.1575e-05,
      "loss": 0.775,
      "step": 737
    },
    {
      "epoch": 0.022592232963269444,
      "grad_norm": 0.3762724995613098,
      "learning_rate": 3.155e-05,
      "loss": 0.7512,
      "step": 738
    },
    {
      "epoch": 0.022622845745062493,
      "grad_norm": 0.29506343603134155,
      "learning_rate": 3.1525e-05,
      "loss": 0.9364,
      "step": 739
    },
    {
      "epoch": 0.02265345852685554,
      "grad_norm": 0.15396283566951752,
      "learning_rate": 3.15e-05,
      "loss": 0.7263,
      "step": 740
    },
    {
      "epoch": 0.02268407130864859,
      "grad_norm": 0.6290895938873291,
      "learning_rate": 3.1475e-05,
      "loss": 0.8365,
      "step": 741
    },
    {
      "epoch": 0.022714684090441636,
      "grad_norm": 0.2834290862083435,
      "learning_rate": 3.145e-05,
      "loss": 0.7367,
      "step": 742
    },
    {
      "epoch": 0.022745296872234685,
      "grad_norm": 0.2875668406486511,
      "learning_rate": 3.1425e-05,
      "loss": 0.8126,
      "step": 743
    },
    {
      "epoch": 0.02277590965402773,
      "grad_norm": 0.2366083562374115,
      "learning_rate": 3.1400000000000004e-05,
      "loss": 0.9083,
      "step": 744
    },
    {
      "epoch": 0.02280652243582078,
      "grad_norm": 0.2610970735549927,
      "learning_rate": 3.1375e-05,
      "loss": 0.7526,
      "step": 745
    },
    {
      "epoch": 0.022837135217613828,
      "grad_norm": 0.2922974228858948,
      "learning_rate": 3.135e-05,
      "loss": 0.7532,
      "step": 746
    },
    {
      "epoch": 0.022867747999406877,
      "grad_norm": 0.38793912529945374,
      "learning_rate": 3.1324999999999996e-05,
      "loss": 0.8947,
      "step": 747
    },
    {
      "epoch": 0.022898360781199924,
      "grad_norm": 0.27345848083496094,
      "learning_rate": 3.13e-05,
      "loss": 0.9365,
      "step": 748
    },
    {
      "epoch": 0.022928973562992973,
      "grad_norm": 0.30769574642181396,
      "learning_rate": 3.1275e-05,
      "loss": 0.9476,
      "step": 749
    },
    {
      "epoch": 0.02295958634478602,
      "grad_norm": 0.4861595332622528,
      "learning_rate": 3.125e-05,
      "loss": 0.9242,
      "step": 750
    },
    {
      "epoch": 0.02299019912657907,
      "grad_norm": 0.4081629812717438,
      "learning_rate": 3.122500000000001e-05,
      "loss": 0.8595,
      "step": 751
    },
    {
      "epoch": 0.023020811908372116,
      "grad_norm": 0.34558138251304626,
      "learning_rate": 3.12e-05,
      "loss": 0.8796,
      "step": 752
    },
    {
      "epoch": 0.023051424690165166,
      "grad_norm": 0.1930524855852127,
      "learning_rate": 3.1175000000000006e-05,
      "loss": 0.734,
      "step": 753
    },
    {
      "epoch": 0.023082037471958215,
      "grad_norm": 0.32900071144104004,
      "learning_rate": 3.115e-05,
      "loss": 0.9205,
      "step": 754
    },
    {
      "epoch": 0.02311265025375126,
      "grad_norm": 0.22188574075698853,
      "learning_rate": 3.1125000000000004e-05,
      "loss": 0.8055,
      "step": 755
    },
    {
      "epoch": 0.02314326303554431,
      "grad_norm": 0.19028417766094208,
      "learning_rate": 3.1100000000000004e-05,
      "loss": 0.7111,
      "step": 756
    },
    {
      "epoch": 0.023173875817337358,
      "grad_norm": 0.4052783250808716,
      "learning_rate": 3.1075e-05,
      "loss": 0.7749,
      "step": 757
    },
    {
      "epoch": 0.023204488599130407,
      "grad_norm": 0.3834271728992462,
      "learning_rate": 3.105e-05,
      "loss": 0.9387,
      "step": 758
    },
    {
      "epoch": 0.023235101380923454,
      "grad_norm": 0.2380029261112213,
      "learning_rate": 3.1025e-05,
      "loss": 0.6087,
      "step": 759
    },
    {
      "epoch": 0.023265714162716503,
      "grad_norm": 0.20628444850444794,
      "learning_rate": 3.1e-05,
      "loss": 0.8398,
      "step": 760
    },
    {
      "epoch": 0.02329632694450955,
      "grad_norm": 0.21775928139686584,
      "learning_rate": 3.0975e-05,
      "loss": 0.8315,
      "step": 761
    },
    {
      "epoch": 0.0233269397263026,
      "grad_norm": 0.3498813807964325,
      "learning_rate": 3.095e-05,
      "loss": 0.7979,
      "step": 762
    },
    {
      "epoch": 0.023357552508095646,
      "grad_norm": 0.22466371953487396,
      "learning_rate": 3.0925000000000006e-05,
      "loss": 0.7129,
      "step": 763
    },
    {
      "epoch": 0.023388165289888695,
      "grad_norm": 0.3326520621776581,
      "learning_rate": 3.09e-05,
      "loss": 0.7398,
      "step": 764
    },
    {
      "epoch": 0.02341877807168174,
      "grad_norm": 0.26721322536468506,
      "learning_rate": 3.0875000000000005e-05,
      "loss": 0.7359,
      "step": 765
    },
    {
      "epoch": 0.02344939085347479,
      "grad_norm": 0.6605486869812012,
      "learning_rate": 3.0850000000000004e-05,
      "loss": 0.8366,
      "step": 766
    },
    {
      "epoch": 0.023480003635267838,
      "grad_norm": 0.23785540461540222,
      "learning_rate": 3.0825000000000004e-05,
      "loss": 0.8376,
      "step": 767
    },
    {
      "epoch": 0.023510616417060887,
      "grad_norm": 0.2734736204147339,
      "learning_rate": 3.08e-05,
      "loss": 0.8925,
      "step": 768
    },
    {
      "epoch": 0.023541229198853934,
      "grad_norm": 0.6637300252914429,
      "learning_rate": 3.0775e-05,
      "loss": 0.9716,
      "step": 769
    },
    {
      "epoch": 0.023571841980646983,
      "grad_norm": 0.27405065298080444,
      "learning_rate": 3.075e-05,
      "loss": 0.8878,
      "step": 770
    },
    {
      "epoch": 0.02360245476244003,
      "grad_norm": 0.24558964371681213,
      "learning_rate": 3.0725e-05,
      "loss": 0.8531,
      "step": 771
    },
    {
      "epoch": 0.02363306754423308,
      "grad_norm": 0.34643644094467163,
      "learning_rate": 3.07e-05,
      "loss": 0.8568,
      "step": 772
    },
    {
      "epoch": 0.023663680326026126,
      "grad_norm": 0.17521505057811737,
      "learning_rate": 3.067500000000001e-05,
      "loss": 0.814,
      "step": 773
    },
    {
      "epoch": 0.023694293107819175,
      "grad_norm": 0.27538201212882996,
      "learning_rate": 3.065e-05,
      "loss": 0.6619,
      "step": 774
    },
    {
      "epoch": 0.02372490588961222,
      "grad_norm": 0.25624993443489075,
      "learning_rate": 3.0625000000000006e-05,
      "loss": 0.7658,
      "step": 775
    },
    {
      "epoch": 0.02375551867140527,
      "grad_norm": 0.27944788336753845,
      "learning_rate": 3.06e-05,
      "loss": 0.6969,
      "step": 776
    },
    {
      "epoch": 0.023786131453198318,
      "grad_norm": 0.33092567324638367,
      "learning_rate": 3.0575000000000005e-05,
      "loss": 0.703,
      "step": 777
    },
    {
      "epoch": 0.023816744234991367,
      "grad_norm": 0.41218432784080505,
      "learning_rate": 3.0550000000000004e-05,
      "loss": 0.7397,
      "step": 778
    },
    {
      "epoch": 0.023847357016784414,
      "grad_norm": 0.20737531781196594,
      "learning_rate": 3.0525e-05,
      "loss": 0.7129,
      "step": 779
    },
    {
      "epoch": 0.023877969798577463,
      "grad_norm": 0.27646133303642273,
      "learning_rate": 3.05e-05,
      "loss": 0.7498,
      "step": 780
    },
    {
      "epoch": 0.02390858258037051,
      "grad_norm": 0.32983580231666565,
      "learning_rate": 3.0475000000000002e-05,
      "loss": 0.8936,
      "step": 781
    },
    {
      "epoch": 0.02393919536216356,
      "grad_norm": 0.2052886039018631,
      "learning_rate": 3.045e-05,
      "loss": 0.7956,
      "step": 782
    },
    {
      "epoch": 0.023969808143956606,
      "grad_norm": 0.24393165111541748,
      "learning_rate": 3.0425000000000004e-05,
      "loss": 0.8344,
      "step": 783
    },
    {
      "epoch": 0.024000420925749655,
      "grad_norm": 0.21573598682880402,
      "learning_rate": 3.04e-05,
      "loss": 0.8509,
      "step": 784
    },
    {
      "epoch": 0.024031033707542702,
      "grad_norm": 0.1957068145275116,
      "learning_rate": 3.0375000000000003e-05,
      "loss": 0.8256,
      "step": 785
    },
    {
      "epoch": 0.02406164648933575,
      "grad_norm": 0.20561254024505615,
      "learning_rate": 3.035e-05,
      "loss": 0.772,
      "step": 786
    },
    {
      "epoch": 0.024092259271128798,
      "grad_norm": 0.22493137419223785,
      "learning_rate": 3.0325000000000002e-05,
      "loss": 0.8001,
      "step": 787
    },
    {
      "epoch": 0.024122872052921848,
      "grad_norm": 0.29237043857574463,
      "learning_rate": 3.03e-05,
      "loss": 0.6665,
      "step": 788
    },
    {
      "epoch": 0.024153484834714894,
      "grad_norm": 0.23693957924842834,
      "learning_rate": 3.0275000000000004e-05,
      "loss": 0.7418,
      "step": 789
    },
    {
      "epoch": 0.024184097616507944,
      "grad_norm": 0.25274136662483215,
      "learning_rate": 3.025e-05,
      "loss": 0.7289,
      "step": 790
    },
    {
      "epoch": 0.02421471039830099,
      "grad_norm": 0.4060211777687073,
      "learning_rate": 3.0225000000000003e-05,
      "loss": 0.8746,
      "step": 791
    },
    {
      "epoch": 0.02424532318009404,
      "grad_norm": 0.25534942746162415,
      "learning_rate": 3.02e-05,
      "loss": 0.7885,
      "step": 792
    },
    {
      "epoch": 0.024275935961887086,
      "grad_norm": 0.22390544414520264,
      "learning_rate": 3.0175e-05,
      "loss": 0.8449,
      "step": 793
    },
    {
      "epoch": 0.024306548743680136,
      "grad_norm": 0.1773185133934021,
      "learning_rate": 3.015e-05,
      "loss": 0.7657,
      "step": 794
    },
    {
      "epoch": 0.024337161525473182,
      "grad_norm": 0.22006359696388245,
      "learning_rate": 3.0125000000000004e-05,
      "loss": 0.8952,
      "step": 795
    },
    {
      "epoch": 0.02436777430726623,
      "grad_norm": 0.2293826788663864,
      "learning_rate": 3.01e-05,
      "loss": 0.7759,
      "step": 796
    },
    {
      "epoch": 0.024398387089059278,
      "grad_norm": 0.283991277217865,
      "learning_rate": 3.0075000000000003e-05,
      "loss": 0.8097,
      "step": 797
    },
    {
      "epoch": 0.024428999870852328,
      "grad_norm": 0.3686857223510742,
      "learning_rate": 3.0050000000000002e-05,
      "loss": 0.8394,
      "step": 798
    },
    {
      "epoch": 0.024459612652645374,
      "grad_norm": 0.2633674442768097,
      "learning_rate": 3.0025000000000005e-05,
      "loss": 0.8509,
      "step": 799
    },
    {
      "epoch": 0.024490225434438424,
      "grad_norm": 0.3190794289112091,
      "learning_rate": 3e-05,
      "loss": 0.6887,
      "step": 800
    },
    {
      "epoch": 0.02452083821623147,
      "grad_norm": 0.26970425248146057,
      "learning_rate": 2.9975000000000004e-05,
      "loss": 0.7701,
      "step": 801
    },
    {
      "epoch": 0.02455145099802452,
      "grad_norm": 0.2365722954273224,
      "learning_rate": 2.995e-05,
      "loss": 0.805,
      "step": 802
    },
    {
      "epoch": 0.024582063779817566,
      "grad_norm": 0.3403000235557556,
      "learning_rate": 2.9925000000000002e-05,
      "loss": 0.8458,
      "step": 803
    },
    {
      "epoch": 0.024612676561610616,
      "grad_norm": 0.2706793248653412,
      "learning_rate": 2.9900000000000002e-05,
      "loss": 0.7714,
      "step": 804
    },
    {
      "epoch": 0.024643289343403662,
      "grad_norm": 0.282000333070755,
      "learning_rate": 2.9875000000000004e-05,
      "loss": 0.8005,
      "step": 805
    },
    {
      "epoch": 0.02467390212519671,
      "grad_norm": 0.555347204208374,
      "learning_rate": 2.985e-05,
      "loss": 0.9079,
      "step": 806
    },
    {
      "epoch": 0.024704514906989758,
      "grad_norm": 0.1935003697872162,
      "learning_rate": 2.9825000000000003e-05,
      "loss": 0.7709,
      "step": 807
    },
    {
      "epoch": 0.024735127688782808,
      "grad_norm": 1.0484901666641235,
      "learning_rate": 2.98e-05,
      "loss": 0.9013,
      "step": 808
    },
    {
      "epoch": 0.024765740470575854,
      "grad_norm": 0.21082039177417755,
      "learning_rate": 2.9775000000000002e-05,
      "loss": 0.6987,
      "step": 809
    },
    {
      "epoch": 0.024796353252368904,
      "grad_norm": 0.2949369549751282,
      "learning_rate": 2.975e-05,
      "loss": 0.803,
      "step": 810
    },
    {
      "epoch": 0.02482696603416195,
      "grad_norm": 0.25005561113357544,
      "learning_rate": 2.9725000000000004e-05,
      "loss": 0.7755,
      "step": 811
    },
    {
      "epoch": 0.024857578815955,
      "grad_norm": 0.2735678553581238,
      "learning_rate": 2.97e-05,
      "loss": 0.832,
      "step": 812
    },
    {
      "epoch": 0.024888191597748046,
      "grad_norm": 0.3447706401348114,
      "learning_rate": 2.9675000000000003e-05,
      "loss": 0.8904,
      "step": 813
    },
    {
      "epoch": 0.024918804379541096,
      "grad_norm": 1.2564961910247803,
      "learning_rate": 2.965e-05,
      "loss": 0.8644,
      "step": 814
    },
    {
      "epoch": 0.024949417161334142,
      "grad_norm": 0.2139745056629181,
      "learning_rate": 2.9625000000000002e-05,
      "loss": 0.8799,
      "step": 815
    },
    {
      "epoch": 0.024980029943127192,
      "grad_norm": 0.26508721709251404,
      "learning_rate": 2.96e-05,
      "loss": 0.9586,
      "step": 816
    },
    {
      "epoch": 0.025010642724920238,
      "grad_norm": 0.2845189869403839,
      "learning_rate": 2.9575000000000004e-05,
      "loss": 0.7926,
      "step": 817
    },
    {
      "epoch": 0.025041255506713288,
      "grad_norm": 0.2298937886953354,
      "learning_rate": 2.955e-05,
      "loss": 0.7427,
      "step": 818
    },
    {
      "epoch": 0.025071868288506334,
      "grad_norm": 0.22173981368541718,
      "learning_rate": 2.9525000000000003e-05,
      "loss": 0.7012,
      "step": 819
    },
    {
      "epoch": 0.025102481070299384,
      "grad_norm": 0.33899015188217163,
      "learning_rate": 2.95e-05,
      "loss": 0.8489,
      "step": 820
    },
    {
      "epoch": 0.02513309385209243,
      "grad_norm": 0.39261946082115173,
      "learning_rate": 2.9475e-05,
      "loss": 0.9783,
      "step": 821
    },
    {
      "epoch": 0.02516370663388548,
      "grad_norm": 0.47273534536361694,
      "learning_rate": 2.945e-05,
      "loss": 0.9067,
      "step": 822
    },
    {
      "epoch": 0.025194319415678526,
      "grad_norm": 0.19720178842544556,
      "learning_rate": 2.9425000000000004e-05,
      "loss": 0.6649,
      "step": 823
    },
    {
      "epoch": 0.025224932197471576,
      "grad_norm": 0.2107517272233963,
      "learning_rate": 2.94e-05,
      "loss": 0.7944,
      "step": 824
    },
    {
      "epoch": 0.025255544979264622,
      "grad_norm": 0.20002183318138123,
      "learning_rate": 2.9375000000000003e-05,
      "loss": 0.7024,
      "step": 825
    },
    {
      "epoch": 0.025286157761057672,
      "grad_norm": 0.731521487236023,
      "learning_rate": 2.935e-05,
      "loss": 0.7013,
      "step": 826
    },
    {
      "epoch": 0.025316770542850718,
      "grad_norm": 0.25396400690078735,
      "learning_rate": 2.9325e-05,
      "loss": 0.6993,
      "step": 827
    },
    {
      "epoch": 0.025347383324643768,
      "grad_norm": 0.2594110071659088,
      "learning_rate": 2.93e-05,
      "loss": 0.7571,
      "step": 828
    },
    {
      "epoch": 0.025377996106436814,
      "grad_norm": 0.22885718941688538,
      "learning_rate": 2.9275000000000003e-05,
      "loss": 0.8889,
      "step": 829
    },
    {
      "epoch": 0.025408608888229864,
      "grad_norm": 0.2557504177093506,
      "learning_rate": 2.925e-05,
      "loss": 0.8159,
      "step": 830
    },
    {
      "epoch": 0.02543922167002291,
      "grad_norm": 0.22893664240837097,
      "learning_rate": 2.9225000000000002e-05,
      "loss": 0.739,
      "step": 831
    },
    {
      "epoch": 0.02546983445181596,
      "grad_norm": 0.21701563894748688,
      "learning_rate": 2.9199999999999998e-05,
      "loss": 0.7757,
      "step": 832
    },
    {
      "epoch": 0.025500447233609006,
      "grad_norm": 0.21525254845619202,
      "learning_rate": 2.9175e-05,
      "loss": 0.806,
      "step": 833
    },
    {
      "epoch": 0.025531060015402056,
      "grad_norm": 0.17862115800380707,
      "learning_rate": 2.915e-05,
      "loss": 0.729,
      "step": 834
    },
    {
      "epoch": 0.025561672797195102,
      "grad_norm": 0.29260683059692383,
      "learning_rate": 2.9125000000000003e-05,
      "loss": 0.7525,
      "step": 835
    },
    {
      "epoch": 0.025592285578988152,
      "grad_norm": 0.2695743441581726,
      "learning_rate": 2.91e-05,
      "loss": 0.7927,
      "step": 836
    },
    {
      "epoch": 0.0256228983607812,
      "grad_norm": 0.3860837519168854,
      "learning_rate": 2.9075000000000002e-05,
      "loss": 0.7629,
      "step": 837
    },
    {
      "epoch": 0.025653511142574248,
      "grad_norm": 0.3627798855304718,
      "learning_rate": 2.9049999999999998e-05,
      "loss": 0.8493,
      "step": 838
    },
    {
      "epoch": 0.025684123924367298,
      "grad_norm": 0.3303006887435913,
      "learning_rate": 2.9025e-05,
      "loss": 0.7858,
      "step": 839
    },
    {
      "epoch": 0.025714736706160344,
      "grad_norm": 0.28896212577819824,
      "learning_rate": 2.9e-05,
      "loss": 0.823,
      "step": 840
    },
    {
      "epoch": 0.025745349487953394,
      "grad_norm": 0.35820138454437256,
      "learning_rate": 2.8975000000000003e-05,
      "loss": 0.7952,
      "step": 841
    },
    {
      "epoch": 0.02577596226974644,
      "grad_norm": 0.43628787994384766,
      "learning_rate": 2.895e-05,
      "loss": 0.722,
      "step": 842
    },
    {
      "epoch": 0.02580657505153949,
      "grad_norm": 0.3466860353946686,
      "learning_rate": 2.8925000000000002e-05,
      "loss": 0.8141,
      "step": 843
    },
    {
      "epoch": 0.025837187833332536,
      "grad_norm": 0.28213992714881897,
      "learning_rate": 2.8899999999999998e-05,
      "loss": 0.8237,
      "step": 844
    },
    {
      "epoch": 0.025867800615125586,
      "grad_norm": 0.24975919723510742,
      "learning_rate": 2.8875e-05,
      "loss": 0.8129,
      "step": 845
    },
    {
      "epoch": 0.025898413396918632,
      "grad_norm": 0.2584918141365051,
      "learning_rate": 2.885e-05,
      "loss": 0.7068,
      "step": 846
    },
    {
      "epoch": 0.02592902617871168,
      "grad_norm": 0.24893394112586975,
      "learning_rate": 2.8825000000000003e-05,
      "loss": 0.8291,
      "step": 847
    },
    {
      "epoch": 0.025959638960504728,
      "grad_norm": 0.2726723849773407,
      "learning_rate": 2.88e-05,
      "loss": 0.7596,
      "step": 848
    },
    {
      "epoch": 0.025990251742297778,
      "grad_norm": 0.2568736970424652,
      "learning_rate": 2.8775e-05,
      "loss": 0.8701,
      "step": 849
    },
    {
      "epoch": 0.026020864524090824,
      "grad_norm": 0.23989631235599518,
      "learning_rate": 2.8749999999999997e-05,
      "loss": 0.903,
      "step": 850
    },
    {
      "epoch": 0.026051477305883874,
      "grad_norm": 0.2137984335422516,
      "learning_rate": 2.8725e-05,
      "loss": 0.8538,
      "step": 851
    },
    {
      "epoch": 0.02608209008767692,
      "grad_norm": 0.22058404982089996,
      "learning_rate": 2.87e-05,
      "loss": 0.6818,
      "step": 852
    },
    {
      "epoch": 0.02611270286946997,
      "grad_norm": 0.3256290555000305,
      "learning_rate": 2.8675000000000002e-05,
      "loss": 0.8044,
      "step": 853
    },
    {
      "epoch": 0.026143315651263016,
      "grad_norm": 0.2119532823562622,
      "learning_rate": 2.865e-05,
      "loss": 0.8822,
      "step": 854
    },
    {
      "epoch": 0.026173928433056066,
      "grad_norm": 0.24988698959350586,
      "learning_rate": 2.8625e-05,
      "loss": 0.7059,
      "step": 855
    },
    {
      "epoch": 0.026204541214849112,
      "grad_norm": 0.21754224598407745,
      "learning_rate": 2.86e-05,
      "loss": 0.8444,
      "step": 856
    },
    {
      "epoch": 0.026235153996642162,
      "grad_norm": 0.24350720643997192,
      "learning_rate": 2.8575000000000003e-05,
      "loss": 0.7566,
      "step": 857
    },
    {
      "epoch": 0.026265766778435208,
      "grad_norm": 0.17361341416835785,
      "learning_rate": 2.855e-05,
      "loss": 0.8549,
      "step": 858
    },
    {
      "epoch": 0.026296379560228258,
      "grad_norm": 0.8392402529716492,
      "learning_rate": 2.8525000000000002e-05,
      "loss": 0.8015,
      "step": 859
    },
    {
      "epoch": 0.026326992342021304,
      "grad_norm": 0.3251253664493561,
      "learning_rate": 2.8499999999999998e-05,
      "loss": 0.7567,
      "step": 860
    },
    {
      "epoch": 0.026357605123814354,
      "grad_norm": 0.2708291709423065,
      "learning_rate": 2.8475e-05,
      "loss": 0.6809,
      "step": 861
    },
    {
      "epoch": 0.0263882179056074,
      "grad_norm": 0.2126312553882599,
      "learning_rate": 2.845e-05,
      "loss": 0.6958,
      "step": 862
    },
    {
      "epoch": 0.02641883068740045,
      "grad_norm": 0.3463224172592163,
      "learning_rate": 2.8425000000000003e-05,
      "loss": 1.0106,
      "step": 863
    },
    {
      "epoch": 0.026449443469193496,
      "grad_norm": 0.24617327749729156,
      "learning_rate": 2.84e-05,
      "loss": 0.7356,
      "step": 864
    },
    {
      "epoch": 0.026480056250986546,
      "grad_norm": 0.280312716960907,
      "learning_rate": 2.8375000000000002e-05,
      "loss": 0.8023,
      "step": 865
    },
    {
      "epoch": 0.026510669032779592,
      "grad_norm": 0.2148461788892746,
      "learning_rate": 2.8349999999999998e-05,
      "loss": 0.6696,
      "step": 866
    },
    {
      "epoch": 0.026541281814572642,
      "grad_norm": 0.289033442735672,
      "learning_rate": 2.8325e-05,
      "loss": 0.7107,
      "step": 867
    },
    {
      "epoch": 0.026571894596365688,
      "grad_norm": 0.31490424275398254,
      "learning_rate": 2.83e-05,
      "loss": 0.7464,
      "step": 868
    },
    {
      "epoch": 0.026602507378158738,
      "grad_norm": 0.7208364009857178,
      "learning_rate": 2.8275000000000003e-05,
      "loss": 0.8436,
      "step": 869
    },
    {
      "epoch": 0.026633120159951784,
      "grad_norm": 0.15875181555747986,
      "learning_rate": 2.825e-05,
      "loss": 0.8084,
      "step": 870
    },
    {
      "epoch": 0.026663732941744834,
      "grad_norm": 0.32893168926239014,
      "learning_rate": 2.8225e-05,
      "loss": 0.8642,
      "step": 871
    },
    {
      "epoch": 0.02669434572353788,
      "grad_norm": 0.18762439489364624,
      "learning_rate": 2.8199999999999998e-05,
      "loss": 0.7975,
      "step": 872
    },
    {
      "epoch": 0.02672495850533093,
      "grad_norm": 0.2735452651977539,
      "learning_rate": 2.8175e-05,
      "loss": 0.7555,
      "step": 873
    },
    {
      "epoch": 0.026755571287123976,
      "grad_norm": 0.25614097714424133,
      "learning_rate": 2.815e-05,
      "loss": 0.8399,
      "step": 874
    },
    {
      "epoch": 0.026786184068917026,
      "grad_norm": 0.19414450228214264,
      "learning_rate": 2.8125000000000003e-05,
      "loss": 0.7666,
      "step": 875
    },
    {
      "epoch": 0.026816796850710072,
      "grad_norm": 0.27566370368003845,
      "learning_rate": 2.8100000000000005e-05,
      "loss": 0.9132,
      "step": 876
    },
    {
      "epoch": 0.026847409632503122,
      "grad_norm": 0.23625831305980682,
      "learning_rate": 2.8075e-05,
      "loss": 0.7944,
      "step": 877
    },
    {
      "epoch": 0.026878022414296168,
      "grad_norm": 0.4745585024356842,
      "learning_rate": 2.8050000000000004e-05,
      "loss": 0.7296,
      "step": 878
    },
    {
      "epoch": 0.026908635196089218,
      "grad_norm": 0.1796225905418396,
      "learning_rate": 2.8025e-05,
      "loss": 0.7673,
      "step": 879
    },
    {
      "epoch": 0.026939247977882264,
      "grad_norm": 0.24837301671504974,
      "learning_rate": 2.8000000000000003e-05,
      "loss": 0.7254,
      "step": 880
    },
    {
      "epoch": 0.026969860759675314,
      "grad_norm": 0.4500615894794464,
      "learning_rate": 2.7975000000000002e-05,
      "loss": 0.7211,
      "step": 881
    },
    {
      "epoch": 0.02700047354146836,
      "grad_norm": 0.23253771662712097,
      "learning_rate": 2.7950000000000005e-05,
      "loss": 0.774,
      "step": 882
    },
    {
      "epoch": 0.02703108632326141,
      "grad_norm": 0.23589996993541718,
      "learning_rate": 2.7925e-05,
      "loss": 0.743,
      "step": 883
    },
    {
      "epoch": 0.027061699105054456,
      "grad_norm": 0.23271964490413666,
      "learning_rate": 2.7900000000000004e-05,
      "loss": 0.7926,
      "step": 884
    },
    {
      "epoch": 0.027092311886847506,
      "grad_norm": 0.19415795803070068,
      "learning_rate": 2.7875e-05,
      "loss": 0.8051,
      "step": 885
    },
    {
      "epoch": 0.027122924668640552,
      "grad_norm": 0.25722768902778625,
      "learning_rate": 2.7850000000000003e-05,
      "loss": 0.774,
      "step": 886
    },
    {
      "epoch": 0.027153537450433602,
      "grad_norm": 0.17887739837169647,
      "learning_rate": 2.7825000000000002e-05,
      "loss": 0.6716,
      "step": 887
    },
    {
      "epoch": 0.02718415023222665,
      "grad_norm": 0.18311217427253723,
      "learning_rate": 2.7800000000000005e-05,
      "loss": 0.6188,
      "step": 888
    },
    {
      "epoch": 0.027214763014019698,
      "grad_norm": 0.2779267728328705,
      "learning_rate": 2.7775e-05,
      "loss": 0.8355,
      "step": 889
    },
    {
      "epoch": 0.027245375795812744,
      "grad_norm": 0.25508707761764526,
      "learning_rate": 2.7750000000000004e-05,
      "loss": 0.7415,
      "step": 890
    },
    {
      "epoch": 0.027275988577605794,
      "grad_norm": 0.6729469299316406,
      "learning_rate": 2.7725e-05,
      "loss": 0.7703,
      "step": 891
    },
    {
      "epoch": 0.02730660135939884,
      "grad_norm": 0.20492440462112427,
      "learning_rate": 2.7700000000000002e-05,
      "loss": 0.6994,
      "step": 892
    },
    {
      "epoch": 0.02733721414119189,
      "grad_norm": 0.18655037879943848,
      "learning_rate": 2.7675000000000002e-05,
      "loss": 0.661,
      "step": 893
    },
    {
      "epoch": 0.027367826922984936,
      "grad_norm": 0.18032127618789673,
      "learning_rate": 2.7650000000000005e-05,
      "loss": 0.8274,
      "step": 894
    },
    {
      "epoch": 0.027398439704777986,
      "grad_norm": 0.24967481195926666,
      "learning_rate": 2.7625e-05,
      "loss": 0.7497,
      "step": 895
    },
    {
      "epoch": 0.027429052486571032,
      "grad_norm": 0.2341681867837906,
      "learning_rate": 2.7600000000000003e-05,
      "loss": 0.8267,
      "step": 896
    },
    {
      "epoch": 0.027459665268364082,
      "grad_norm": 0.2393629550933838,
      "learning_rate": 2.7575e-05,
      "loss": 0.7739,
      "step": 897
    },
    {
      "epoch": 0.02749027805015713,
      "grad_norm": 0.27878737449645996,
      "learning_rate": 2.7550000000000002e-05,
      "loss": 0.7999,
      "step": 898
    },
    {
      "epoch": 0.027520890831950178,
      "grad_norm": 0.2464660257101059,
      "learning_rate": 2.7525e-05,
      "loss": 0.7272,
      "step": 899
    },
    {
      "epoch": 0.027551503613743224,
      "grad_norm": 0.19899560511112213,
      "learning_rate": 2.7500000000000004e-05,
      "loss": 0.6452,
      "step": 900
    },
    {
      "epoch": 0.027582116395536274,
      "grad_norm": 0.24620629847049713,
      "learning_rate": 2.7475e-05,
      "loss": 0.7048,
      "step": 901
    },
    {
      "epoch": 0.02761272917732932,
      "grad_norm": 0.35517385601997375,
      "learning_rate": 2.7450000000000003e-05,
      "loss": 0.8296,
      "step": 902
    },
    {
      "epoch": 0.02764334195912237,
      "grad_norm": 2.92924165725708,
      "learning_rate": 2.7425e-05,
      "loss": 0.6455,
      "step": 903
    },
    {
      "epoch": 0.027673954740915416,
      "grad_norm": 0.2203425168991089,
      "learning_rate": 2.7400000000000002e-05,
      "loss": 0.8444,
      "step": 904
    },
    {
      "epoch": 0.027704567522708466,
      "grad_norm": 0.35871022939682007,
      "learning_rate": 2.7375e-05,
      "loss": 0.9032,
      "step": 905
    },
    {
      "epoch": 0.027735180304501512,
      "grad_norm": 0.2167324721813202,
      "learning_rate": 2.7350000000000004e-05,
      "loss": 0.7407,
      "step": 906
    },
    {
      "epoch": 0.027765793086294562,
      "grad_norm": 0.20071963965892792,
      "learning_rate": 2.7325e-05,
      "loss": 0.755,
      "step": 907
    },
    {
      "epoch": 0.02779640586808761,
      "grad_norm": 0.19762741029262543,
      "learning_rate": 2.7300000000000003e-05,
      "loss": 0.7355,
      "step": 908
    },
    {
      "epoch": 0.027827018649880658,
      "grad_norm": 0.21306392550468445,
      "learning_rate": 2.7275e-05,
      "loss": 0.8227,
      "step": 909
    },
    {
      "epoch": 0.027857631431673704,
      "grad_norm": 0.2513431906700134,
      "learning_rate": 2.725e-05,
      "loss": 0.6616,
      "step": 910
    },
    {
      "epoch": 0.027888244213466754,
      "grad_norm": 0.42229893803596497,
      "learning_rate": 2.7225e-05,
      "loss": 0.858,
      "step": 911
    },
    {
      "epoch": 0.0279188569952598,
      "grad_norm": 0.31944870948791504,
      "learning_rate": 2.7200000000000004e-05,
      "loss": 0.8134,
      "step": 912
    },
    {
      "epoch": 0.02794946977705285,
      "grad_norm": 0.2738754153251648,
      "learning_rate": 2.7175e-05,
      "loss": 0.6983,
      "step": 913
    },
    {
      "epoch": 0.027980082558845897,
      "grad_norm": 0.2120870053768158,
      "learning_rate": 2.7150000000000003e-05,
      "loss": 0.7372,
      "step": 914
    },
    {
      "epoch": 0.028010695340638946,
      "grad_norm": 0.19120444357395172,
      "learning_rate": 2.7125000000000002e-05,
      "loss": 0.6962,
      "step": 915
    },
    {
      "epoch": 0.028041308122431993,
      "grad_norm": 0.23506103456020355,
      "learning_rate": 2.7100000000000005e-05,
      "loss": 0.7198,
      "step": 916
    },
    {
      "epoch": 0.028071920904225042,
      "grad_norm": 0.24480918049812317,
      "learning_rate": 2.7075e-05,
      "loss": 0.8242,
      "step": 917
    },
    {
      "epoch": 0.02810253368601809,
      "grad_norm": 0.2311209887266159,
      "learning_rate": 2.7050000000000004e-05,
      "loss": 0.8522,
      "step": 918
    },
    {
      "epoch": 0.02813314646781114,
      "grad_norm": 0.23074816167354584,
      "learning_rate": 2.7025e-05,
      "loss": 0.8246,
      "step": 919
    },
    {
      "epoch": 0.028163759249604188,
      "grad_norm": 0.2711634635925293,
      "learning_rate": 2.7000000000000002e-05,
      "loss": 0.7097,
      "step": 920
    },
    {
      "epoch": 0.028194372031397234,
      "grad_norm": 0.36720243096351624,
      "learning_rate": 2.6975000000000002e-05,
      "loss": 0.7522,
      "step": 921
    },
    {
      "epoch": 0.028224984813190284,
      "grad_norm": 0.521653950214386,
      "learning_rate": 2.6950000000000005e-05,
      "loss": 0.7544,
      "step": 922
    },
    {
      "epoch": 0.02825559759498333,
      "grad_norm": 0.28770723938941956,
      "learning_rate": 2.6925e-05,
      "loss": 0.8676,
      "step": 923
    },
    {
      "epoch": 0.02828621037677638,
      "grad_norm": 0.4866067171096802,
      "learning_rate": 2.6900000000000003e-05,
      "loss": 0.7629,
      "step": 924
    },
    {
      "epoch": 0.028316823158569426,
      "grad_norm": 0.2594362497329712,
      "learning_rate": 2.6875e-05,
      "loss": 0.735,
      "step": 925
    },
    {
      "epoch": 0.028347435940362476,
      "grad_norm": 0.17074766755104065,
      "learning_rate": 2.6850000000000002e-05,
      "loss": 0.6295,
      "step": 926
    },
    {
      "epoch": 0.028378048722155522,
      "grad_norm": 0.25635072588920593,
      "learning_rate": 2.6825e-05,
      "loss": 0.8799,
      "step": 927
    },
    {
      "epoch": 0.028408661503948572,
      "grad_norm": 0.2679007053375244,
      "learning_rate": 2.6800000000000004e-05,
      "loss": 0.9091,
      "step": 928
    },
    {
      "epoch": 0.02843927428574162,
      "grad_norm": 0.1732785403728485,
      "learning_rate": 2.6775e-05,
      "loss": 0.7626,
      "step": 929
    },
    {
      "epoch": 0.028469887067534668,
      "grad_norm": 0.18113206326961517,
      "learning_rate": 2.6750000000000003e-05,
      "loss": 0.6877,
      "step": 930
    },
    {
      "epoch": 0.028500499849327714,
      "grad_norm": 0.3979860246181488,
      "learning_rate": 2.6725e-05,
      "loss": 0.8334,
      "step": 931
    },
    {
      "epoch": 0.028531112631120764,
      "grad_norm": 0.2427201271057129,
      "learning_rate": 2.6700000000000002e-05,
      "loss": 0.6888,
      "step": 932
    },
    {
      "epoch": 0.02856172541291381,
      "grad_norm": 0.25827473402023315,
      "learning_rate": 2.6675e-05,
      "loss": 0.6342,
      "step": 933
    },
    {
      "epoch": 0.02859233819470686,
      "grad_norm": 0.22778457403182983,
      "learning_rate": 2.6650000000000004e-05,
      "loss": 0.8928,
      "step": 934
    },
    {
      "epoch": 0.028622950976499906,
      "grad_norm": 0.38989055156707764,
      "learning_rate": 2.6625e-05,
      "loss": 0.7361,
      "step": 935
    },
    {
      "epoch": 0.028653563758292956,
      "grad_norm": 0.24756157398223877,
      "learning_rate": 2.6600000000000003e-05,
      "loss": 0.6942,
      "step": 936
    },
    {
      "epoch": 0.028684176540086002,
      "grad_norm": 0.2753722369670868,
      "learning_rate": 2.6575e-05,
      "loss": 0.7076,
      "step": 937
    },
    {
      "epoch": 0.028714789321879052,
      "grad_norm": 0.20845873653888702,
      "learning_rate": 2.655e-05,
      "loss": 0.7941,
      "step": 938
    },
    {
      "epoch": 0.0287454021036721,
      "grad_norm": 0.6064780950546265,
      "learning_rate": 2.6525e-05,
      "loss": 0.8253,
      "step": 939
    },
    {
      "epoch": 0.028776014885465148,
      "grad_norm": 0.2493230253458023,
      "learning_rate": 2.6500000000000004e-05,
      "loss": 0.7033,
      "step": 940
    },
    {
      "epoch": 0.028806627667258194,
      "grad_norm": 0.17641626298427582,
      "learning_rate": 2.6475e-05,
      "loss": 0.7805,
      "step": 941
    },
    {
      "epoch": 0.028837240449051244,
      "grad_norm": 0.26349306106567383,
      "learning_rate": 2.6450000000000003e-05,
      "loss": 0.8456,
      "step": 942
    },
    {
      "epoch": 0.02886785323084429,
      "grad_norm": 0.2395259141921997,
      "learning_rate": 2.6425e-05,
      "loss": 0.8025,
      "step": 943
    },
    {
      "epoch": 0.02889846601263734,
      "grad_norm": 0.21292027831077576,
      "learning_rate": 2.64e-05,
      "loss": 0.7775,
      "step": 944
    },
    {
      "epoch": 0.028929078794430386,
      "grad_norm": 0.22842593491077423,
      "learning_rate": 2.6375e-05,
      "loss": 0.8015,
      "step": 945
    },
    {
      "epoch": 0.028959691576223436,
      "grad_norm": 0.2452908605337143,
      "learning_rate": 2.6350000000000004e-05,
      "loss": 0.7594,
      "step": 946
    },
    {
      "epoch": 0.028990304358016483,
      "grad_norm": 0.1772928386926651,
      "learning_rate": 2.6325e-05,
      "loss": 0.7773,
      "step": 947
    },
    {
      "epoch": 0.029020917139809532,
      "grad_norm": 0.1706618219614029,
      "learning_rate": 2.6300000000000002e-05,
      "loss": 0.6592,
      "step": 948
    },
    {
      "epoch": 0.02905152992160258,
      "grad_norm": 0.16842590272426605,
      "learning_rate": 2.6275e-05,
      "loss": 0.6927,
      "step": 949
    },
    {
      "epoch": 0.029082142703395628,
      "grad_norm": 0.3888902962207794,
      "learning_rate": 2.625e-05,
      "loss": 0.8613,
      "step": 950
    },
    {
      "epoch": 0.029112755485188675,
      "grad_norm": 0.2542911469936371,
      "learning_rate": 2.6225e-05,
      "loss": 0.9262,
      "step": 951
    },
    {
      "epoch": 0.029143368266981724,
      "grad_norm": 0.31784671545028687,
      "learning_rate": 2.6200000000000003e-05,
      "loss": 0.7105,
      "step": 952
    },
    {
      "epoch": 0.02917398104877477,
      "grad_norm": 0.36050692200660706,
      "learning_rate": 2.6175e-05,
      "loss": 0.8345,
      "step": 953
    },
    {
      "epoch": 0.02920459383056782,
      "grad_norm": 0.3212490677833557,
      "learning_rate": 2.6150000000000002e-05,
      "loss": 0.8057,
      "step": 954
    },
    {
      "epoch": 0.029235206612360867,
      "grad_norm": 0.3718416690826416,
      "learning_rate": 2.6124999999999998e-05,
      "loss": 0.8105,
      "step": 955
    },
    {
      "epoch": 0.029265819394153916,
      "grad_norm": 0.2504112422466278,
      "learning_rate": 2.61e-05,
      "loss": 0.7053,
      "step": 956
    },
    {
      "epoch": 0.029296432175946963,
      "grad_norm": 0.15486174821853638,
      "learning_rate": 2.6075e-05,
      "loss": 0.763,
      "step": 957
    },
    {
      "epoch": 0.029327044957740012,
      "grad_norm": 0.15646636486053467,
      "learning_rate": 2.6050000000000003e-05,
      "loss": 0.7328,
      "step": 958
    },
    {
      "epoch": 0.02935765773953306,
      "grad_norm": 0.260026752948761,
      "learning_rate": 2.6025e-05,
      "loss": 0.6958,
      "step": 959
    },
    {
      "epoch": 0.02938827052132611,
      "grad_norm": 0.2180502563714981,
      "learning_rate": 2.6000000000000002e-05,
      "loss": 0.7695,
      "step": 960
    },
    {
      "epoch": 0.029418883303119155,
      "grad_norm": 0.15513451397418976,
      "learning_rate": 2.5974999999999998e-05,
      "loss": 0.6699,
      "step": 961
    },
    {
      "epoch": 0.029449496084912204,
      "grad_norm": 0.22157728672027588,
      "learning_rate": 2.595e-05,
      "loss": 0.9339,
      "step": 962
    },
    {
      "epoch": 0.02948010886670525,
      "grad_norm": 0.256274938583374,
      "learning_rate": 2.5925e-05,
      "loss": 0.6754,
      "step": 963
    },
    {
      "epoch": 0.0295107216484983,
      "grad_norm": 0.2746959328651428,
      "learning_rate": 2.5900000000000003e-05,
      "loss": 0.7969,
      "step": 964
    },
    {
      "epoch": 0.029541334430291347,
      "grad_norm": 0.1856250911951065,
      "learning_rate": 2.5875e-05,
      "loss": 0.5904,
      "step": 965
    },
    {
      "epoch": 0.029571947212084396,
      "grad_norm": 0.23312164843082428,
      "learning_rate": 2.585e-05,
      "loss": 0.7987,
      "step": 966
    },
    {
      "epoch": 0.029602559993877443,
      "grad_norm": 0.40545573830604553,
      "learning_rate": 2.5824999999999998e-05,
      "loss": 0.8105,
      "step": 967
    },
    {
      "epoch": 0.029633172775670492,
      "grad_norm": 0.16560013592243195,
      "learning_rate": 2.58e-05,
      "loss": 0.6502,
      "step": 968
    },
    {
      "epoch": 0.02966378555746354,
      "grad_norm": 0.24401192367076874,
      "learning_rate": 2.5775e-05,
      "loss": 0.8598,
      "step": 969
    },
    {
      "epoch": 0.02969439833925659,
      "grad_norm": 0.2053084373474121,
      "learning_rate": 2.5750000000000002e-05,
      "loss": 0.6596,
      "step": 970
    },
    {
      "epoch": 0.029725011121049635,
      "grad_norm": 0.23731806874275208,
      "learning_rate": 2.5725e-05,
      "loss": 0.6414,
      "step": 971
    },
    {
      "epoch": 0.029755623902842684,
      "grad_norm": 0.2742619216442108,
      "learning_rate": 2.57e-05,
      "loss": 0.8714,
      "step": 972
    },
    {
      "epoch": 0.02978623668463573,
      "grad_norm": 0.17081834375858307,
      "learning_rate": 2.5675e-05,
      "loss": 0.7541,
      "step": 973
    },
    {
      "epoch": 0.02981684946642878,
      "grad_norm": 0.22981511056423187,
      "learning_rate": 2.5650000000000003e-05,
      "loss": 0.7805,
      "step": 974
    },
    {
      "epoch": 0.029847462248221827,
      "grad_norm": 0.2610664665699005,
      "learning_rate": 2.5625e-05,
      "loss": 0.82,
      "step": 975
    },
    {
      "epoch": 0.029878075030014876,
      "grad_norm": 0.24400705099105835,
      "learning_rate": 2.5600000000000002e-05,
      "loss": 0.8744,
      "step": 976
    },
    {
      "epoch": 0.029908687811807923,
      "grad_norm": 0.20337677001953125,
      "learning_rate": 2.5574999999999998e-05,
      "loss": 0.7776,
      "step": 977
    },
    {
      "epoch": 0.029939300593600972,
      "grad_norm": 0.21076776087284088,
      "learning_rate": 2.555e-05,
      "loss": 0.8562,
      "step": 978
    },
    {
      "epoch": 0.02996991337539402,
      "grad_norm": 0.3402242064476013,
      "learning_rate": 2.5525e-05,
      "loss": 0.8104,
      "step": 979
    },
    {
      "epoch": 0.03000052615718707,
      "grad_norm": 0.2519756257534027,
      "learning_rate": 2.5500000000000003e-05,
      "loss": 0.8103,
      "step": 980
    },
    {
      "epoch": 0.030031138938980115,
      "grad_norm": 0.3576935827732086,
      "learning_rate": 2.5475e-05,
      "loss": 0.6809,
      "step": 981
    },
    {
      "epoch": 0.030061751720773165,
      "grad_norm": 0.40246832370758057,
      "learning_rate": 2.5450000000000002e-05,
      "loss": 0.7815,
      "step": 982
    },
    {
      "epoch": 0.03009236450256621,
      "grad_norm": 0.2965378165245056,
      "learning_rate": 2.5424999999999998e-05,
      "loss": 0.8418,
      "step": 983
    },
    {
      "epoch": 0.03012297728435926,
      "grad_norm": 0.36577507853507996,
      "learning_rate": 2.54e-05,
      "loss": 0.7802,
      "step": 984
    },
    {
      "epoch": 0.030153590066152307,
      "grad_norm": 0.2143782377243042,
      "learning_rate": 2.5375e-05,
      "loss": 0.6816,
      "step": 985
    },
    {
      "epoch": 0.030184202847945357,
      "grad_norm": 0.4029542803764343,
      "learning_rate": 2.5350000000000003e-05,
      "loss": 1.0288,
      "step": 986
    },
    {
      "epoch": 0.030214815629738403,
      "grad_norm": 0.28064608573913574,
      "learning_rate": 2.5325e-05,
      "loss": 0.7074,
      "step": 987
    },
    {
      "epoch": 0.030245428411531453,
      "grad_norm": 0.2088554948568344,
      "learning_rate": 2.5300000000000002e-05,
      "loss": 0.8928,
      "step": 988
    },
    {
      "epoch": 0.0302760411933245,
      "grad_norm": 0.3825061023235321,
      "learning_rate": 2.5274999999999998e-05,
      "loss": 0.8885,
      "step": 989
    },
    {
      "epoch": 0.03030665397511755,
      "grad_norm": 0.32542598247528076,
      "learning_rate": 2.525e-05,
      "loss": 0.7423,
      "step": 990
    },
    {
      "epoch": 0.030337266756910595,
      "grad_norm": 0.2502538561820984,
      "learning_rate": 2.5225e-05,
      "loss": 0.7361,
      "step": 991
    },
    {
      "epoch": 0.030367879538703645,
      "grad_norm": 1.280521035194397,
      "learning_rate": 2.5200000000000003e-05,
      "loss": 0.6576,
      "step": 992
    },
    {
      "epoch": 0.03039849232049669,
      "grad_norm": 0.29967001080513,
      "learning_rate": 2.5175e-05,
      "loss": 0.7351,
      "step": 993
    },
    {
      "epoch": 0.03042910510228974,
      "grad_norm": 0.2016165405511856,
      "learning_rate": 2.515e-05,
      "loss": 0.7163,
      "step": 994
    },
    {
      "epoch": 0.030459717884082787,
      "grad_norm": 0.3274289667606354,
      "learning_rate": 2.5124999999999997e-05,
      "loss": 0.6128,
      "step": 995
    },
    {
      "epoch": 0.030490330665875837,
      "grad_norm": 0.23741503059864044,
      "learning_rate": 2.51e-05,
      "loss": 0.8194,
      "step": 996
    },
    {
      "epoch": 0.030520943447668883,
      "grad_norm": 0.21378040313720703,
      "learning_rate": 2.5075e-05,
      "loss": 0.7401,
      "step": 997
    },
    {
      "epoch": 0.030551556229461933,
      "grad_norm": 0.17600701749324799,
      "learning_rate": 2.5050000000000002e-05,
      "loss": 0.8213,
      "step": 998
    },
    {
      "epoch": 0.03058216901125498,
      "grad_norm": 0.24449259042739868,
      "learning_rate": 2.5025e-05,
      "loss": 0.7184,
      "step": 999
    },
    {
      "epoch": 0.03061278179304803,
      "grad_norm": 0.2805769741535187,
      "learning_rate": 2.5e-05,
      "loss": 0.6921,
      "step": 1000
    },
    {
      "epoch": 0.030643394574841075,
      "grad_norm": 0.2672470510005951,
      "learning_rate": 2.4975e-05,
      "loss": 0.8719,
      "step": 1001
    },
    {
      "epoch": 0.030674007356634125,
      "grad_norm": 0.1998967081308365,
      "learning_rate": 2.495e-05,
      "loss": 0.7172,
      "step": 1002
    },
    {
      "epoch": 0.03070462013842717,
      "grad_norm": 0.30733510851860046,
      "learning_rate": 2.4925000000000003e-05,
      "loss": 0.8658,
      "step": 1003
    },
    {
      "epoch": 0.03073523292022022,
      "grad_norm": 0.2436271458864212,
      "learning_rate": 2.4900000000000002e-05,
      "loss": 0.8209,
      "step": 1004
    },
    {
      "epoch": 0.03076584570201327,
      "grad_norm": 0.23676183819770813,
      "learning_rate": 2.4875e-05,
      "loss": 0.7746,
      "step": 1005
    },
    {
      "epoch": 0.030796458483806317,
      "grad_norm": 0.2586055099964142,
      "learning_rate": 2.485e-05,
      "loss": 0.7883,
      "step": 1006
    },
    {
      "epoch": 0.030827071265599366,
      "grad_norm": 0.23301450908184052,
      "learning_rate": 2.4825e-05,
      "loss": 0.7684,
      "step": 1007
    },
    {
      "epoch": 0.030857684047392413,
      "grad_norm": 0.364602267742157,
      "learning_rate": 2.48e-05,
      "loss": 0.6985,
      "step": 1008
    },
    {
      "epoch": 0.030888296829185462,
      "grad_norm": 0.17294137179851532,
      "learning_rate": 2.4775000000000003e-05,
      "loss": 0.7668,
      "step": 1009
    },
    {
      "epoch": 0.03091890961097851,
      "grad_norm": 0.3043336272239685,
      "learning_rate": 2.4750000000000002e-05,
      "loss": 0.7448,
      "step": 1010
    },
    {
      "epoch": 0.03094952239277156,
      "grad_norm": 0.17291729152202606,
      "learning_rate": 2.4725e-05,
      "loss": 0.6163,
      "step": 1011
    },
    {
      "epoch": 0.030980135174564605,
      "grad_norm": 0.6973847150802612,
      "learning_rate": 2.47e-05,
      "loss": 0.7235,
      "step": 1012
    },
    {
      "epoch": 0.031010747956357654,
      "grad_norm": 0.40008077025413513,
      "learning_rate": 2.4675e-05,
      "loss": 0.6972,
      "step": 1013
    },
    {
      "epoch": 0.0310413607381507,
      "grad_norm": 0.2192194163799286,
      "learning_rate": 2.465e-05,
      "loss": 0.7977,
      "step": 1014
    },
    {
      "epoch": 0.03107197351994375,
      "grad_norm": 0.24331852793693542,
      "learning_rate": 2.4625000000000002e-05,
      "loss": 0.7921,
      "step": 1015
    },
    {
      "epoch": 0.031102586301736797,
      "grad_norm": 0.23280198872089386,
      "learning_rate": 2.46e-05,
      "loss": 0.7266,
      "step": 1016
    },
    {
      "epoch": 0.031133199083529847,
      "grad_norm": 0.201460063457489,
      "learning_rate": 2.4575e-05,
      "loss": 0.8533,
      "step": 1017
    },
    {
      "epoch": 0.031163811865322893,
      "grad_norm": 0.2165437787771225,
      "learning_rate": 2.455e-05,
      "loss": 0.6918,
      "step": 1018
    },
    {
      "epoch": 0.031194424647115943,
      "grad_norm": 0.2895566523075104,
      "learning_rate": 2.4525e-05,
      "loss": 0.78,
      "step": 1019
    },
    {
      "epoch": 0.03122503742890899,
      "grad_norm": 0.1697976440191269,
      "learning_rate": 2.45e-05,
      "loss": 0.6894,
      "step": 1020
    },
    {
      "epoch": 0.031255650210702035,
      "grad_norm": 0.21625764667987823,
      "learning_rate": 2.4475000000000002e-05,
      "loss": 0.9442,
      "step": 1021
    },
    {
      "epoch": 0.03128626299249509,
      "grad_norm": 0.18262450397014618,
      "learning_rate": 2.445e-05,
      "loss": 0.8432,
      "step": 1022
    },
    {
      "epoch": 0.031316875774288135,
      "grad_norm": 1.3687493801116943,
      "learning_rate": 2.4425e-05,
      "loss": 0.6762,
      "step": 1023
    },
    {
      "epoch": 0.03134748855608118,
      "grad_norm": 0.1907907873392105,
      "learning_rate": 2.44e-05,
      "loss": 0.6923,
      "step": 1024
    },
    {
      "epoch": 0.03137810133787423,
      "grad_norm": 0.24999335408210754,
      "learning_rate": 2.4375e-05,
      "loss": 0.7202,
      "step": 1025
    },
    {
      "epoch": 0.03140871411966728,
      "grad_norm": 0.17868153750896454,
      "learning_rate": 2.435e-05,
      "loss": 0.7341,
      "step": 1026
    },
    {
      "epoch": 0.03143932690146033,
      "grad_norm": 0.2665456235408783,
      "learning_rate": 2.4325000000000002e-05,
      "loss": 0.8267,
      "step": 1027
    },
    {
      "epoch": 0.03146993968325337,
      "grad_norm": 0.19612860679626465,
      "learning_rate": 2.43e-05,
      "loss": 0.6971,
      "step": 1028
    },
    {
      "epoch": 0.03150055246504642,
      "grad_norm": 0.26543185114860535,
      "learning_rate": 2.4275e-05,
      "loss": 0.6859,
      "step": 1029
    },
    {
      "epoch": 0.03153116524683947,
      "grad_norm": 0.339065283536911,
      "learning_rate": 2.425e-05,
      "loss": 0.8152,
      "step": 1030
    },
    {
      "epoch": 0.03156177802863252,
      "grad_norm": 0.5531843900680542,
      "learning_rate": 2.4225e-05,
      "loss": 0.6291,
      "step": 1031
    },
    {
      "epoch": 0.031592390810425565,
      "grad_norm": 0.2390822172164917,
      "learning_rate": 2.4200000000000002e-05,
      "loss": 0.7258,
      "step": 1032
    },
    {
      "epoch": 0.03162300359221861,
      "grad_norm": 0.17494240403175354,
      "learning_rate": 2.4175e-05,
      "loss": 0.8191,
      "step": 1033
    },
    {
      "epoch": 0.031653616374011664,
      "grad_norm": 0.2857089936733246,
      "learning_rate": 2.415e-05,
      "loss": 0.683,
      "step": 1034
    },
    {
      "epoch": 0.03168422915580471,
      "grad_norm": 0.23992601037025452,
      "learning_rate": 2.4125e-05,
      "loss": 0.7401,
      "step": 1035
    },
    {
      "epoch": 0.03171484193759776,
      "grad_norm": 0.3536628484725952,
      "learning_rate": 2.41e-05,
      "loss": 0.7742,
      "step": 1036
    },
    {
      "epoch": 0.0317454547193908,
      "grad_norm": 0.19519542157649994,
      "learning_rate": 2.4075e-05,
      "loss": 0.823,
      "step": 1037
    },
    {
      "epoch": 0.031776067501183856,
      "grad_norm": 0.24532130360603333,
      "learning_rate": 2.4050000000000002e-05,
      "loss": 0.7056,
      "step": 1038
    },
    {
      "epoch": 0.0318066802829769,
      "grad_norm": 0.21795259416103363,
      "learning_rate": 2.4025e-05,
      "loss": 0.989,
      "step": 1039
    },
    {
      "epoch": 0.03183729306476995,
      "grad_norm": 0.2766578495502472,
      "learning_rate": 2.4e-05,
      "loss": 0.7509,
      "step": 1040
    },
    {
      "epoch": 0.031867905846562995,
      "grad_norm": 0.35462427139282227,
      "learning_rate": 2.3975e-05,
      "loss": 0.8051,
      "step": 1041
    },
    {
      "epoch": 0.03189851862835605,
      "grad_norm": 0.3083861470222473,
      "learning_rate": 2.395e-05,
      "loss": 0.7584,
      "step": 1042
    },
    {
      "epoch": 0.031929131410149095,
      "grad_norm": 0.2157393991947174,
      "learning_rate": 2.3925e-05,
      "loss": 0.5724,
      "step": 1043
    },
    {
      "epoch": 0.03195974419194214,
      "grad_norm": 0.166135773062706,
      "learning_rate": 2.39e-05,
      "loss": 0.6327,
      "step": 1044
    },
    {
      "epoch": 0.03199035697373519,
      "grad_norm": 0.6858850121498108,
      "learning_rate": 2.3875e-05,
      "loss": 0.8305,
      "step": 1045
    },
    {
      "epoch": 0.03202096975552824,
      "grad_norm": 0.22477486729621887,
      "learning_rate": 2.385e-05,
      "loss": 0.7062,
      "step": 1046
    },
    {
      "epoch": 0.03205158253732129,
      "grad_norm": 0.32605013251304626,
      "learning_rate": 2.3825e-05,
      "loss": 0.7735,
      "step": 1047
    },
    {
      "epoch": 0.03208219531911433,
      "grad_norm": 0.3195783495903015,
      "learning_rate": 2.38e-05,
      "loss": 0.6371,
      "step": 1048
    },
    {
      "epoch": 0.03211280810090738,
      "grad_norm": 0.22577716410160065,
      "learning_rate": 2.3775e-05,
      "loss": 0.873,
      "step": 1049
    },
    {
      "epoch": 0.03214342088270043,
      "grad_norm": 0.25736093521118164,
      "learning_rate": 2.375e-05,
      "loss": 0.8905,
      "step": 1050
    },
    {
      "epoch": 0.03217403366449348,
      "grad_norm": 0.244774729013443,
      "learning_rate": 2.3725e-05,
      "loss": 0.743,
      "step": 1051
    },
    {
      "epoch": 0.032204646446286525,
      "grad_norm": 0.29097694158554077,
      "learning_rate": 2.37e-05,
      "loss": 0.7935,
      "step": 1052
    },
    {
      "epoch": 0.03223525922807957,
      "grad_norm": 0.7135207056999207,
      "learning_rate": 2.3675e-05,
      "loss": 0.6639,
      "step": 1053
    },
    {
      "epoch": 0.032265872009872625,
      "grad_norm": 1.418482780456543,
      "learning_rate": 2.365e-05,
      "loss": 0.6169,
      "step": 1054
    },
    {
      "epoch": 0.03229648479166567,
      "grad_norm": 0.20122043788433075,
      "learning_rate": 2.3624999999999998e-05,
      "loss": 0.7836,
      "step": 1055
    },
    {
      "epoch": 0.03232709757345872,
      "grad_norm": 0.344415545463562,
      "learning_rate": 2.36e-05,
      "loss": 0.8211,
      "step": 1056
    },
    {
      "epoch": 0.03235771035525176,
      "grad_norm": 0.41421613097190857,
      "learning_rate": 2.3575e-05,
      "loss": 0.8339,
      "step": 1057
    },
    {
      "epoch": 0.03238832313704482,
      "grad_norm": 0.1953829526901245,
      "learning_rate": 2.355e-05,
      "loss": 0.696,
      "step": 1058
    },
    {
      "epoch": 0.03241893591883786,
      "grad_norm": 0.6547843813896179,
      "learning_rate": 2.3525e-05,
      "loss": 0.8655,
      "step": 1059
    },
    {
      "epoch": 0.03244954870063091,
      "grad_norm": 0.21455705165863037,
      "learning_rate": 2.35e-05,
      "loss": 0.8717,
      "step": 1060
    },
    {
      "epoch": 0.032480161482423955,
      "grad_norm": 0.7060670256614685,
      "learning_rate": 2.3475e-05,
      "loss": 0.7204,
      "step": 1061
    },
    {
      "epoch": 0.03251077426421701,
      "grad_norm": 0.2592270076274872,
      "learning_rate": 2.345e-05,
      "loss": 0.7645,
      "step": 1062
    },
    {
      "epoch": 0.032541387046010055,
      "grad_norm": 0.24023239314556122,
      "learning_rate": 2.3425000000000004e-05,
      "loss": 0.8289,
      "step": 1063
    },
    {
      "epoch": 0.0325719998278031,
      "grad_norm": 0.18808050453662872,
      "learning_rate": 2.3400000000000003e-05,
      "loss": 0.6561,
      "step": 1064
    },
    {
      "epoch": 0.03260261260959615,
      "grad_norm": 0.24245800077915192,
      "learning_rate": 2.3375000000000002e-05,
      "loss": 0.7326,
      "step": 1065
    },
    {
      "epoch": 0.0326332253913892,
      "grad_norm": 0.43684300780296326,
      "learning_rate": 2.3350000000000002e-05,
      "loss": 0.7375,
      "step": 1066
    },
    {
      "epoch": 0.03266383817318225,
      "grad_norm": 0.17757698893547058,
      "learning_rate": 2.3325e-05,
      "loss": 0.7503,
      "step": 1067
    },
    {
      "epoch": 0.03269445095497529,
      "grad_norm": 0.21472454071044922,
      "learning_rate": 2.3300000000000004e-05,
      "loss": 0.7114,
      "step": 1068
    },
    {
      "epoch": 0.03272506373676834,
      "grad_norm": 0.24107953906059265,
      "learning_rate": 2.3275000000000003e-05,
      "loss": 0.6535,
      "step": 1069
    },
    {
      "epoch": 0.03275567651856139,
      "grad_norm": 0.9684463739395142,
      "learning_rate": 2.3250000000000003e-05,
      "loss": 0.7434,
      "step": 1070
    },
    {
      "epoch": 0.03278628930035444,
      "grad_norm": 0.17890724539756775,
      "learning_rate": 2.3225000000000002e-05,
      "loss": 0.8035,
      "step": 1071
    },
    {
      "epoch": 0.032816902082147485,
      "grad_norm": 0.2446495145559311,
      "learning_rate": 2.32e-05,
      "loss": 0.8181,
      "step": 1072
    },
    {
      "epoch": 0.03284751486394053,
      "grad_norm": 0.22007758915424347,
      "learning_rate": 2.3175e-05,
      "loss": 0.8838,
      "step": 1073
    },
    {
      "epoch": 0.032878127645733585,
      "grad_norm": 0.17445851862430573,
      "learning_rate": 2.3150000000000004e-05,
      "loss": 0.8169,
      "step": 1074
    },
    {
      "epoch": 0.03290874042752663,
      "grad_norm": 0.3616030216217041,
      "learning_rate": 2.3125000000000003e-05,
      "loss": 0.7559,
      "step": 1075
    },
    {
      "epoch": 0.03293935320931968,
      "grad_norm": 0.16611318290233612,
      "learning_rate": 2.3100000000000002e-05,
      "loss": 0.6994,
      "step": 1076
    },
    {
      "epoch": 0.032969965991112724,
      "grad_norm": 0.3122531771659851,
      "learning_rate": 2.3075000000000002e-05,
      "loss": 0.7743,
      "step": 1077
    },
    {
      "epoch": 0.03300057877290578,
      "grad_norm": 0.18453514575958252,
      "learning_rate": 2.305e-05,
      "loss": 0.6955,
      "step": 1078
    },
    {
      "epoch": 0.03303119155469882,
      "grad_norm": 0.24205918610095978,
      "learning_rate": 2.3025e-05,
      "loss": 0.8074,
      "step": 1079
    },
    {
      "epoch": 0.03306180433649187,
      "grad_norm": 0.16894246637821198,
      "learning_rate": 2.3000000000000003e-05,
      "loss": 0.7782,
      "step": 1080
    },
    {
      "epoch": 0.033092417118284916,
      "grad_norm": 0.21639180183410645,
      "learning_rate": 2.2975000000000003e-05,
      "loss": 0.8027,
      "step": 1081
    },
    {
      "epoch": 0.03312302990007797,
      "grad_norm": 0.2838424742221832,
      "learning_rate": 2.2950000000000002e-05,
      "loss": 0.9947,
      "step": 1082
    },
    {
      "epoch": 0.033153642681871015,
      "grad_norm": 0.3288102447986603,
      "learning_rate": 2.2925e-05,
      "loss": 0.8138,
      "step": 1083
    },
    {
      "epoch": 0.03318425546366406,
      "grad_norm": 0.22976329922676086,
      "learning_rate": 2.29e-05,
      "loss": 0.8113,
      "step": 1084
    },
    {
      "epoch": 0.03321486824545711,
      "grad_norm": 0.28852540254592896,
      "learning_rate": 2.2875e-05,
      "loss": 0.8381,
      "step": 1085
    },
    {
      "epoch": 0.03324548102725016,
      "grad_norm": 0.4625357687473297,
      "learning_rate": 2.2850000000000003e-05,
      "loss": 0.6988,
      "step": 1086
    },
    {
      "epoch": 0.03327609380904321,
      "grad_norm": 0.17708879709243774,
      "learning_rate": 2.2825000000000003e-05,
      "loss": 0.7215,
      "step": 1087
    },
    {
      "epoch": 0.03330670659083625,
      "grad_norm": 0.1813117414712906,
      "learning_rate": 2.2800000000000002e-05,
      "loss": 0.8303,
      "step": 1088
    },
    {
      "epoch": 0.03333731937262931,
      "grad_norm": 0.21459999680519104,
      "learning_rate": 2.2775e-05,
      "loss": 0.7302,
      "step": 1089
    },
    {
      "epoch": 0.03336793215442235,
      "grad_norm": 0.2915843725204468,
      "learning_rate": 2.275e-05,
      "loss": 0.7303,
      "step": 1090
    },
    {
      "epoch": 0.0333985449362154,
      "grad_norm": 0.30159488320350647,
      "learning_rate": 2.2725000000000003e-05,
      "loss": 0.7838,
      "step": 1091
    },
    {
      "epoch": 0.033429157718008445,
      "grad_norm": 0.21692398190498352,
      "learning_rate": 2.2700000000000003e-05,
      "loss": 0.7038,
      "step": 1092
    },
    {
      "epoch": 0.0334597704998015,
      "grad_norm": 0.24687717854976654,
      "learning_rate": 2.2675000000000002e-05,
      "loss": 0.6412,
      "step": 1093
    },
    {
      "epoch": 0.033490383281594545,
      "grad_norm": 0.22566983103752136,
      "learning_rate": 2.265e-05,
      "loss": 0.808,
      "step": 1094
    },
    {
      "epoch": 0.03352099606338759,
      "grad_norm": 0.24026106297969818,
      "learning_rate": 2.2625e-05,
      "loss": 0.738,
      "step": 1095
    },
    {
      "epoch": 0.03355160884518064,
      "grad_norm": 0.3850497007369995,
      "learning_rate": 2.26e-05,
      "loss": 0.6194,
      "step": 1096
    },
    {
      "epoch": 0.03358222162697369,
      "grad_norm": 0.19806824624538422,
      "learning_rate": 2.2575000000000003e-05,
      "loss": 0.7714,
      "step": 1097
    },
    {
      "epoch": 0.03361283440876674,
      "grad_norm": 0.22818557918071747,
      "learning_rate": 2.2550000000000003e-05,
      "loss": 0.6665,
      "step": 1098
    },
    {
      "epoch": 0.03364344719055978,
      "grad_norm": 0.18318063020706177,
      "learning_rate": 2.2525000000000002e-05,
      "loss": 0.7591,
      "step": 1099
    },
    {
      "epoch": 0.03367405997235283,
      "grad_norm": 0.8390282392501831,
      "learning_rate": 2.25e-05,
      "loss": 0.7297,
      "step": 1100
    },
    {
      "epoch": 0.03370467275414588,
      "grad_norm": 0.28999003767967224,
      "learning_rate": 2.2475e-05,
      "loss": 0.6721,
      "step": 1101
    },
    {
      "epoch": 0.03373528553593893,
      "grad_norm": 0.1815023273229599,
      "learning_rate": 2.245e-05,
      "loss": 0.6363,
      "step": 1102
    },
    {
      "epoch": 0.033765898317731975,
      "grad_norm": 0.2573976218700409,
      "learning_rate": 2.2425000000000003e-05,
      "loss": 0.7597,
      "step": 1103
    },
    {
      "epoch": 0.03379651109952502,
      "grad_norm": 0.2713649272918701,
      "learning_rate": 2.2400000000000002e-05,
      "loss": 0.7055,
      "step": 1104
    },
    {
      "epoch": 0.033827123881318075,
      "grad_norm": 1.1128437519073486,
      "learning_rate": 2.2375000000000002e-05,
      "loss": 0.8261,
      "step": 1105
    },
    {
      "epoch": 0.03385773666311112,
      "grad_norm": 0.29000529646873474,
      "learning_rate": 2.235e-05,
      "loss": 0.8487,
      "step": 1106
    },
    {
      "epoch": 0.03388834944490417,
      "grad_norm": 0.44423502683639526,
      "learning_rate": 2.2325e-05,
      "loss": 0.8626,
      "step": 1107
    },
    {
      "epoch": 0.033918962226697214,
      "grad_norm": 0.16694581508636475,
      "learning_rate": 2.23e-05,
      "loss": 0.8419,
      "step": 1108
    },
    {
      "epoch": 0.03394957500849027,
      "grad_norm": 0.14866119623184204,
      "learning_rate": 2.2275000000000003e-05,
      "loss": 0.6843,
      "step": 1109
    },
    {
      "epoch": 0.03398018779028331,
      "grad_norm": 0.25484177470207214,
      "learning_rate": 2.2250000000000002e-05,
      "loss": 0.7502,
      "step": 1110
    },
    {
      "epoch": 0.03401080057207636,
      "grad_norm": 0.22000445425510406,
      "learning_rate": 2.2225e-05,
      "loss": 0.6413,
      "step": 1111
    },
    {
      "epoch": 0.034041413353869406,
      "grad_norm": 0.21870647370815277,
      "learning_rate": 2.22e-05,
      "loss": 0.8555,
      "step": 1112
    },
    {
      "epoch": 0.03407202613566246,
      "grad_norm": 0.20906712114810944,
      "learning_rate": 2.2175e-05,
      "loss": 0.7018,
      "step": 1113
    },
    {
      "epoch": 0.034102638917455505,
      "grad_norm": 0.26261433959007263,
      "learning_rate": 2.215e-05,
      "loss": 0.7892,
      "step": 1114
    },
    {
      "epoch": 0.03413325169924855,
      "grad_norm": 0.23263175785541534,
      "learning_rate": 2.2125000000000002e-05,
      "loss": 0.8401,
      "step": 1115
    },
    {
      "epoch": 0.0341638644810416,
      "grad_norm": 0.20982681214809418,
      "learning_rate": 2.2100000000000002e-05,
      "loss": 0.7179,
      "step": 1116
    },
    {
      "epoch": 0.03419447726283465,
      "grad_norm": 0.2989276051521301,
      "learning_rate": 2.2075e-05,
      "loss": 0.8369,
      "step": 1117
    },
    {
      "epoch": 0.0342250900446277,
      "grad_norm": 0.18896664679050446,
      "learning_rate": 2.205e-05,
      "loss": 0.6666,
      "step": 1118
    },
    {
      "epoch": 0.03425570282642074,
      "grad_norm": 0.5758681297302246,
      "learning_rate": 2.2025e-05,
      "loss": 0.7982,
      "step": 1119
    },
    {
      "epoch": 0.03428631560821379,
      "grad_norm": 0.1584305763244629,
      "learning_rate": 2.2000000000000003e-05,
      "loss": 0.7726,
      "step": 1120
    },
    {
      "epoch": 0.03431692839000684,
      "grad_norm": 0.2989090085029602,
      "learning_rate": 2.1975000000000002e-05,
      "loss": 0.6603,
      "step": 1121
    },
    {
      "epoch": 0.03434754117179989,
      "grad_norm": 0.24664802849292755,
      "learning_rate": 2.195e-05,
      "loss": 0.6185,
      "step": 1122
    },
    {
      "epoch": 0.034378153953592935,
      "grad_norm": 0.23095259070396423,
      "learning_rate": 2.1925e-05,
      "loss": 0.7402,
      "step": 1123
    },
    {
      "epoch": 0.03440876673538598,
      "grad_norm": 0.34628477692604065,
      "learning_rate": 2.19e-05,
      "loss": 0.7365,
      "step": 1124
    },
    {
      "epoch": 0.034439379517179035,
      "grad_norm": 0.33656370639801025,
      "learning_rate": 2.1875e-05,
      "loss": 0.6603,
      "step": 1125
    },
    {
      "epoch": 0.03446999229897208,
      "grad_norm": 0.4978480935096741,
      "learning_rate": 2.1850000000000003e-05,
      "loss": 0.6149,
      "step": 1126
    },
    {
      "epoch": 0.03450060508076513,
      "grad_norm": 0.20183926820755005,
      "learning_rate": 2.1825000000000002e-05,
      "loss": 0.8074,
      "step": 1127
    },
    {
      "epoch": 0.034531217862558174,
      "grad_norm": 0.22236734628677368,
      "learning_rate": 2.18e-05,
      "loss": 0.6891,
      "step": 1128
    },
    {
      "epoch": 0.03456183064435123,
      "grad_norm": 0.288824200630188,
      "learning_rate": 2.1775e-05,
      "loss": 0.8746,
      "step": 1129
    },
    {
      "epoch": 0.03459244342614427,
      "grad_norm": 0.15575969219207764,
      "learning_rate": 2.175e-05,
      "loss": 0.7581,
      "step": 1130
    },
    {
      "epoch": 0.03462305620793732,
      "grad_norm": 0.3309248089790344,
      "learning_rate": 2.1725e-05,
      "loss": 0.7262,
      "step": 1131
    },
    {
      "epoch": 0.034653668989730366,
      "grad_norm": 0.20333746075630188,
      "learning_rate": 2.1700000000000002e-05,
      "loss": 0.8743,
      "step": 1132
    },
    {
      "epoch": 0.03468428177152342,
      "grad_norm": 0.1957504153251648,
      "learning_rate": 2.1675e-05,
      "loss": 0.7458,
      "step": 1133
    },
    {
      "epoch": 0.034714894553316465,
      "grad_norm": 0.15753571689128876,
      "learning_rate": 2.165e-05,
      "loss": 0.6516,
      "step": 1134
    },
    {
      "epoch": 0.03474550733510951,
      "grad_norm": 0.18009740114212036,
      "learning_rate": 2.1625e-05,
      "loss": 0.7944,
      "step": 1135
    },
    {
      "epoch": 0.03477612011690256,
      "grad_norm": 0.3112514913082123,
      "learning_rate": 2.16e-05,
      "loss": 0.7174,
      "step": 1136
    },
    {
      "epoch": 0.03480673289869561,
      "grad_norm": 0.1874092072248459,
      "learning_rate": 2.1575e-05,
      "loss": 0.7603,
      "step": 1137
    },
    {
      "epoch": 0.03483734568048866,
      "grad_norm": 0.1669309288263321,
      "learning_rate": 2.1550000000000002e-05,
      "loss": 0.8291,
      "step": 1138
    },
    {
      "epoch": 0.034867958462281703,
      "grad_norm": 0.2280818074941635,
      "learning_rate": 2.1525e-05,
      "loss": 0.7697,
      "step": 1139
    },
    {
      "epoch": 0.03489857124407475,
      "grad_norm": 0.19658879935741425,
      "learning_rate": 2.15e-05,
      "loss": 0.8109,
      "step": 1140
    },
    {
      "epoch": 0.0349291840258678,
      "grad_norm": 0.41541388630867004,
      "learning_rate": 2.1475e-05,
      "loss": 0.7146,
      "step": 1141
    },
    {
      "epoch": 0.03495979680766085,
      "grad_norm": 0.40899837017059326,
      "learning_rate": 2.145e-05,
      "loss": 0.694,
      "step": 1142
    },
    {
      "epoch": 0.034990409589453896,
      "grad_norm": 0.26091310381889343,
      "learning_rate": 2.1425e-05,
      "loss": 0.7817,
      "step": 1143
    },
    {
      "epoch": 0.03502102237124694,
      "grad_norm": 0.16419334709644318,
      "learning_rate": 2.1400000000000002e-05,
      "loss": 0.8975,
      "step": 1144
    },
    {
      "epoch": 0.035051635153039995,
      "grad_norm": 0.29402869939804077,
      "learning_rate": 2.1375e-05,
      "loss": 0.7862,
      "step": 1145
    },
    {
      "epoch": 0.03508224793483304,
      "grad_norm": 0.15367941558361053,
      "learning_rate": 2.135e-05,
      "loss": 0.7184,
      "step": 1146
    },
    {
      "epoch": 0.03511286071662609,
      "grad_norm": 0.3092745244503021,
      "learning_rate": 2.1325e-05,
      "loss": 0.7517,
      "step": 1147
    },
    {
      "epoch": 0.035143473498419134,
      "grad_norm": 0.26073184609413147,
      "learning_rate": 2.13e-05,
      "loss": 0.82,
      "step": 1148
    },
    {
      "epoch": 0.03517408628021219,
      "grad_norm": 0.4355182647705078,
      "learning_rate": 2.1275000000000002e-05,
      "loss": 0.7374,
      "step": 1149
    },
    {
      "epoch": 0.03520469906200523,
      "grad_norm": 0.2065318375825882,
      "learning_rate": 2.125e-05,
      "loss": 0.8057,
      "step": 1150
    },
    {
      "epoch": 0.03523531184379828,
      "grad_norm": 0.559861958026886,
      "learning_rate": 2.1225e-05,
      "loss": 0.6041,
      "step": 1151
    },
    {
      "epoch": 0.035265924625591326,
      "grad_norm": 0.9366693496704102,
      "learning_rate": 2.12e-05,
      "loss": 0.8629,
      "step": 1152
    },
    {
      "epoch": 0.03529653740738438,
      "grad_norm": 0.18763041496276855,
      "learning_rate": 2.1175e-05,
      "loss": 0.8517,
      "step": 1153
    },
    {
      "epoch": 0.035327150189177425,
      "grad_norm": 0.21863533556461334,
      "learning_rate": 2.115e-05,
      "loss": 0.7602,
      "step": 1154
    },
    {
      "epoch": 0.03535776297097047,
      "grad_norm": 0.21045511960983276,
      "learning_rate": 2.1125000000000002e-05,
      "loss": 0.8532,
      "step": 1155
    },
    {
      "epoch": 0.03538837575276352,
      "grad_norm": 0.20625713467597961,
      "learning_rate": 2.11e-05,
      "loss": 0.8244,
      "step": 1156
    },
    {
      "epoch": 0.03541898853455657,
      "grad_norm": 0.1973738670349121,
      "learning_rate": 2.1075e-05,
      "loss": 0.6731,
      "step": 1157
    },
    {
      "epoch": 0.03544960131634962,
      "grad_norm": 0.20676802098751068,
      "learning_rate": 2.105e-05,
      "loss": 0.7205,
      "step": 1158
    },
    {
      "epoch": 0.035480214098142664,
      "grad_norm": 0.3089343011379242,
      "learning_rate": 2.1025e-05,
      "loss": 0.8358,
      "step": 1159
    },
    {
      "epoch": 0.03551082687993571,
      "grad_norm": 0.20796692371368408,
      "learning_rate": 2.1e-05,
      "loss": 0.6736,
      "step": 1160
    },
    {
      "epoch": 0.03554143966172876,
      "grad_norm": 0.2701050341129303,
      "learning_rate": 2.0975e-05,
      "loss": 0.8352,
      "step": 1161
    },
    {
      "epoch": 0.03557205244352181,
      "grad_norm": 0.2579197883605957,
      "learning_rate": 2.095e-05,
      "loss": 0.8468,
      "step": 1162
    },
    {
      "epoch": 0.035602665225314856,
      "grad_norm": 0.19530996680259705,
      "learning_rate": 2.0925e-05,
      "loss": 0.824,
      "step": 1163
    },
    {
      "epoch": 0.0356332780071079,
      "grad_norm": 0.27984434366226196,
      "learning_rate": 2.09e-05,
      "loss": 0.684,
      "step": 1164
    },
    {
      "epoch": 0.035663890788900955,
      "grad_norm": 0.23639734089374542,
      "learning_rate": 2.0875e-05,
      "loss": 0.842,
      "step": 1165
    },
    {
      "epoch": 0.035694503570694,
      "grad_norm": 0.2505331337451935,
      "learning_rate": 2.085e-05,
      "loss": 0.8035,
      "step": 1166
    },
    {
      "epoch": 0.03572511635248705,
      "grad_norm": 0.3943125307559967,
      "learning_rate": 2.0825e-05,
      "loss": 0.8661,
      "step": 1167
    },
    {
      "epoch": 0.035755729134280094,
      "grad_norm": 0.1803193837404251,
      "learning_rate": 2.08e-05,
      "loss": 0.7519,
      "step": 1168
    },
    {
      "epoch": 0.03578634191607315,
      "grad_norm": 0.23200470209121704,
      "learning_rate": 2.0775e-05,
      "loss": 0.7943,
      "step": 1169
    },
    {
      "epoch": 0.03581695469786619,
      "grad_norm": 0.3062182664871216,
      "learning_rate": 2.075e-05,
      "loss": 0.7633,
      "step": 1170
    },
    {
      "epoch": 0.03584756747965924,
      "grad_norm": 0.24521395564079285,
      "learning_rate": 2.0725e-05,
      "loss": 0.7834,
      "step": 1171
    },
    {
      "epoch": 0.03587818026145229,
      "grad_norm": 0.38243353366851807,
      "learning_rate": 2.07e-05,
      "loss": 0.7468,
      "step": 1172
    },
    {
      "epoch": 0.03590879304324534,
      "grad_norm": 0.2493719905614853,
      "learning_rate": 2.0675e-05,
      "loss": 0.9047,
      "step": 1173
    },
    {
      "epoch": 0.035939405825038385,
      "grad_norm": 0.2561963200569153,
      "learning_rate": 2.065e-05,
      "loss": 0.7653,
      "step": 1174
    },
    {
      "epoch": 0.03597001860683143,
      "grad_norm": 0.18940269947052002,
      "learning_rate": 2.0625e-05,
      "loss": 0.7652,
      "step": 1175
    },
    {
      "epoch": 0.036000631388624485,
      "grad_norm": 0.2041228860616684,
      "learning_rate": 2.06e-05,
      "loss": 0.7069,
      "step": 1176
    },
    {
      "epoch": 0.03603124417041753,
      "grad_norm": 0.20829564332962036,
      "learning_rate": 2.0575e-05,
      "loss": 0.7293,
      "step": 1177
    },
    {
      "epoch": 0.03606185695221058,
      "grad_norm": 0.24220138788223267,
      "learning_rate": 2.055e-05,
      "loss": 0.9095,
      "step": 1178
    },
    {
      "epoch": 0.036092469734003624,
      "grad_norm": 0.15308748185634613,
      "learning_rate": 2.0525e-05,
      "loss": 0.8193,
      "step": 1179
    },
    {
      "epoch": 0.03612308251579668,
      "grad_norm": 0.1888790875673294,
      "learning_rate": 2.05e-05,
      "loss": 0.7671,
      "step": 1180
    },
    {
      "epoch": 0.03615369529758972,
      "grad_norm": 0.21594621241092682,
      "learning_rate": 2.0475e-05,
      "loss": 0.7601,
      "step": 1181
    },
    {
      "epoch": 0.03618430807938277,
      "grad_norm": 0.15653324127197266,
      "learning_rate": 2.045e-05,
      "loss": 0.6952,
      "step": 1182
    },
    {
      "epoch": 0.036214920861175816,
      "grad_norm": 0.35886916518211365,
      "learning_rate": 2.0425e-05,
      "loss": 0.7541,
      "step": 1183
    },
    {
      "epoch": 0.03624553364296887,
      "grad_norm": 0.17434954643249512,
      "learning_rate": 2.04e-05,
      "loss": 0.8377,
      "step": 1184
    },
    {
      "epoch": 0.036276146424761915,
      "grad_norm": 0.1707240492105484,
      "learning_rate": 2.0375e-05,
      "loss": 0.8552,
      "step": 1185
    },
    {
      "epoch": 0.03630675920655496,
      "grad_norm": 0.19369496405124664,
      "learning_rate": 2.035e-05,
      "loss": 0.7394,
      "step": 1186
    },
    {
      "epoch": 0.03633737198834801,
      "grad_norm": 0.31709030270576477,
      "learning_rate": 2.0325e-05,
      "loss": 0.6634,
      "step": 1187
    },
    {
      "epoch": 0.03636798477014106,
      "grad_norm": 0.27356967329978943,
      "learning_rate": 2.0300000000000002e-05,
      "loss": 0.8346,
      "step": 1188
    },
    {
      "epoch": 0.03639859755193411,
      "grad_norm": 0.32094335556030273,
      "learning_rate": 2.0275e-05,
      "loss": 0.7003,
      "step": 1189
    },
    {
      "epoch": 0.036429210333727154,
      "grad_norm": 0.2092023342847824,
      "learning_rate": 2.025e-05,
      "loss": 0.6149,
      "step": 1190
    },
    {
      "epoch": 0.0364598231155202,
      "grad_norm": 0.18824338912963867,
      "learning_rate": 2.0225000000000004e-05,
      "loss": 0.8517,
      "step": 1191
    },
    {
      "epoch": 0.03649043589731325,
      "grad_norm": 0.16726665198802948,
      "learning_rate": 2.0200000000000003e-05,
      "loss": 0.8007,
      "step": 1192
    },
    {
      "epoch": 0.0365210486791063,
      "grad_norm": 0.19236837327480316,
      "learning_rate": 2.0175000000000003e-05,
      "loss": 0.8085,
      "step": 1193
    },
    {
      "epoch": 0.036551661460899346,
      "grad_norm": 0.4444602429866791,
      "learning_rate": 2.0150000000000002e-05,
      "loss": 0.7538,
      "step": 1194
    },
    {
      "epoch": 0.03658227424269239,
      "grad_norm": 0.20198461413383484,
      "learning_rate": 2.0125e-05,
      "loss": 0.7751,
      "step": 1195
    },
    {
      "epoch": 0.036612887024485445,
      "grad_norm": 0.2651364207267761,
      "learning_rate": 2.01e-05,
      "loss": 0.8124,
      "step": 1196
    },
    {
      "epoch": 0.03664349980627849,
      "grad_norm": 0.24603505432605743,
      "learning_rate": 2.0075000000000003e-05,
      "loss": 0.7861,
      "step": 1197
    },
    {
      "epoch": 0.03667411258807154,
      "grad_norm": 0.21193943917751312,
      "learning_rate": 2.0050000000000003e-05,
      "loss": 0.6692,
      "step": 1198
    },
    {
      "epoch": 0.036704725369864584,
      "grad_norm": 0.2244226038455963,
      "learning_rate": 2.0025000000000002e-05,
      "loss": 0.7754,
      "step": 1199
    },
    {
      "epoch": 0.03673533815165764,
      "grad_norm": 0.26447299122810364,
      "learning_rate": 2e-05,
      "loss": 0.5905,
      "step": 1200
    },
    {
      "epoch": 0.03676595093345068,
      "grad_norm": 0.19041816890239716,
      "learning_rate": 1.9975e-05,
      "loss": 0.8095,
      "step": 1201
    },
    {
      "epoch": 0.03679656371524373,
      "grad_norm": 0.3582216203212738,
      "learning_rate": 1.995e-05,
      "loss": 0.7051,
      "step": 1202
    },
    {
      "epoch": 0.036827176497036776,
      "grad_norm": 0.23367851972579956,
      "learning_rate": 1.9925000000000003e-05,
      "loss": 0.7629,
      "step": 1203
    },
    {
      "epoch": 0.03685778927882983,
      "grad_norm": 0.1773194819688797,
      "learning_rate": 1.9900000000000003e-05,
      "loss": 0.8405,
      "step": 1204
    },
    {
      "epoch": 0.036888402060622875,
      "grad_norm": 0.1546410471200943,
      "learning_rate": 1.9875000000000002e-05,
      "loss": 0.7142,
      "step": 1205
    },
    {
      "epoch": 0.03691901484241592,
      "grad_norm": 0.29562875628471375,
      "learning_rate": 1.985e-05,
      "loss": 0.8108,
      "step": 1206
    },
    {
      "epoch": 0.03694962762420897,
      "grad_norm": 0.2659907042980194,
      "learning_rate": 1.9825e-05,
      "loss": 0.8485,
      "step": 1207
    },
    {
      "epoch": 0.03698024040600202,
      "grad_norm": 0.5846388339996338,
      "learning_rate": 1.9800000000000004e-05,
      "loss": 0.6712,
      "step": 1208
    },
    {
      "epoch": 0.03701085318779507,
      "grad_norm": 0.21705412864685059,
      "learning_rate": 1.9775000000000003e-05,
      "loss": 0.7667,
      "step": 1209
    },
    {
      "epoch": 0.037041465969588114,
      "grad_norm": 0.2733075022697449,
      "learning_rate": 1.9750000000000002e-05,
      "loss": 0.7589,
      "step": 1210
    },
    {
      "epoch": 0.03707207875138116,
      "grad_norm": 0.21070265769958496,
      "learning_rate": 1.9725000000000002e-05,
      "loss": 0.7922,
      "step": 1211
    },
    {
      "epoch": 0.03710269153317421,
      "grad_norm": 0.23948334157466888,
      "learning_rate": 1.97e-05,
      "loss": 0.8203,
      "step": 1212
    },
    {
      "epoch": 0.03713330431496726,
      "grad_norm": 0.45053327083587646,
      "learning_rate": 1.9675e-05,
      "loss": 0.695,
      "step": 1213
    },
    {
      "epoch": 0.037163917096760306,
      "grad_norm": 0.20710590481758118,
      "learning_rate": 1.9650000000000003e-05,
      "loss": 0.8048,
      "step": 1214
    },
    {
      "epoch": 0.03719452987855335,
      "grad_norm": 0.24359489977359772,
      "learning_rate": 1.9625000000000003e-05,
      "loss": 0.7106,
      "step": 1215
    },
    {
      "epoch": 0.037225142660346405,
      "grad_norm": 0.5188857913017273,
      "learning_rate": 1.9600000000000002e-05,
      "loss": 0.7392,
      "step": 1216
    },
    {
      "epoch": 0.03725575544213945,
      "grad_norm": 0.14952421188354492,
      "learning_rate": 1.9575e-05,
      "loss": 0.6618,
      "step": 1217
    },
    {
      "epoch": 0.0372863682239325,
      "grad_norm": 0.17325368523597717,
      "learning_rate": 1.955e-05,
      "loss": 0.9395,
      "step": 1218
    },
    {
      "epoch": 0.037316981005725544,
      "grad_norm": 0.16947080194950104,
      "learning_rate": 1.9525e-05,
      "loss": 0.5792,
      "step": 1219
    },
    {
      "epoch": 0.0373475937875186,
      "grad_norm": 0.15876354277133942,
      "learning_rate": 1.9500000000000003e-05,
      "loss": 0.6842,
      "step": 1220
    },
    {
      "epoch": 0.037378206569311644,
      "grad_norm": 0.1491585373878479,
      "learning_rate": 1.9475000000000002e-05,
      "loss": 0.7157,
      "step": 1221
    },
    {
      "epoch": 0.03740881935110469,
      "grad_norm": 0.3761278986930847,
      "learning_rate": 1.9450000000000002e-05,
      "loss": 0.7599,
      "step": 1222
    },
    {
      "epoch": 0.037439432132897736,
      "grad_norm": 0.13297878205776215,
      "learning_rate": 1.9425e-05,
      "loss": 0.5957,
      "step": 1223
    },
    {
      "epoch": 0.03747004491469079,
      "grad_norm": 0.2369900643825531,
      "learning_rate": 1.94e-05,
      "loss": 0.704,
      "step": 1224
    },
    {
      "epoch": 0.037500657696483836,
      "grad_norm": 0.19370752573013306,
      "learning_rate": 1.9375e-05,
      "loss": 0.758,
      "step": 1225
    },
    {
      "epoch": 0.03753127047827688,
      "grad_norm": 0.23541848361492157,
      "learning_rate": 1.9350000000000003e-05,
      "loss": 0.6556,
      "step": 1226
    },
    {
      "epoch": 0.03756188326006993,
      "grad_norm": 0.2681790292263031,
      "learning_rate": 1.9325000000000002e-05,
      "loss": 0.7132,
      "step": 1227
    },
    {
      "epoch": 0.03759249604186298,
      "grad_norm": 0.3386518359184265,
      "learning_rate": 1.93e-05,
      "loss": 0.8615,
      "step": 1228
    },
    {
      "epoch": 0.03762310882365603,
      "grad_norm": 0.3045949935913086,
      "learning_rate": 1.9275e-05,
      "loss": 0.8324,
      "step": 1229
    },
    {
      "epoch": 0.037653721605449074,
      "grad_norm": 0.17442895472049713,
      "learning_rate": 1.925e-05,
      "loss": 0.6762,
      "step": 1230
    },
    {
      "epoch": 0.03768433438724212,
      "grad_norm": 0.15883266925811768,
      "learning_rate": 1.9225e-05,
      "loss": 0.751,
      "step": 1231
    },
    {
      "epoch": 0.03771494716903517,
      "grad_norm": 0.44343405961990356,
      "learning_rate": 1.9200000000000003e-05,
      "loss": 0.7411,
      "step": 1232
    },
    {
      "epoch": 0.03774555995082822,
      "grad_norm": 0.3465401232242584,
      "learning_rate": 1.9175000000000002e-05,
      "loss": 0.8715,
      "step": 1233
    },
    {
      "epoch": 0.037776172732621266,
      "grad_norm": 0.19120873510837555,
      "learning_rate": 1.915e-05,
      "loss": 0.7032,
      "step": 1234
    },
    {
      "epoch": 0.03780678551441431,
      "grad_norm": 0.27793917059898376,
      "learning_rate": 1.9125e-05,
      "loss": 0.6937,
      "step": 1235
    },
    {
      "epoch": 0.037837398296207365,
      "grad_norm": 0.18054048717021942,
      "learning_rate": 1.91e-05,
      "loss": 0.7772,
      "step": 1236
    },
    {
      "epoch": 0.03786801107800041,
      "grad_norm": 0.2504083812236786,
      "learning_rate": 1.9075000000000003e-05,
      "loss": 0.6783,
      "step": 1237
    },
    {
      "epoch": 0.03789862385979346,
      "grad_norm": 0.22988668084144592,
      "learning_rate": 1.9050000000000002e-05,
      "loss": 0.8623,
      "step": 1238
    },
    {
      "epoch": 0.037929236641586504,
      "grad_norm": 0.21425902843475342,
      "learning_rate": 1.9025e-05,
      "loss": 0.7867,
      "step": 1239
    },
    {
      "epoch": 0.03795984942337956,
      "grad_norm": 0.23794354498386383,
      "learning_rate": 1.9e-05,
      "loss": 0.7086,
      "step": 1240
    },
    {
      "epoch": 0.037990462205172604,
      "grad_norm": 0.16965839266777039,
      "learning_rate": 1.8975e-05,
      "loss": 0.6967,
      "step": 1241
    },
    {
      "epoch": 0.03802107498696565,
      "grad_norm": 0.22722351551055908,
      "learning_rate": 1.895e-05,
      "loss": 0.9438,
      "step": 1242
    },
    {
      "epoch": 0.038051687768758696,
      "grad_norm": 0.45486196875572205,
      "learning_rate": 1.8925000000000003e-05,
      "loss": 0.6971,
      "step": 1243
    },
    {
      "epoch": 0.03808230055055175,
      "grad_norm": 0.19238613545894623,
      "learning_rate": 1.8900000000000002e-05,
      "loss": 0.8207,
      "step": 1244
    },
    {
      "epoch": 0.038112913332344796,
      "grad_norm": 0.19000859558582306,
      "learning_rate": 1.8875e-05,
      "loss": 0.7583,
      "step": 1245
    },
    {
      "epoch": 0.03814352611413784,
      "grad_norm": 0.199451744556427,
      "learning_rate": 1.885e-05,
      "loss": 0.8768,
      "step": 1246
    },
    {
      "epoch": 0.03817413889593089,
      "grad_norm": 0.1883232444524765,
      "learning_rate": 1.8825e-05,
      "loss": 0.829,
      "step": 1247
    },
    {
      "epoch": 0.03820475167772394,
      "grad_norm": 0.20281562209129333,
      "learning_rate": 1.88e-05,
      "loss": 0.8333,
      "step": 1248
    },
    {
      "epoch": 0.03823536445951699,
      "grad_norm": 0.29612159729003906,
      "learning_rate": 1.8775000000000002e-05,
      "loss": 0.7419,
      "step": 1249
    },
    {
      "epoch": 0.038265977241310034,
      "grad_norm": 0.3115490972995758,
      "learning_rate": 1.8750000000000002e-05,
      "loss": 0.6706,
      "step": 1250
    },
    {
      "epoch": 0.03829659002310308,
      "grad_norm": 0.23806744813919067,
      "learning_rate": 1.8725e-05,
      "loss": 0.7748,
      "step": 1251
    },
    {
      "epoch": 0.038327202804896134,
      "grad_norm": 0.1569286286830902,
      "learning_rate": 1.87e-05,
      "loss": 0.7108,
      "step": 1252
    },
    {
      "epoch": 0.03835781558668918,
      "grad_norm": 0.2330920249223709,
      "learning_rate": 1.8675e-05,
      "loss": 0.7306,
      "step": 1253
    },
    {
      "epoch": 0.038388428368482226,
      "grad_norm": 0.20600777864456177,
      "learning_rate": 1.865e-05,
      "loss": 0.8066,
      "step": 1254
    },
    {
      "epoch": 0.03841904115027528,
      "grad_norm": 0.5910000205039978,
      "learning_rate": 1.8625000000000002e-05,
      "loss": 0.7625,
      "step": 1255
    },
    {
      "epoch": 0.038449653932068326,
      "grad_norm": 0.17363347113132477,
      "learning_rate": 1.86e-05,
      "loss": 0.7875,
      "step": 1256
    },
    {
      "epoch": 0.03848026671386137,
      "grad_norm": 0.23878833651542664,
      "learning_rate": 1.8575e-05,
      "loss": 0.7119,
      "step": 1257
    },
    {
      "epoch": 0.03851087949565442,
      "grad_norm": 0.2298930585384369,
      "learning_rate": 1.855e-05,
      "loss": 0.7326,
      "step": 1258
    },
    {
      "epoch": 0.03854149227744747,
      "grad_norm": 0.3178658187389374,
      "learning_rate": 1.8525e-05,
      "loss": 0.7169,
      "step": 1259
    },
    {
      "epoch": 0.03857210505924052,
      "grad_norm": 0.4151080250740051,
      "learning_rate": 1.85e-05,
      "loss": 0.7638,
      "step": 1260
    },
    {
      "epoch": 0.038602717841033564,
      "grad_norm": 0.24473996460437775,
      "learning_rate": 1.8475000000000002e-05,
      "loss": 0.603,
      "step": 1261
    },
    {
      "epoch": 0.03863333062282661,
      "grad_norm": 0.20702874660491943,
      "learning_rate": 1.845e-05,
      "loss": 0.7191,
      "step": 1262
    },
    {
      "epoch": 0.03866394340461966,
      "grad_norm": 0.24452932178974152,
      "learning_rate": 1.8425e-05,
      "loss": 0.645,
      "step": 1263
    },
    {
      "epoch": 0.03869455618641271,
      "grad_norm": 0.18146070837974548,
      "learning_rate": 1.84e-05,
      "loss": 0.6619,
      "step": 1264
    },
    {
      "epoch": 0.038725168968205756,
      "grad_norm": 0.2599219083786011,
      "learning_rate": 1.8375e-05,
      "loss": 0.6883,
      "step": 1265
    },
    {
      "epoch": 0.0387557817499988,
      "grad_norm": 0.19145676493644714,
      "learning_rate": 1.8350000000000002e-05,
      "loss": 0.7064,
      "step": 1266
    },
    {
      "epoch": 0.038786394531791855,
      "grad_norm": 0.37507086992263794,
      "learning_rate": 1.8325e-05,
      "loss": 0.6854,
      "step": 1267
    },
    {
      "epoch": 0.0388170073135849,
      "grad_norm": 0.22284623980522156,
      "learning_rate": 1.83e-05,
      "loss": 0.7736,
      "step": 1268
    },
    {
      "epoch": 0.03884762009537795,
      "grad_norm": 0.18542031943798065,
      "learning_rate": 1.8275e-05,
      "loss": 0.5577,
      "step": 1269
    },
    {
      "epoch": 0.038878232877170994,
      "grad_norm": 0.19886282086372375,
      "learning_rate": 1.825e-05,
      "loss": 0.7679,
      "step": 1270
    },
    {
      "epoch": 0.03890884565896405,
      "grad_norm": 0.2014557272195816,
      "learning_rate": 1.8225e-05,
      "loss": 0.7225,
      "step": 1271
    },
    {
      "epoch": 0.038939458440757094,
      "grad_norm": 0.21231096982955933,
      "learning_rate": 1.8200000000000002e-05,
      "loss": 0.7631,
      "step": 1272
    },
    {
      "epoch": 0.03897007122255014,
      "grad_norm": 0.19582505524158478,
      "learning_rate": 1.8175e-05,
      "loss": 0.7002,
      "step": 1273
    },
    {
      "epoch": 0.039000684004343186,
      "grad_norm": 0.20389242470264435,
      "learning_rate": 1.815e-05,
      "loss": 0.7941,
      "step": 1274
    },
    {
      "epoch": 0.03903129678613624,
      "grad_norm": 0.14913064241409302,
      "learning_rate": 1.8125e-05,
      "loss": 0.6473,
      "step": 1275
    },
    {
      "epoch": 0.039061909567929286,
      "grad_norm": 0.2143448293209076,
      "learning_rate": 1.81e-05,
      "loss": 0.6623,
      "step": 1276
    },
    {
      "epoch": 0.03909252234972233,
      "grad_norm": 0.2809733748435974,
      "learning_rate": 1.8075e-05,
      "loss": 0.8006,
      "step": 1277
    },
    {
      "epoch": 0.03912313513151538,
      "grad_norm": 0.7105140089988708,
      "learning_rate": 1.805e-05,
      "loss": 0.693,
      "step": 1278
    },
    {
      "epoch": 0.03915374791330843,
      "grad_norm": 0.1628800332546234,
      "learning_rate": 1.8025e-05,
      "loss": 0.6688,
      "step": 1279
    },
    {
      "epoch": 0.03918436069510148,
      "grad_norm": 0.255191445350647,
      "learning_rate": 1.8e-05,
      "loss": 0.8915,
      "step": 1280
    },
    {
      "epoch": 0.039214973476894524,
      "grad_norm": 0.20962204039096832,
      "learning_rate": 1.7975e-05,
      "loss": 0.5719,
      "step": 1281
    },
    {
      "epoch": 0.03924558625868757,
      "grad_norm": 0.2621522843837738,
      "learning_rate": 1.795e-05,
      "loss": 0.6665,
      "step": 1282
    },
    {
      "epoch": 0.039276199040480624,
      "grad_norm": 0.2403772473335266,
      "learning_rate": 1.7925e-05,
      "loss": 0.7741,
      "step": 1283
    },
    {
      "epoch": 0.03930681182227367,
      "grad_norm": 0.23659536242485046,
      "learning_rate": 1.79e-05,
      "loss": 0.7036,
      "step": 1284
    },
    {
      "epoch": 0.039337424604066716,
      "grad_norm": 0.14874260127544403,
      "learning_rate": 1.7875e-05,
      "loss": 0.8201,
      "step": 1285
    },
    {
      "epoch": 0.03936803738585976,
      "grad_norm": 0.3450917899608612,
      "learning_rate": 1.785e-05,
      "loss": 0.707,
      "step": 1286
    },
    {
      "epoch": 0.039398650167652816,
      "grad_norm": 0.20324021577835083,
      "learning_rate": 1.7825e-05,
      "loss": 0.8864,
      "step": 1287
    },
    {
      "epoch": 0.03942926294944586,
      "grad_norm": 0.23524203896522522,
      "learning_rate": 1.78e-05,
      "loss": 0.7856,
      "step": 1288
    },
    {
      "epoch": 0.03945987573123891,
      "grad_norm": 0.17431683838367462,
      "learning_rate": 1.7775e-05,
      "loss": 0.6173,
      "step": 1289
    },
    {
      "epoch": 0.039490488513031954,
      "grad_norm": 0.24204204976558685,
      "learning_rate": 1.775e-05,
      "loss": 0.7369,
      "step": 1290
    },
    {
      "epoch": 0.03952110129482501,
      "grad_norm": 0.1875373274087906,
      "learning_rate": 1.7725e-05,
      "loss": 0.6903,
      "step": 1291
    },
    {
      "epoch": 0.039551714076618054,
      "grad_norm": 0.22936350107192993,
      "learning_rate": 1.77e-05,
      "loss": 0.9273,
      "step": 1292
    },
    {
      "epoch": 0.0395823268584111,
      "grad_norm": 0.2194318026304245,
      "learning_rate": 1.7675e-05,
      "loss": 0.7569,
      "step": 1293
    },
    {
      "epoch": 0.039612939640204146,
      "grad_norm": 0.3097097873687744,
      "learning_rate": 1.765e-05,
      "loss": 0.8024,
      "step": 1294
    },
    {
      "epoch": 0.0396435524219972,
      "grad_norm": 0.34713852405548096,
      "learning_rate": 1.7625e-05,
      "loss": 0.7178,
      "step": 1295
    },
    {
      "epoch": 0.039674165203790246,
      "grad_norm": 0.2232169359922409,
      "learning_rate": 1.76e-05,
      "loss": 0.6138,
      "step": 1296
    },
    {
      "epoch": 0.03970477798558329,
      "grad_norm": 0.15773159265518188,
      "learning_rate": 1.7575e-05,
      "loss": 0.7368,
      "step": 1297
    },
    {
      "epoch": 0.03973539076737634,
      "grad_norm": 0.5441001057624817,
      "learning_rate": 1.755e-05,
      "loss": 0.7527,
      "step": 1298
    },
    {
      "epoch": 0.03976600354916939,
      "grad_norm": 0.17534387111663818,
      "learning_rate": 1.7525e-05,
      "loss": 0.7909,
      "step": 1299
    },
    {
      "epoch": 0.03979661633096244,
      "grad_norm": 0.22553709149360657,
      "learning_rate": 1.75e-05,
      "loss": 0.7706,
      "step": 1300
    },
    {
      "epoch": 0.039827229112755484,
      "grad_norm": 0.4952869713306427,
      "learning_rate": 1.7475e-05,
      "loss": 0.8143,
      "step": 1301
    },
    {
      "epoch": 0.03985784189454853,
      "grad_norm": 0.2220001220703125,
      "learning_rate": 1.745e-05,
      "loss": 0.8561,
      "step": 1302
    },
    {
      "epoch": 0.039888454676341584,
      "grad_norm": 0.21453911066055298,
      "learning_rate": 1.7425e-05,
      "loss": 0.7594,
      "step": 1303
    },
    {
      "epoch": 0.03991906745813463,
      "grad_norm": 0.1848205029964447,
      "learning_rate": 1.74e-05,
      "loss": 0.7402,
      "step": 1304
    },
    {
      "epoch": 0.039949680239927676,
      "grad_norm": 0.1650918573141098,
      "learning_rate": 1.7375e-05,
      "loss": 0.6896,
      "step": 1305
    },
    {
      "epoch": 0.03998029302172072,
      "grad_norm": 0.19316811859607697,
      "learning_rate": 1.7349999999999998e-05,
      "loss": 0.7183,
      "step": 1306
    },
    {
      "epoch": 0.040010905803513776,
      "grad_norm": 0.2072644829750061,
      "learning_rate": 1.7325e-05,
      "loss": 0.6699,
      "step": 1307
    },
    {
      "epoch": 0.04004151858530682,
      "grad_norm": 0.3763982355594635,
      "learning_rate": 1.73e-05,
      "loss": 0.7419,
      "step": 1308
    },
    {
      "epoch": 0.04007213136709987,
      "grad_norm": 3.337364435195923,
      "learning_rate": 1.7275e-05,
      "loss": 0.6479,
      "step": 1309
    },
    {
      "epoch": 0.040102744148892915,
      "grad_norm": 0.43610507249832153,
      "learning_rate": 1.725e-05,
      "loss": 0.6495,
      "step": 1310
    },
    {
      "epoch": 0.04013335693068597,
      "grad_norm": 0.15165871381759644,
      "learning_rate": 1.7225e-05,
      "loss": 0.7791,
      "step": 1311
    },
    {
      "epoch": 0.040163969712479014,
      "grad_norm": 0.3084500730037689,
      "learning_rate": 1.7199999999999998e-05,
      "loss": 0.8576,
      "step": 1312
    },
    {
      "epoch": 0.04019458249427206,
      "grad_norm": 0.28761258721351624,
      "learning_rate": 1.7175e-05,
      "loss": 0.801,
      "step": 1313
    },
    {
      "epoch": 0.04022519527606511,
      "grad_norm": 0.30889445543289185,
      "learning_rate": 1.7150000000000004e-05,
      "loss": 0.7743,
      "step": 1314
    },
    {
      "epoch": 0.04025580805785816,
      "grad_norm": 0.23333163559436798,
      "learning_rate": 1.7125000000000003e-05,
      "loss": 0.6778,
      "step": 1315
    },
    {
      "epoch": 0.040286420839651206,
      "grad_norm": 0.15331430733203888,
      "learning_rate": 1.7100000000000002e-05,
      "loss": 0.732,
      "step": 1316
    },
    {
      "epoch": 0.04031703362144425,
      "grad_norm": 0.17977628111839294,
      "learning_rate": 1.7075e-05,
      "loss": 0.6152,
      "step": 1317
    },
    {
      "epoch": 0.0403476464032373,
      "grad_norm": 0.22740229964256287,
      "learning_rate": 1.705e-05,
      "loss": 0.7236,
      "step": 1318
    },
    {
      "epoch": 0.04037825918503035,
      "grad_norm": 0.22869469225406647,
      "learning_rate": 1.7025e-05,
      "loss": 0.7997,
      "step": 1319
    },
    {
      "epoch": 0.0404088719668234,
      "grad_norm": 0.18710190057754517,
      "learning_rate": 1.7000000000000003e-05,
      "loss": 0.659,
      "step": 1320
    },
    {
      "epoch": 0.040439484748616444,
      "grad_norm": 0.19838039577007294,
      "learning_rate": 1.6975000000000003e-05,
      "loss": 0.7447,
      "step": 1321
    },
    {
      "epoch": 0.04047009753040949,
      "grad_norm": 0.1978246420621872,
      "learning_rate": 1.6950000000000002e-05,
      "loss": 0.7901,
      "step": 1322
    },
    {
      "epoch": 0.040500710312202544,
      "grad_norm": 0.20875835418701172,
      "learning_rate": 1.6925e-05,
      "loss": 0.7618,
      "step": 1323
    },
    {
      "epoch": 0.04053132309399559,
      "grad_norm": 0.25075897574424744,
      "learning_rate": 1.69e-05,
      "loss": 0.8021,
      "step": 1324
    },
    {
      "epoch": 0.040561935875788636,
      "grad_norm": 0.16941377520561218,
      "learning_rate": 1.6875000000000004e-05,
      "loss": 0.8115,
      "step": 1325
    },
    {
      "epoch": 0.04059254865758168,
      "grad_norm": 0.1568528115749359,
      "learning_rate": 1.6850000000000003e-05,
      "loss": 0.758,
      "step": 1326
    },
    {
      "epoch": 0.040623161439374736,
      "grad_norm": 0.27601784467697144,
      "learning_rate": 1.6825000000000002e-05,
      "loss": 0.7514,
      "step": 1327
    },
    {
      "epoch": 0.04065377422116778,
      "grad_norm": 0.184527188539505,
      "learning_rate": 1.6800000000000002e-05,
      "loss": 0.7108,
      "step": 1328
    },
    {
      "epoch": 0.04068438700296083,
      "grad_norm": 0.2047712802886963,
      "learning_rate": 1.6775e-05,
      "loss": 0.8038,
      "step": 1329
    },
    {
      "epoch": 0.040714999784753875,
      "grad_norm": 0.15664909780025482,
      "learning_rate": 1.675e-05,
      "loss": 0.6357,
      "step": 1330
    },
    {
      "epoch": 0.04074561256654693,
      "grad_norm": 0.18264542520046234,
      "learning_rate": 1.6725000000000003e-05,
      "loss": 0.7605,
      "step": 1331
    },
    {
      "epoch": 0.040776225348339974,
      "grad_norm": 0.17458516359329224,
      "learning_rate": 1.6700000000000003e-05,
      "loss": 0.6642,
      "step": 1332
    },
    {
      "epoch": 0.04080683813013302,
      "grad_norm": 0.2196420580148697,
      "learning_rate": 1.6675000000000002e-05,
      "loss": 0.6427,
      "step": 1333
    },
    {
      "epoch": 0.04083745091192607,
      "grad_norm": 0.24097728729248047,
      "learning_rate": 1.665e-05,
      "loss": 0.9129,
      "step": 1334
    },
    {
      "epoch": 0.04086806369371912,
      "grad_norm": 0.21499298512935638,
      "learning_rate": 1.6625e-05,
      "loss": 0.7542,
      "step": 1335
    },
    {
      "epoch": 0.040898676475512166,
      "grad_norm": 0.24109140038490295,
      "learning_rate": 1.66e-05,
      "loss": 0.6229,
      "step": 1336
    },
    {
      "epoch": 0.04092928925730521,
      "grad_norm": 0.15431708097457886,
      "learning_rate": 1.6575000000000003e-05,
      "loss": 0.718,
      "step": 1337
    },
    {
      "epoch": 0.040959902039098266,
      "grad_norm": 0.2153838723897934,
      "learning_rate": 1.6550000000000002e-05,
      "loss": 0.7122,
      "step": 1338
    },
    {
      "epoch": 0.04099051482089131,
      "grad_norm": 0.17694485187530518,
      "learning_rate": 1.6525000000000002e-05,
      "loss": 0.7197,
      "step": 1339
    },
    {
      "epoch": 0.04102112760268436,
      "grad_norm": 0.17731429636478424,
      "learning_rate": 1.65e-05,
      "loss": 0.6956,
      "step": 1340
    },
    {
      "epoch": 0.041051740384477405,
      "grad_norm": 0.21813912689685822,
      "learning_rate": 1.6475e-05,
      "loss": 0.8016,
      "step": 1341
    },
    {
      "epoch": 0.04108235316627046,
      "grad_norm": 0.24199146032333374,
      "learning_rate": 1.645e-05,
      "loss": 0.7931,
      "step": 1342
    },
    {
      "epoch": 0.041112965948063504,
      "grad_norm": 0.14052605628967285,
      "learning_rate": 1.6425000000000003e-05,
      "loss": 0.6865,
      "step": 1343
    },
    {
      "epoch": 0.04114357872985655,
      "grad_norm": 0.17013974487781525,
      "learning_rate": 1.6400000000000002e-05,
      "loss": 0.7362,
      "step": 1344
    },
    {
      "epoch": 0.0411741915116496,
      "grad_norm": 0.2560582458972931,
      "learning_rate": 1.6375e-05,
      "loss": 0.629,
      "step": 1345
    },
    {
      "epoch": 0.04120480429344265,
      "grad_norm": 0.2156529724597931,
      "learning_rate": 1.635e-05,
      "loss": 0.7256,
      "step": 1346
    },
    {
      "epoch": 0.041235417075235696,
      "grad_norm": 0.16630205512046814,
      "learning_rate": 1.6325e-05,
      "loss": 0.7503,
      "step": 1347
    },
    {
      "epoch": 0.04126602985702874,
      "grad_norm": 0.16027429699897766,
      "learning_rate": 1.63e-05,
      "loss": 0.7012,
      "step": 1348
    },
    {
      "epoch": 0.04129664263882179,
      "grad_norm": 0.9667307138442993,
      "learning_rate": 1.6275000000000003e-05,
      "loss": 0.8404,
      "step": 1349
    },
    {
      "epoch": 0.04132725542061484,
      "grad_norm": 0.31280195713043213,
      "learning_rate": 1.6250000000000002e-05,
      "loss": 0.7468,
      "step": 1350
    },
    {
      "epoch": 0.04135786820240789,
      "grad_norm": 0.15684044361114502,
      "learning_rate": 1.6225e-05,
      "loss": 0.6178,
      "step": 1351
    },
    {
      "epoch": 0.041388480984200934,
      "grad_norm": 0.2716297209262848,
      "learning_rate": 1.62e-05,
      "loss": 0.7807,
      "step": 1352
    },
    {
      "epoch": 0.04141909376599398,
      "grad_norm": 0.18027831614017487,
      "learning_rate": 1.6175e-05,
      "loss": 0.7919,
      "step": 1353
    },
    {
      "epoch": 0.041449706547787034,
      "grad_norm": 0.37246569991111755,
      "learning_rate": 1.6150000000000003e-05,
      "loss": 0.6932,
      "step": 1354
    },
    {
      "epoch": 0.04148031932958008,
      "grad_norm": 0.21788881719112396,
      "learning_rate": 1.6125000000000002e-05,
      "loss": 0.7036,
      "step": 1355
    },
    {
      "epoch": 0.041510932111373126,
      "grad_norm": 0.1785667985677719,
      "learning_rate": 1.6100000000000002e-05,
      "loss": 0.8148,
      "step": 1356
    },
    {
      "epoch": 0.04154154489316617,
      "grad_norm": 0.1936718076467514,
      "learning_rate": 1.6075e-05,
      "loss": 0.7927,
      "step": 1357
    },
    {
      "epoch": 0.041572157674959226,
      "grad_norm": 0.18061251938343048,
      "learning_rate": 1.605e-05,
      "loss": 0.8847,
      "step": 1358
    },
    {
      "epoch": 0.04160277045675227,
      "grad_norm": 0.20259565114974976,
      "learning_rate": 1.6025e-05,
      "loss": 0.729,
      "step": 1359
    },
    {
      "epoch": 0.04163338323854532,
      "grad_norm": 0.17487627267837524,
      "learning_rate": 1.6000000000000003e-05,
      "loss": 0.7704,
      "step": 1360
    },
    {
      "epoch": 0.041663996020338365,
      "grad_norm": 0.5668933391571045,
      "learning_rate": 1.5975000000000002e-05,
      "loss": 0.617,
      "step": 1361
    },
    {
      "epoch": 0.04169460880213142,
      "grad_norm": 0.18771401047706604,
      "learning_rate": 1.595e-05,
      "loss": 0.753,
      "step": 1362
    },
    {
      "epoch": 0.041725221583924464,
      "grad_norm": 0.1781155914068222,
      "learning_rate": 1.5925e-05,
      "loss": 0.7384,
      "step": 1363
    },
    {
      "epoch": 0.04175583436571751,
      "grad_norm": 0.17328882217407227,
      "learning_rate": 1.59e-05,
      "loss": 0.8173,
      "step": 1364
    },
    {
      "epoch": 0.04178644714751056,
      "grad_norm": 0.21196331083774567,
      "learning_rate": 1.5875e-05,
      "loss": 0.818,
      "step": 1365
    },
    {
      "epoch": 0.04181705992930361,
      "grad_norm": 0.1945796012878418,
      "learning_rate": 1.5850000000000002e-05,
      "loss": 0.7287,
      "step": 1366
    },
    {
      "epoch": 0.041847672711096656,
      "grad_norm": 0.16711348295211792,
      "learning_rate": 1.5825000000000002e-05,
      "loss": 0.6716,
      "step": 1367
    },
    {
      "epoch": 0.0418782854928897,
      "grad_norm": 0.1832965910434723,
      "learning_rate": 1.58e-05,
      "loss": 0.7556,
      "step": 1368
    },
    {
      "epoch": 0.04190889827468275,
      "grad_norm": 0.20860032737255096,
      "learning_rate": 1.5775e-05,
      "loss": 0.6851,
      "step": 1369
    },
    {
      "epoch": 0.0419395110564758,
      "grad_norm": 0.1943766474723816,
      "learning_rate": 1.575e-05,
      "loss": 0.743,
      "step": 1370
    },
    {
      "epoch": 0.04197012383826885,
      "grad_norm": 0.31595706939697266,
      "learning_rate": 1.5725e-05,
      "loss": 0.9053,
      "step": 1371
    },
    {
      "epoch": 0.042000736620061895,
      "grad_norm": 0.2394976168870926,
      "learning_rate": 1.5700000000000002e-05,
      "loss": 0.8751,
      "step": 1372
    },
    {
      "epoch": 0.04203134940185494,
      "grad_norm": 0.21435868740081787,
      "learning_rate": 1.5675e-05,
      "loss": 0.8078,
      "step": 1373
    },
    {
      "epoch": 0.042061962183647994,
      "grad_norm": 0.2448282688856125,
      "learning_rate": 1.565e-05,
      "loss": 0.707,
      "step": 1374
    },
    {
      "epoch": 0.04209257496544104,
      "grad_norm": 0.3694523274898529,
      "learning_rate": 1.5625e-05,
      "loss": 0.8031,
      "step": 1375
    },
    {
      "epoch": 0.04212318774723409,
      "grad_norm": 0.24920806288719177,
      "learning_rate": 1.56e-05,
      "loss": 0.823,
      "step": 1376
    },
    {
      "epoch": 0.04215380052902713,
      "grad_norm": 0.22014984488487244,
      "learning_rate": 1.5575e-05,
      "loss": 0.6812,
      "step": 1377
    },
    {
      "epoch": 0.042184413310820186,
      "grad_norm": 0.17153052985668182,
      "learning_rate": 1.5550000000000002e-05,
      "loss": 0.7536,
      "step": 1378
    },
    {
      "epoch": 0.04221502609261323,
      "grad_norm": 0.34825605154037476,
      "learning_rate": 1.5525e-05,
      "loss": 0.7428,
      "step": 1379
    },
    {
      "epoch": 0.04224563887440628,
      "grad_norm": 0.20849856734275818,
      "learning_rate": 1.55e-05,
      "loss": 0.7563,
      "step": 1380
    },
    {
      "epoch": 0.042276251656199325,
      "grad_norm": 0.2631855309009552,
      "learning_rate": 1.5475e-05,
      "loss": 0.7722,
      "step": 1381
    },
    {
      "epoch": 0.04230686443799238,
      "grad_norm": 0.21010124683380127,
      "learning_rate": 1.545e-05,
      "loss": 0.6802,
      "step": 1382
    },
    {
      "epoch": 0.042337477219785424,
      "grad_norm": 0.1926780343055725,
      "learning_rate": 1.5425000000000002e-05,
      "loss": 0.9126,
      "step": 1383
    },
    {
      "epoch": 0.04236809000157847,
      "grad_norm": 0.32453134655952454,
      "learning_rate": 1.54e-05,
      "loss": 0.699,
      "step": 1384
    },
    {
      "epoch": 0.04239870278337152,
      "grad_norm": 0.18458174169063568,
      "learning_rate": 1.5375e-05,
      "loss": 0.8349,
      "step": 1385
    },
    {
      "epoch": 0.04242931556516457,
      "grad_norm": 0.2262679785490036,
      "learning_rate": 1.535e-05,
      "loss": 0.6412,
      "step": 1386
    },
    {
      "epoch": 0.042459928346957616,
      "grad_norm": 0.18162915110588074,
      "learning_rate": 1.5325e-05,
      "loss": 0.8022,
      "step": 1387
    },
    {
      "epoch": 0.04249054112875066,
      "grad_norm": 0.20319467782974243,
      "learning_rate": 1.53e-05,
      "loss": 0.7513,
      "step": 1388
    },
    {
      "epoch": 0.04252115391054371,
      "grad_norm": 0.24134382605552673,
      "learning_rate": 1.5275000000000002e-05,
      "loss": 0.6465,
      "step": 1389
    },
    {
      "epoch": 0.04255176669233676,
      "grad_norm": 0.26331713795661926,
      "learning_rate": 1.525e-05,
      "loss": 0.6273,
      "step": 1390
    },
    {
      "epoch": 0.04258237947412981,
      "grad_norm": 0.18230004608631134,
      "learning_rate": 1.5225e-05,
      "loss": 0.6903,
      "step": 1391
    },
    {
      "epoch": 0.042612992255922855,
      "grad_norm": 0.1559157818555832,
      "learning_rate": 1.52e-05,
      "loss": 0.6399,
      "step": 1392
    },
    {
      "epoch": 0.0426436050377159,
      "grad_norm": 2.45003342628479,
      "learning_rate": 1.5175e-05,
      "loss": 0.7562,
      "step": 1393
    },
    {
      "epoch": 0.042674217819508954,
      "grad_norm": 0.18312406539916992,
      "learning_rate": 1.515e-05,
      "loss": 0.6549,
      "step": 1394
    },
    {
      "epoch": 0.042704830601302,
      "grad_norm": 0.12454316765069962,
      "learning_rate": 1.5125e-05,
      "loss": 0.7069,
      "step": 1395
    },
    {
      "epoch": 0.04273544338309505,
      "grad_norm": 0.23549741506576538,
      "learning_rate": 1.51e-05,
      "loss": 0.7457,
      "step": 1396
    },
    {
      "epoch": 0.04276605616488809,
      "grad_norm": 0.17220689356327057,
      "learning_rate": 1.5075e-05,
      "loss": 0.7145,
      "step": 1397
    },
    {
      "epoch": 0.042796668946681146,
      "grad_norm": 0.14014922082424164,
      "learning_rate": 1.505e-05,
      "loss": 0.7441,
      "step": 1398
    },
    {
      "epoch": 0.04282728172847419,
      "grad_norm": 0.20306576788425446,
      "learning_rate": 1.5025000000000001e-05,
      "loss": 0.754,
      "step": 1399
    },
    {
      "epoch": 0.04285789451026724,
      "grad_norm": 0.28949132561683655,
      "learning_rate": 1.5e-05,
      "loss": 0.6302,
      "step": 1400
    },
    {
      "epoch": 0.042888507292060285,
      "grad_norm": 0.17676953971385956,
      "learning_rate": 1.4975e-05,
      "loss": 0.7235,
      "step": 1401
    },
    {
      "epoch": 0.04291912007385334,
      "grad_norm": 0.21547462046146393,
      "learning_rate": 1.4950000000000001e-05,
      "loss": 0.8782,
      "step": 1402
    },
    {
      "epoch": 0.042949732855646384,
      "grad_norm": 0.223711758852005,
      "learning_rate": 1.4925e-05,
      "loss": 0.7219,
      "step": 1403
    },
    {
      "epoch": 0.04298034563743943,
      "grad_norm": 0.2806595265865326,
      "learning_rate": 1.49e-05,
      "loss": 0.724,
      "step": 1404
    },
    {
      "epoch": 0.04301095841923248,
      "grad_norm": 0.2092374861240387,
      "learning_rate": 1.4875e-05,
      "loss": 0.6904,
      "step": 1405
    },
    {
      "epoch": 0.04304157120102553,
      "grad_norm": 0.2021576166152954,
      "learning_rate": 1.485e-05,
      "loss": 0.635,
      "step": 1406
    },
    {
      "epoch": 0.043072183982818577,
      "grad_norm": 0.49858757853507996,
      "learning_rate": 1.4825e-05,
      "loss": 0.7346,
      "step": 1407
    },
    {
      "epoch": 0.04310279676461162,
      "grad_norm": 0.36597567796707153,
      "learning_rate": 1.48e-05,
      "loss": 0.8145,
      "step": 1408
    },
    {
      "epoch": 0.04313340954640467,
      "grad_norm": 0.18702943623065948,
      "learning_rate": 1.4775e-05,
      "loss": 0.7971,
      "step": 1409
    },
    {
      "epoch": 0.04316402232819772,
      "grad_norm": 0.2622692286968231,
      "learning_rate": 1.475e-05,
      "loss": 0.7517,
      "step": 1410
    },
    {
      "epoch": 0.04319463510999077,
      "grad_norm": 0.20652936398983002,
      "learning_rate": 1.4725e-05,
      "loss": 0.6208,
      "step": 1411
    },
    {
      "epoch": 0.043225247891783815,
      "grad_norm": 0.1816270649433136,
      "learning_rate": 1.47e-05,
      "loss": 0.7186,
      "step": 1412
    },
    {
      "epoch": 0.04325586067357686,
      "grad_norm": 0.17441192269325256,
      "learning_rate": 1.4675e-05,
      "loss": 0.7815,
      "step": 1413
    },
    {
      "epoch": 0.043286473455369914,
      "grad_norm": 0.26693469285964966,
      "learning_rate": 1.465e-05,
      "loss": 0.7197,
      "step": 1414
    },
    {
      "epoch": 0.04331708623716296,
      "grad_norm": 0.19295130670070648,
      "learning_rate": 1.4625e-05,
      "loss": 0.6659,
      "step": 1415
    },
    {
      "epoch": 0.04334769901895601,
      "grad_norm": 0.17871519923210144,
      "learning_rate": 1.4599999999999999e-05,
      "loss": 0.7039,
      "step": 1416
    },
    {
      "epoch": 0.04337831180074905,
      "grad_norm": 0.2626918852329254,
      "learning_rate": 1.4575e-05,
      "loss": 0.6778,
      "step": 1417
    },
    {
      "epoch": 0.043408924582542106,
      "grad_norm": 0.2150593400001526,
      "learning_rate": 1.455e-05,
      "loss": 0.6561,
      "step": 1418
    },
    {
      "epoch": 0.04343953736433515,
      "grad_norm": 0.16079658269882202,
      "learning_rate": 1.4524999999999999e-05,
      "loss": 0.7748,
      "step": 1419
    },
    {
      "epoch": 0.0434701501461282,
      "grad_norm": 0.21796613931655884,
      "learning_rate": 1.45e-05,
      "loss": 0.738,
      "step": 1420
    },
    {
      "epoch": 0.043500762927921245,
      "grad_norm": 0.28668728470802307,
      "learning_rate": 1.4475e-05,
      "loss": 0.719,
      "step": 1421
    },
    {
      "epoch": 0.0435313757097143,
      "grad_norm": 0.2745479941368103,
      "learning_rate": 1.4449999999999999e-05,
      "loss": 0.676,
      "step": 1422
    },
    {
      "epoch": 0.043561988491507345,
      "grad_norm": 0.19976696372032166,
      "learning_rate": 1.4425e-05,
      "loss": 0.6932,
      "step": 1423
    },
    {
      "epoch": 0.04359260127330039,
      "grad_norm": 0.19255882501602173,
      "learning_rate": 1.44e-05,
      "loss": 0.6258,
      "step": 1424
    },
    {
      "epoch": 0.043623214055093444,
      "grad_norm": 0.20796534419059753,
      "learning_rate": 1.4374999999999999e-05,
      "loss": 0.7697,
      "step": 1425
    },
    {
      "epoch": 0.04365382683688649,
      "grad_norm": 0.20839112997055054,
      "learning_rate": 1.435e-05,
      "loss": 0.6305,
      "step": 1426
    },
    {
      "epoch": 0.04368443961867954,
      "grad_norm": 0.18373258411884308,
      "learning_rate": 1.4325e-05,
      "loss": 0.7644,
      "step": 1427
    },
    {
      "epoch": 0.04371505240047258,
      "grad_norm": 0.23528656363487244,
      "learning_rate": 1.43e-05,
      "loss": 0.7344,
      "step": 1428
    },
    {
      "epoch": 0.043745665182265636,
      "grad_norm": 0.17573142051696777,
      "learning_rate": 1.4275e-05,
      "loss": 0.6919,
      "step": 1429
    },
    {
      "epoch": 0.04377627796405868,
      "grad_norm": 0.20182746648788452,
      "learning_rate": 1.4249999999999999e-05,
      "loss": 0.7131,
      "step": 1430
    },
    {
      "epoch": 0.04380689074585173,
      "grad_norm": 0.25240468978881836,
      "learning_rate": 1.4225e-05,
      "loss": 0.83,
      "step": 1431
    },
    {
      "epoch": 0.043837503527644775,
      "grad_norm": 0.18400835990905762,
      "learning_rate": 1.42e-05,
      "loss": 0.8342,
      "step": 1432
    },
    {
      "epoch": 0.04386811630943783,
      "grad_norm": 0.17109465599060059,
      "learning_rate": 1.4174999999999999e-05,
      "loss": 0.7799,
      "step": 1433
    },
    {
      "epoch": 0.043898729091230874,
      "grad_norm": 0.2341393381357193,
      "learning_rate": 1.415e-05,
      "loss": 0.7245,
      "step": 1434
    },
    {
      "epoch": 0.04392934187302392,
      "grad_norm": 0.26290664076805115,
      "learning_rate": 1.4125e-05,
      "loss": 0.8434,
      "step": 1435
    },
    {
      "epoch": 0.04395995465481697,
      "grad_norm": 0.18068936467170715,
      "learning_rate": 1.4099999999999999e-05,
      "loss": 0.669,
      "step": 1436
    },
    {
      "epoch": 0.04399056743661002,
      "grad_norm": 0.1627107858657837,
      "learning_rate": 1.4075e-05,
      "loss": 0.6441,
      "step": 1437
    },
    {
      "epoch": 0.044021180218403066,
      "grad_norm": 0.18794555962085724,
      "learning_rate": 1.4050000000000003e-05,
      "loss": 0.7504,
      "step": 1438
    },
    {
      "epoch": 0.04405179300019611,
      "grad_norm": 0.20481038093566895,
      "learning_rate": 1.4025000000000002e-05,
      "loss": 0.7585,
      "step": 1439
    },
    {
      "epoch": 0.04408240578198916,
      "grad_norm": 0.29924920201301575,
      "learning_rate": 1.4000000000000001e-05,
      "loss": 0.7444,
      "step": 1440
    },
    {
      "epoch": 0.04411301856378221,
      "grad_norm": 0.2157646119594574,
      "learning_rate": 1.3975000000000003e-05,
      "loss": 0.7772,
      "step": 1441
    },
    {
      "epoch": 0.04414363134557526,
      "grad_norm": 0.5474746823310852,
      "learning_rate": 1.3950000000000002e-05,
      "loss": 0.8103,
      "step": 1442
    },
    {
      "epoch": 0.044174244127368305,
      "grad_norm": 0.20608457922935486,
      "learning_rate": 1.3925000000000001e-05,
      "loss": 0.7836,
      "step": 1443
    },
    {
      "epoch": 0.04420485690916135,
      "grad_norm": 0.20454329252243042,
      "learning_rate": 1.3900000000000002e-05,
      "loss": 0.7988,
      "step": 1444
    },
    {
      "epoch": 0.044235469690954404,
      "grad_norm": 0.20319156348705292,
      "learning_rate": 1.3875000000000002e-05,
      "loss": 0.7252,
      "step": 1445
    },
    {
      "epoch": 0.04426608247274745,
      "grad_norm": 0.1749924272298813,
      "learning_rate": 1.3850000000000001e-05,
      "loss": 0.7599,
      "step": 1446
    },
    {
      "epoch": 0.0442966952545405,
      "grad_norm": 0.1520671844482422,
      "learning_rate": 1.3825000000000002e-05,
      "loss": 0.7489,
      "step": 1447
    },
    {
      "epoch": 0.04432730803633354,
      "grad_norm": 0.22726348042488098,
      "learning_rate": 1.3800000000000002e-05,
      "loss": 0.7149,
      "step": 1448
    },
    {
      "epoch": 0.044357920818126596,
      "grad_norm": 0.14579953253269196,
      "learning_rate": 1.3775000000000001e-05,
      "loss": 0.6836,
      "step": 1449
    },
    {
      "epoch": 0.04438853359991964,
      "grad_norm": 0.4012090265750885,
      "learning_rate": 1.3750000000000002e-05,
      "loss": 0.6624,
      "step": 1450
    },
    {
      "epoch": 0.04441914638171269,
      "grad_norm": 0.31377822160720825,
      "learning_rate": 1.3725000000000002e-05,
      "loss": 0.7918,
      "step": 1451
    },
    {
      "epoch": 0.044449759163505735,
      "grad_norm": 0.13224174082279205,
      "learning_rate": 1.3700000000000001e-05,
      "loss": 0.6861,
      "step": 1452
    },
    {
      "epoch": 0.04448037194529879,
      "grad_norm": 0.16695737838745117,
      "learning_rate": 1.3675000000000002e-05,
      "loss": 0.7898,
      "step": 1453
    },
    {
      "epoch": 0.044510984727091835,
      "grad_norm": 0.21031008660793304,
      "learning_rate": 1.3650000000000001e-05,
      "loss": 0.6083,
      "step": 1454
    },
    {
      "epoch": 0.04454159750888488,
      "grad_norm": 0.1752035766839981,
      "learning_rate": 1.3625e-05,
      "loss": 0.7793,
      "step": 1455
    },
    {
      "epoch": 0.04457221029067793,
      "grad_norm": 0.16824465990066528,
      "learning_rate": 1.3600000000000002e-05,
      "loss": 0.6464,
      "step": 1456
    },
    {
      "epoch": 0.04460282307247098,
      "grad_norm": 0.2967368960380554,
      "learning_rate": 1.3575000000000001e-05,
      "loss": 0.8503,
      "step": 1457
    },
    {
      "epoch": 0.04463343585426403,
      "grad_norm": 0.4499569535255432,
      "learning_rate": 1.3550000000000002e-05,
      "loss": 0.7159,
      "step": 1458
    },
    {
      "epoch": 0.04466404863605707,
      "grad_norm": 0.32148292660713196,
      "learning_rate": 1.3525000000000002e-05,
      "loss": 0.8754,
      "step": 1459
    },
    {
      "epoch": 0.04469466141785012,
      "grad_norm": 0.19951559603214264,
      "learning_rate": 1.3500000000000001e-05,
      "loss": 0.7808,
      "step": 1460
    },
    {
      "epoch": 0.04472527419964317,
      "grad_norm": 0.3494749069213867,
      "learning_rate": 1.3475000000000002e-05,
      "loss": 0.8427,
      "step": 1461
    },
    {
      "epoch": 0.04475588698143622,
      "grad_norm": 0.2147471308708191,
      "learning_rate": 1.3450000000000002e-05,
      "loss": 0.7674,
      "step": 1462
    },
    {
      "epoch": 0.044786499763229265,
      "grad_norm": 0.3213605284690857,
      "learning_rate": 1.3425000000000001e-05,
      "loss": 0.8957,
      "step": 1463
    },
    {
      "epoch": 0.04481711254502231,
      "grad_norm": 0.20757092535495758,
      "learning_rate": 1.3400000000000002e-05,
      "loss": 0.5871,
      "step": 1464
    },
    {
      "epoch": 0.044847725326815364,
      "grad_norm": 0.4973711371421814,
      "learning_rate": 1.3375000000000002e-05,
      "loss": 0.8118,
      "step": 1465
    },
    {
      "epoch": 0.04487833810860841,
      "grad_norm": 0.19996313750743866,
      "learning_rate": 1.3350000000000001e-05,
      "loss": 0.8117,
      "step": 1466
    },
    {
      "epoch": 0.04490895089040146,
      "grad_norm": 0.18409158289432526,
      "learning_rate": 1.3325000000000002e-05,
      "loss": 0.6964,
      "step": 1467
    },
    {
      "epoch": 0.0449395636721945,
      "grad_norm": 0.19245922565460205,
      "learning_rate": 1.3300000000000001e-05,
      "loss": 0.7324,
      "step": 1468
    },
    {
      "epoch": 0.044970176453987556,
      "grad_norm": 0.16624824702739716,
      "learning_rate": 1.3275e-05,
      "loss": 0.7058,
      "step": 1469
    },
    {
      "epoch": 0.0450007892357806,
      "grad_norm": 0.2286311388015747,
      "learning_rate": 1.3250000000000002e-05,
      "loss": 0.8327,
      "step": 1470
    },
    {
      "epoch": 0.04503140201757365,
      "grad_norm": 0.16415594518184662,
      "learning_rate": 1.3225000000000001e-05,
      "loss": 0.7884,
      "step": 1471
    },
    {
      "epoch": 0.045062014799366695,
      "grad_norm": 0.181612029671669,
      "learning_rate": 1.32e-05,
      "loss": 0.7758,
      "step": 1472
    },
    {
      "epoch": 0.04509262758115975,
      "grad_norm": 0.2104666531085968,
      "learning_rate": 1.3175000000000002e-05,
      "loss": 0.8775,
      "step": 1473
    },
    {
      "epoch": 0.045123240362952795,
      "grad_norm": 0.33913522958755493,
      "learning_rate": 1.3150000000000001e-05,
      "loss": 0.6308,
      "step": 1474
    },
    {
      "epoch": 0.04515385314474584,
      "grad_norm": 0.7873314619064331,
      "learning_rate": 1.3125e-05,
      "loss": 0.7438,
      "step": 1475
    },
    {
      "epoch": 0.04518446592653889,
      "grad_norm": 0.3000042736530304,
      "learning_rate": 1.3100000000000002e-05,
      "loss": 0.7742,
      "step": 1476
    },
    {
      "epoch": 0.04521507870833194,
      "grad_norm": 0.17038494348526,
      "learning_rate": 1.3075000000000001e-05,
      "loss": 0.7553,
      "step": 1477
    },
    {
      "epoch": 0.04524569149012499,
      "grad_norm": 0.22030872106552124,
      "learning_rate": 1.305e-05,
      "loss": 0.8188,
      "step": 1478
    },
    {
      "epoch": 0.04527630427191803,
      "grad_norm": 0.2120763659477234,
      "learning_rate": 1.3025000000000002e-05,
      "loss": 0.7779,
      "step": 1479
    },
    {
      "epoch": 0.04530691705371108,
      "grad_norm": 0.23140597343444824,
      "learning_rate": 1.3000000000000001e-05,
      "loss": 0.8103,
      "step": 1480
    },
    {
      "epoch": 0.04533752983550413,
      "grad_norm": 0.3727869391441345,
      "learning_rate": 1.2975e-05,
      "loss": 0.759,
      "step": 1481
    },
    {
      "epoch": 0.04536814261729718,
      "grad_norm": 0.19665087759494781,
      "learning_rate": 1.2950000000000001e-05,
      "loss": 0.7842,
      "step": 1482
    },
    {
      "epoch": 0.045398755399090225,
      "grad_norm": 0.26370784640312195,
      "learning_rate": 1.2925e-05,
      "loss": 0.7116,
      "step": 1483
    },
    {
      "epoch": 0.04542936818088327,
      "grad_norm": 0.18251299858093262,
      "learning_rate": 1.29e-05,
      "loss": 0.7627,
      "step": 1484
    },
    {
      "epoch": 0.045459980962676325,
      "grad_norm": 0.7517208456993103,
      "learning_rate": 1.2875000000000001e-05,
      "loss": 0.729,
      "step": 1485
    },
    {
      "epoch": 0.04549059374446937,
      "grad_norm": 0.6478447914123535,
      "learning_rate": 1.285e-05,
      "loss": 0.7746,
      "step": 1486
    },
    {
      "epoch": 0.04552120652626242,
      "grad_norm": 0.18933242559432983,
      "learning_rate": 1.2825000000000002e-05,
      "loss": 0.7115,
      "step": 1487
    },
    {
      "epoch": 0.04555181930805546,
      "grad_norm": 0.1596892774105072,
      "learning_rate": 1.2800000000000001e-05,
      "loss": 0.7823,
      "step": 1488
    },
    {
      "epoch": 0.04558243208984852,
      "grad_norm": 0.20259326696395874,
      "learning_rate": 1.2775e-05,
      "loss": 0.777,
      "step": 1489
    },
    {
      "epoch": 0.04561304487164156,
      "grad_norm": 0.2229137122631073,
      "learning_rate": 1.2750000000000002e-05,
      "loss": 0.7411,
      "step": 1490
    },
    {
      "epoch": 0.04564365765343461,
      "grad_norm": 0.17972135543823242,
      "learning_rate": 1.2725000000000001e-05,
      "loss": 0.8588,
      "step": 1491
    },
    {
      "epoch": 0.045674270435227655,
      "grad_norm": 0.2078058421611786,
      "learning_rate": 1.27e-05,
      "loss": 0.7788,
      "step": 1492
    },
    {
      "epoch": 0.04570488321702071,
      "grad_norm": 0.17842750251293182,
      "learning_rate": 1.2675000000000001e-05,
      "loss": 0.7649,
      "step": 1493
    },
    {
      "epoch": 0.045735495998813755,
      "grad_norm": 0.2040335237979889,
      "learning_rate": 1.2650000000000001e-05,
      "loss": 0.7665,
      "step": 1494
    },
    {
      "epoch": 0.0457661087806068,
      "grad_norm": 0.3642278015613556,
      "learning_rate": 1.2625e-05,
      "loss": 0.7129,
      "step": 1495
    },
    {
      "epoch": 0.04579672156239985,
      "grad_norm": 0.24137164652347565,
      "learning_rate": 1.2600000000000001e-05,
      "loss": 0.8748,
      "step": 1496
    },
    {
      "epoch": 0.0458273343441929,
      "grad_norm": 0.14990079402923584,
      "learning_rate": 1.2575e-05,
      "loss": 0.6895,
      "step": 1497
    },
    {
      "epoch": 0.04585794712598595,
      "grad_norm": 0.1949937343597412,
      "learning_rate": 1.255e-05,
      "loss": 0.7052,
      "step": 1498
    },
    {
      "epoch": 0.04588855990777899,
      "grad_norm": 1.220658540725708,
      "learning_rate": 1.2525000000000001e-05,
      "loss": 0.7688,
      "step": 1499
    },
    {
      "epoch": 0.04591917268957204,
      "grad_norm": 0.17901234328746796,
      "learning_rate": 1.25e-05,
      "loss": 0.7974,
      "step": 1500
    },
    {
      "epoch": 0.04594978547136509,
      "grad_norm": 0.21730190515518188,
      "learning_rate": 1.2475e-05,
      "loss": 0.5578,
      "step": 1501
    },
    {
      "epoch": 0.04598039825315814,
      "grad_norm": 0.2843235433101654,
      "learning_rate": 1.2450000000000001e-05,
      "loss": 0.673,
      "step": 1502
    },
    {
      "epoch": 0.046011011034951185,
      "grad_norm": 0.19560657441616058,
      "learning_rate": 1.2425e-05,
      "loss": 0.8286,
      "step": 1503
    },
    {
      "epoch": 0.04604162381674423,
      "grad_norm": 0.21679967641830444,
      "learning_rate": 1.24e-05,
      "loss": 0.7785,
      "step": 1504
    },
    {
      "epoch": 0.046072236598537285,
      "grad_norm": 0.2646417021751404,
      "learning_rate": 1.2375000000000001e-05,
      "loss": 0.7197,
      "step": 1505
    },
    {
      "epoch": 0.04610284938033033,
      "grad_norm": 0.17035622894763947,
      "learning_rate": 1.235e-05,
      "loss": 0.7864,
      "step": 1506
    },
    {
      "epoch": 0.04613346216212338,
      "grad_norm": 0.2841705083847046,
      "learning_rate": 1.2325e-05,
      "loss": 0.7692,
      "step": 1507
    },
    {
      "epoch": 0.04616407494391643,
      "grad_norm": 0.29930734634399414,
      "learning_rate": 1.23e-05,
      "loss": 0.815,
      "step": 1508
    },
    {
      "epoch": 0.04619468772570948,
      "grad_norm": 0.1707840859889984,
      "learning_rate": 1.2275e-05,
      "loss": 0.7728,
      "step": 1509
    },
    {
      "epoch": 0.04622530050750252,
      "grad_norm": 0.4592837691307068,
      "learning_rate": 1.225e-05,
      "loss": 0.6564,
      "step": 1510
    },
    {
      "epoch": 0.04625591328929557,
      "grad_norm": 0.1473517268896103,
      "learning_rate": 1.2225e-05,
      "loss": 0.7066,
      "step": 1511
    },
    {
      "epoch": 0.04628652607108862,
      "grad_norm": 0.17565388977527618,
      "learning_rate": 1.22e-05,
      "loss": 0.6322,
      "step": 1512
    },
    {
      "epoch": 0.04631713885288167,
      "grad_norm": 0.15995298326015472,
      "learning_rate": 1.2175e-05,
      "loss": 0.6539,
      "step": 1513
    },
    {
      "epoch": 0.046347751634674715,
      "grad_norm": 0.28813570737838745,
      "learning_rate": 1.215e-05,
      "loss": 0.6822,
      "step": 1514
    },
    {
      "epoch": 0.04637836441646776,
      "grad_norm": 2.9550673961639404,
      "learning_rate": 1.2125e-05,
      "loss": 0.7363,
      "step": 1515
    },
    {
      "epoch": 0.046408977198260815,
      "grad_norm": 0.23038910329341888,
      "learning_rate": 1.2100000000000001e-05,
      "loss": 0.6803,
      "step": 1516
    },
    {
      "epoch": 0.04643958998005386,
      "grad_norm": 0.2071835696697235,
      "learning_rate": 1.2075e-05,
      "loss": 0.829,
      "step": 1517
    },
    {
      "epoch": 0.04647020276184691,
      "grad_norm": 0.20737797021865845,
      "learning_rate": 1.205e-05,
      "loss": 0.7646,
      "step": 1518
    },
    {
      "epoch": 0.04650081554363995,
      "grad_norm": 0.2790365517139435,
      "learning_rate": 1.2025000000000001e-05,
      "loss": 0.671,
      "step": 1519
    },
    {
      "epoch": 0.04653142832543301,
      "grad_norm": 0.17174746096134186,
      "learning_rate": 1.2e-05,
      "loss": 0.8311,
      "step": 1520
    },
    {
      "epoch": 0.04656204110722605,
      "grad_norm": 0.19871912896633148,
      "learning_rate": 1.1975e-05,
      "loss": 0.8067,
      "step": 1521
    },
    {
      "epoch": 0.0465926538890191,
      "grad_norm": 0.9508015513420105,
      "learning_rate": 1.195e-05,
      "loss": 0.8949,
      "step": 1522
    },
    {
      "epoch": 0.046623266670812145,
      "grad_norm": 0.8237528204917908,
      "learning_rate": 1.1925e-05,
      "loss": 0.6498,
      "step": 1523
    },
    {
      "epoch": 0.0466538794526052,
      "grad_norm": 0.27851757407188416,
      "learning_rate": 1.19e-05,
      "loss": 0.8097,
      "step": 1524
    },
    {
      "epoch": 0.046684492234398245,
      "grad_norm": 0.2628514766693115,
      "learning_rate": 1.1875e-05,
      "loss": 0.7863,
      "step": 1525
    },
    {
      "epoch": 0.04671510501619129,
      "grad_norm": 0.20049241185188293,
      "learning_rate": 1.185e-05,
      "loss": 0.7382,
      "step": 1526
    },
    {
      "epoch": 0.04674571779798434,
      "grad_norm": 0.23242758214473724,
      "learning_rate": 1.1825e-05,
      "loss": 0.7003,
      "step": 1527
    },
    {
      "epoch": 0.04677633057977739,
      "grad_norm": 0.2905966341495514,
      "learning_rate": 1.18e-05,
      "loss": 0.8097,
      "step": 1528
    },
    {
      "epoch": 0.04680694336157044,
      "grad_norm": 0.8549328446388245,
      "learning_rate": 1.1775e-05,
      "loss": 0.8163,
      "step": 1529
    },
    {
      "epoch": 0.04683755614336348,
      "grad_norm": 0.30331408977508545,
      "learning_rate": 1.175e-05,
      "loss": 0.6397,
      "step": 1530
    },
    {
      "epoch": 0.04686816892515653,
      "grad_norm": 0.26909250020980835,
      "learning_rate": 1.1725e-05,
      "loss": 0.6628,
      "step": 1531
    },
    {
      "epoch": 0.04689878170694958,
      "grad_norm": 0.2025674283504486,
      "learning_rate": 1.1700000000000001e-05,
      "loss": 0.7952,
      "step": 1532
    },
    {
      "epoch": 0.04692939448874263,
      "grad_norm": 0.2271345853805542,
      "learning_rate": 1.1675000000000001e-05,
      "loss": 0.7329,
      "step": 1533
    },
    {
      "epoch": 0.046960007270535675,
      "grad_norm": 0.1598852127790451,
      "learning_rate": 1.1650000000000002e-05,
      "loss": 0.8319,
      "step": 1534
    },
    {
      "epoch": 0.04699062005232872,
      "grad_norm": 0.1538103222846985,
      "learning_rate": 1.1625000000000001e-05,
      "loss": 0.7478,
      "step": 1535
    },
    {
      "epoch": 0.047021232834121775,
      "grad_norm": 0.2078617960214615,
      "learning_rate": 1.16e-05,
      "loss": 0.6891,
      "step": 1536
    },
    {
      "epoch": 0.04705184561591482,
      "grad_norm": 0.19620837271213531,
      "learning_rate": 1.1575000000000002e-05,
      "loss": 0.7297,
      "step": 1537
    },
    {
      "epoch": 0.04708245839770787,
      "grad_norm": 0.30410417914390564,
      "learning_rate": 1.1550000000000001e-05,
      "loss": 0.7196,
      "step": 1538
    },
    {
      "epoch": 0.047113071179500914,
      "grad_norm": 0.2689773142337799,
      "learning_rate": 1.1525e-05,
      "loss": 0.9524,
      "step": 1539
    },
    {
      "epoch": 0.04714368396129397,
      "grad_norm": 0.3247095048427582,
      "learning_rate": 1.1500000000000002e-05,
      "loss": 0.8175,
      "step": 1540
    },
    {
      "epoch": 0.04717429674308701,
      "grad_norm": 0.2408241629600525,
      "learning_rate": 1.1475000000000001e-05,
      "loss": 0.8229,
      "step": 1541
    },
    {
      "epoch": 0.04720490952488006,
      "grad_norm": 0.25109750032424927,
      "learning_rate": 1.145e-05,
      "loss": 0.777,
      "step": 1542
    },
    {
      "epoch": 0.047235522306673106,
      "grad_norm": 0.769884467124939,
      "learning_rate": 1.1425000000000002e-05,
      "loss": 0.751,
      "step": 1543
    },
    {
      "epoch": 0.04726613508846616,
      "grad_norm": 0.25562018156051636,
      "learning_rate": 1.1400000000000001e-05,
      "loss": 0.699,
      "step": 1544
    },
    {
      "epoch": 0.047296747870259205,
      "grad_norm": 0.2240927815437317,
      "learning_rate": 1.1375e-05,
      "loss": 0.776,
      "step": 1545
    },
    {
      "epoch": 0.04732736065205225,
      "grad_norm": 0.14069287478923798,
      "learning_rate": 1.1350000000000001e-05,
      "loss": 0.7069,
      "step": 1546
    },
    {
      "epoch": 0.0473579734338453,
      "grad_norm": 0.175034299492836,
      "learning_rate": 1.1325e-05,
      "loss": 0.7102,
      "step": 1547
    },
    {
      "epoch": 0.04738858621563835,
      "grad_norm": 0.14178086817264557,
      "learning_rate": 1.13e-05,
      "loss": 0.7089,
      "step": 1548
    },
    {
      "epoch": 0.0474191989974314,
      "grad_norm": 0.2102005034685135,
      "learning_rate": 1.1275000000000001e-05,
      "loss": 0.7688,
      "step": 1549
    },
    {
      "epoch": 0.04744981177922444,
      "grad_norm": 0.1744409054517746,
      "learning_rate": 1.125e-05,
      "loss": 0.8313,
      "step": 1550
    },
    {
      "epoch": 0.04748042456101749,
      "grad_norm": 0.24438358843326569,
      "learning_rate": 1.1225e-05,
      "loss": 0.6834,
      "step": 1551
    },
    {
      "epoch": 0.04751103734281054,
      "grad_norm": 0.2174401730298996,
      "learning_rate": 1.1200000000000001e-05,
      "loss": 0.699,
      "step": 1552
    },
    {
      "epoch": 0.04754165012460359,
      "grad_norm": 0.2609294056892395,
      "learning_rate": 1.1175e-05,
      "loss": 0.7859,
      "step": 1553
    },
    {
      "epoch": 0.047572262906396635,
      "grad_norm": 0.17115375399589539,
      "learning_rate": 1.115e-05,
      "loss": 0.7099,
      "step": 1554
    },
    {
      "epoch": 0.04760287568818968,
      "grad_norm": 0.6217370629310608,
      "learning_rate": 1.1125000000000001e-05,
      "loss": 0.8711,
      "step": 1555
    },
    {
      "epoch": 0.047633488469982735,
      "grad_norm": 0.3955477476119995,
      "learning_rate": 1.11e-05,
      "loss": 0.6149,
      "step": 1556
    },
    {
      "epoch": 0.04766410125177578,
      "grad_norm": 0.2897701859474182,
      "learning_rate": 1.1075e-05,
      "loss": 0.8954,
      "step": 1557
    },
    {
      "epoch": 0.04769471403356883,
      "grad_norm": 0.2647947669029236,
      "learning_rate": 1.1050000000000001e-05,
      "loss": 0.6331,
      "step": 1558
    },
    {
      "epoch": 0.047725326815361874,
      "grad_norm": 0.21409998834133148,
      "learning_rate": 1.1025e-05,
      "loss": 0.8069,
      "step": 1559
    },
    {
      "epoch": 0.04775593959715493,
      "grad_norm": 0.18530602753162384,
      "learning_rate": 1.1000000000000001e-05,
      "loss": 0.7338,
      "step": 1560
    },
    {
      "epoch": 0.04778655237894797,
      "grad_norm": 0.34595510363578796,
      "learning_rate": 1.0975e-05,
      "loss": 0.6096,
      "step": 1561
    },
    {
      "epoch": 0.04781716516074102,
      "grad_norm": 0.522099494934082,
      "learning_rate": 1.095e-05,
      "loss": 0.8441,
      "step": 1562
    },
    {
      "epoch": 0.047847777942534066,
      "grad_norm": 0.21209700405597687,
      "learning_rate": 1.0925000000000001e-05,
      "loss": 0.7748,
      "step": 1563
    },
    {
      "epoch": 0.04787839072432712,
      "grad_norm": 0.21519571542739868,
      "learning_rate": 1.09e-05,
      "loss": 0.7266,
      "step": 1564
    },
    {
      "epoch": 0.047909003506120165,
      "grad_norm": 0.37689515948295593,
      "learning_rate": 1.0875e-05,
      "loss": 0.67,
      "step": 1565
    },
    {
      "epoch": 0.04793961628791321,
      "grad_norm": 0.18475137650966644,
      "learning_rate": 1.0850000000000001e-05,
      "loss": 0.8698,
      "step": 1566
    },
    {
      "epoch": 0.04797022906970626,
      "grad_norm": 0.27287888526916504,
      "learning_rate": 1.0825e-05,
      "loss": 0.7897,
      "step": 1567
    },
    {
      "epoch": 0.04800084185149931,
      "grad_norm": 0.3418515920639038,
      "learning_rate": 1.08e-05,
      "loss": 0.6458,
      "step": 1568
    },
    {
      "epoch": 0.04803145463329236,
      "grad_norm": 0.42239508032798767,
      "learning_rate": 1.0775000000000001e-05,
      "loss": 0.7408,
      "step": 1569
    },
    {
      "epoch": 0.048062067415085404,
      "grad_norm": 0.28834104537963867,
      "learning_rate": 1.075e-05,
      "loss": 0.8674,
      "step": 1570
    },
    {
      "epoch": 0.04809268019687845,
      "grad_norm": 0.3395974040031433,
      "learning_rate": 1.0725e-05,
      "loss": 0.7709,
      "step": 1571
    },
    {
      "epoch": 0.0481232929786715,
      "grad_norm": 0.2134561687707901,
      "learning_rate": 1.0700000000000001e-05,
      "loss": 0.6167,
      "step": 1572
    },
    {
      "epoch": 0.04815390576046455,
      "grad_norm": 0.19481435418128967,
      "learning_rate": 1.0675e-05,
      "loss": 0.7234,
      "step": 1573
    },
    {
      "epoch": 0.048184518542257596,
      "grad_norm": 0.2438262552022934,
      "learning_rate": 1.065e-05,
      "loss": 0.7326,
      "step": 1574
    },
    {
      "epoch": 0.04821513132405064,
      "grad_norm": 0.25424280762672424,
      "learning_rate": 1.0625e-05,
      "loss": 0.7583,
      "step": 1575
    },
    {
      "epoch": 0.048245744105843695,
      "grad_norm": 0.24908120930194855,
      "learning_rate": 1.06e-05,
      "loss": 0.7492,
      "step": 1576
    },
    {
      "epoch": 0.04827635688763674,
      "grad_norm": 0.18936116993427277,
      "learning_rate": 1.0575e-05,
      "loss": 0.6922,
      "step": 1577
    },
    {
      "epoch": 0.04830696966942979,
      "grad_norm": 0.2518194913864136,
      "learning_rate": 1.055e-05,
      "loss": 0.8189,
      "step": 1578
    },
    {
      "epoch": 0.048337582451222834,
      "grad_norm": 0.20476292073726654,
      "learning_rate": 1.0525e-05,
      "loss": 0.6236,
      "step": 1579
    },
    {
      "epoch": 0.04836819523301589,
      "grad_norm": 0.17108705639839172,
      "learning_rate": 1.05e-05,
      "loss": 0.725,
      "step": 1580
    },
    {
      "epoch": 0.04839880801480893,
      "grad_norm": 0.21263495087623596,
      "learning_rate": 1.0475e-05,
      "loss": 0.6627,
      "step": 1581
    },
    {
      "epoch": 0.04842942079660198,
      "grad_norm": 0.1911524534225464,
      "learning_rate": 1.045e-05,
      "loss": 0.6909,
      "step": 1582
    },
    {
      "epoch": 0.048460033578395026,
      "grad_norm": 0.2562510073184967,
      "learning_rate": 1.0425e-05,
      "loss": 0.5688,
      "step": 1583
    },
    {
      "epoch": 0.04849064636018808,
      "grad_norm": 0.5714108347892761,
      "learning_rate": 1.04e-05,
      "loss": 0.7423,
      "step": 1584
    },
    {
      "epoch": 0.048521259141981125,
      "grad_norm": 0.17824621498584747,
      "learning_rate": 1.0375e-05,
      "loss": 0.6054,
      "step": 1585
    },
    {
      "epoch": 0.04855187192377417,
      "grad_norm": 0.18194986879825592,
      "learning_rate": 1.035e-05,
      "loss": 0.6974,
      "step": 1586
    },
    {
      "epoch": 0.04858248470556722,
      "grad_norm": 0.1321507841348648,
      "learning_rate": 1.0325e-05,
      "loss": 0.6817,
      "step": 1587
    },
    {
      "epoch": 0.04861309748736027,
      "grad_norm": 0.2529693841934204,
      "learning_rate": 1.03e-05,
      "loss": 0.8123,
      "step": 1588
    },
    {
      "epoch": 0.04864371026915332,
      "grad_norm": 0.3626464307308197,
      "learning_rate": 1.0275e-05,
      "loss": 0.9334,
      "step": 1589
    },
    {
      "epoch": 0.048674323050946364,
      "grad_norm": 0.32221466302871704,
      "learning_rate": 1.025e-05,
      "loss": 0.6892,
      "step": 1590
    },
    {
      "epoch": 0.04870493583273942,
      "grad_norm": 0.2794777750968933,
      "learning_rate": 1.0225e-05,
      "loss": 0.6811,
      "step": 1591
    },
    {
      "epoch": 0.04873554861453246,
      "grad_norm": 0.44167646765708923,
      "learning_rate": 1.02e-05,
      "loss": 0.7679,
      "step": 1592
    },
    {
      "epoch": 0.04876616139632551,
      "grad_norm": 0.30259740352630615,
      "learning_rate": 1.0175e-05,
      "loss": 0.7731,
      "step": 1593
    },
    {
      "epoch": 0.048796774178118556,
      "grad_norm": 0.31337401270866394,
      "learning_rate": 1.0150000000000001e-05,
      "loss": 0.6514,
      "step": 1594
    },
    {
      "epoch": 0.04882738695991161,
      "grad_norm": 0.1332949697971344,
      "learning_rate": 1.0125e-05,
      "loss": 0.7161,
      "step": 1595
    },
    {
      "epoch": 0.048857999741704655,
      "grad_norm": 0.15922820568084717,
      "learning_rate": 1.0100000000000002e-05,
      "loss": 0.7888,
      "step": 1596
    },
    {
      "epoch": 0.0488886125234977,
      "grad_norm": 0.3080964982509613,
      "learning_rate": 1.0075000000000001e-05,
      "loss": 0.7783,
      "step": 1597
    },
    {
      "epoch": 0.04891922530529075,
      "grad_norm": 0.255107045173645,
      "learning_rate": 1.005e-05,
      "loss": 0.7853,
      "step": 1598
    },
    {
      "epoch": 0.0489498380870838,
      "grad_norm": 0.20637470483779907,
      "learning_rate": 1.0025000000000001e-05,
      "loss": 0.793,
      "step": 1599
    },
    {
      "epoch": 0.04898045086887685,
      "grad_norm": 0.2846757173538208,
      "learning_rate": 1e-05,
      "loss": 0.8348,
      "step": 1600
    },
    {
      "epoch": 0.049011063650669894,
      "grad_norm": 0.204476997256279,
      "learning_rate": 9.975e-06,
      "loss": 0.7415,
      "step": 1601
    },
    {
      "epoch": 0.04904167643246294,
      "grad_norm": 0.1692608892917633,
      "learning_rate": 9.950000000000001e-06,
      "loss": 0.7755,
      "step": 1602
    },
    {
      "epoch": 0.04907228921425599,
      "grad_norm": 0.4788927435874939,
      "learning_rate": 9.925e-06,
      "loss": 0.8095,
      "step": 1603
    },
    {
      "epoch": 0.04910290199604904,
      "grad_norm": 0.14311343431472778,
      "learning_rate": 9.900000000000002e-06,
      "loss": 0.6723,
      "step": 1604
    },
    {
      "epoch": 0.049133514777842086,
      "grad_norm": 0.1613636463880539,
      "learning_rate": 9.875000000000001e-06,
      "loss": 0.6537,
      "step": 1605
    },
    {
      "epoch": 0.04916412755963513,
      "grad_norm": 0.14396587014198303,
      "learning_rate": 9.85e-06,
      "loss": 0.6744,
      "step": 1606
    },
    {
      "epoch": 0.049194740341428185,
      "grad_norm": 0.23125985264778137,
      "learning_rate": 9.825000000000002e-06,
      "loss": 0.6825,
      "step": 1607
    },
    {
      "epoch": 0.04922535312322123,
      "grad_norm": 0.18390092253684998,
      "learning_rate": 9.800000000000001e-06,
      "loss": 0.7631,
      "step": 1608
    },
    {
      "epoch": 0.04925596590501428,
      "grad_norm": 0.18321500718593597,
      "learning_rate": 9.775e-06,
      "loss": 0.778,
      "step": 1609
    },
    {
      "epoch": 0.049286578686807324,
      "grad_norm": 0.18895719945430756,
      "learning_rate": 9.750000000000002e-06,
      "loss": 0.7314,
      "step": 1610
    },
    {
      "epoch": 0.04931719146860038,
      "grad_norm": 0.18812943994998932,
      "learning_rate": 9.725000000000001e-06,
      "loss": 0.6701,
      "step": 1611
    },
    {
      "epoch": 0.04934780425039342,
      "grad_norm": 0.2372865080833435,
      "learning_rate": 9.7e-06,
      "loss": 0.8956,
      "step": 1612
    },
    {
      "epoch": 0.04937841703218647,
      "grad_norm": 0.1964545100927353,
      "learning_rate": 9.675000000000001e-06,
      "loss": 0.8078,
      "step": 1613
    },
    {
      "epoch": 0.049409029813979516,
      "grad_norm": 0.18392175436019897,
      "learning_rate": 9.65e-06,
      "loss": 0.8344,
      "step": 1614
    },
    {
      "epoch": 0.04943964259577257,
      "grad_norm": 0.21107783913612366,
      "learning_rate": 9.625e-06,
      "loss": 0.6548,
      "step": 1615
    },
    {
      "epoch": 0.049470255377565615,
      "grad_norm": 0.127716526389122,
      "learning_rate": 9.600000000000001e-06,
      "loss": 0.6988,
      "step": 1616
    },
    {
      "epoch": 0.04950086815935866,
      "grad_norm": 0.22515739500522614,
      "learning_rate": 9.575e-06,
      "loss": 0.7556,
      "step": 1617
    },
    {
      "epoch": 0.04953148094115171,
      "grad_norm": 0.15679682791233063,
      "learning_rate": 9.55e-06,
      "loss": 0.78,
      "step": 1618
    },
    {
      "epoch": 0.04956209372294476,
      "grad_norm": 0.20511648058891296,
      "learning_rate": 9.525000000000001e-06,
      "loss": 0.792,
      "step": 1619
    },
    {
      "epoch": 0.04959270650473781,
      "grad_norm": 0.24771378934383392,
      "learning_rate": 9.5e-06,
      "loss": 0.7155,
      "step": 1620
    },
    {
      "epoch": 0.049623319286530854,
      "grad_norm": 0.4764645993709564,
      "learning_rate": 9.475e-06,
      "loss": 0.707,
      "step": 1621
    },
    {
      "epoch": 0.0496539320683239,
      "grad_norm": 0.1764499545097351,
      "learning_rate": 9.450000000000001e-06,
      "loss": 0.7198,
      "step": 1622
    },
    {
      "epoch": 0.04968454485011695,
      "grad_norm": 0.1413356363773346,
      "learning_rate": 9.425e-06,
      "loss": 0.6286,
      "step": 1623
    },
    {
      "epoch": 0.04971515763191,
      "grad_norm": 0.2665606439113617,
      "learning_rate": 9.4e-06,
      "loss": 0.8979,
      "step": 1624
    },
    {
      "epoch": 0.049745770413703046,
      "grad_norm": 0.1769164800643921,
      "learning_rate": 9.375000000000001e-06,
      "loss": 0.8218,
      "step": 1625
    },
    {
      "epoch": 0.04977638319549609,
      "grad_norm": 0.1657216101884842,
      "learning_rate": 9.35e-06,
      "loss": 0.8044,
      "step": 1626
    },
    {
      "epoch": 0.049806995977289145,
      "grad_norm": 0.22014309465885162,
      "learning_rate": 9.325e-06,
      "loss": 0.7668,
      "step": 1627
    },
    {
      "epoch": 0.04983760875908219,
      "grad_norm": 0.2394803762435913,
      "learning_rate": 9.3e-06,
      "loss": 0.7533,
      "step": 1628
    },
    {
      "epoch": 0.04986822154087524,
      "grad_norm": 0.2107066661119461,
      "learning_rate": 9.275e-06,
      "loss": 0.6471,
      "step": 1629
    },
    {
      "epoch": 0.049898834322668284,
      "grad_norm": 0.1769438087940216,
      "learning_rate": 9.25e-06,
      "loss": 0.7524,
      "step": 1630
    },
    {
      "epoch": 0.04992944710446134,
      "grad_norm": 0.38696393370628357,
      "learning_rate": 9.225e-06,
      "loss": 0.6328,
      "step": 1631
    },
    {
      "epoch": 0.049960059886254383,
      "grad_norm": 0.6218041777610779,
      "learning_rate": 9.2e-06,
      "loss": 0.873,
      "step": 1632
    },
    {
      "epoch": 0.04999067266804743,
      "grad_norm": 0.2131049633026123,
      "learning_rate": 9.175000000000001e-06,
      "loss": 0.6417,
      "step": 1633
    },
    {
      "epoch": 0.050021285449840476,
      "grad_norm": 0.19039735198020935,
      "learning_rate": 9.15e-06,
      "loss": 0.6757,
      "step": 1634
    },
    {
      "epoch": 0.05005189823163353,
      "grad_norm": 0.22656527161598206,
      "learning_rate": 9.125e-06,
      "loss": 0.6735,
      "step": 1635
    },
    {
      "epoch": 0.050082511013426576,
      "grad_norm": 0.4664088487625122,
      "learning_rate": 9.100000000000001e-06,
      "loss": 0.6711,
      "step": 1636
    },
    {
      "epoch": 0.05011312379521962,
      "grad_norm": 0.14818334579467773,
      "learning_rate": 9.075e-06,
      "loss": 0.7268,
      "step": 1637
    },
    {
      "epoch": 0.05014373657701267,
      "grad_norm": 0.24351781606674194,
      "learning_rate": 9.05e-06,
      "loss": 0.7873,
      "step": 1638
    },
    {
      "epoch": 0.05017434935880572,
      "grad_norm": 0.3813919126987457,
      "learning_rate": 9.025e-06,
      "loss": 0.6998,
      "step": 1639
    },
    {
      "epoch": 0.05020496214059877,
      "grad_norm": 0.21142300963401794,
      "learning_rate": 9e-06,
      "loss": 0.7294,
      "step": 1640
    },
    {
      "epoch": 0.050235574922391814,
      "grad_norm": 0.1697981208562851,
      "learning_rate": 8.975e-06,
      "loss": 0.7557,
      "step": 1641
    },
    {
      "epoch": 0.05026618770418486,
      "grad_norm": 0.19358620047569275,
      "learning_rate": 8.95e-06,
      "loss": 0.7715,
      "step": 1642
    },
    {
      "epoch": 0.05029680048597791,
      "grad_norm": 0.1711193025112152,
      "learning_rate": 8.925e-06,
      "loss": 0.7499,
      "step": 1643
    },
    {
      "epoch": 0.05032741326777096,
      "grad_norm": 0.503459095954895,
      "learning_rate": 8.9e-06,
      "loss": 0.607,
      "step": 1644
    },
    {
      "epoch": 0.050358026049564006,
      "grad_norm": 0.315213680267334,
      "learning_rate": 8.875e-06,
      "loss": 0.6301,
      "step": 1645
    },
    {
      "epoch": 0.05038863883135705,
      "grad_norm": 0.3684975504875183,
      "learning_rate": 8.85e-06,
      "loss": 0.6873,
      "step": 1646
    },
    {
      "epoch": 0.050419251613150105,
      "grad_norm": 0.16170641779899597,
      "learning_rate": 8.825e-06,
      "loss": 0.697,
      "step": 1647
    },
    {
      "epoch": 0.05044986439494315,
      "grad_norm": 0.1979825794696808,
      "learning_rate": 8.8e-06,
      "loss": 0.7175,
      "step": 1648
    },
    {
      "epoch": 0.0504804771767362,
      "grad_norm": 1.0185275077819824,
      "learning_rate": 8.775e-06,
      "loss": 0.8808,
      "step": 1649
    },
    {
      "epoch": 0.050511089958529244,
      "grad_norm": 0.1964053511619568,
      "learning_rate": 8.75e-06,
      "loss": 0.8463,
      "step": 1650
    },
    {
      "epoch": 0.0505417027403223,
      "grad_norm": 0.17201021313667297,
      "learning_rate": 8.725e-06,
      "loss": 0.6943,
      "step": 1651
    },
    {
      "epoch": 0.050572315522115344,
      "grad_norm": 0.557744026184082,
      "learning_rate": 8.7e-06,
      "loss": 0.7528,
      "step": 1652
    },
    {
      "epoch": 0.05060292830390839,
      "grad_norm": 0.3689303696155548,
      "learning_rate": 8.674999999999999e-06,
      "loss": 0.8374,
      "step": 1653
    },
    {
      "epoch": 0.050633541085701436,
      "grad_norm": 0.18315868079662323,
      "learning_rate": 8.65e-06,
      "loss": 0.5929,
      "step": 1654
    },
    {
      "epoch": 0.05066415386749449,
      "grad_norm": 0.1540217101573944,
      "learning_rate": 8.625e-06,
      "loss": 0.6234,
      "step": 1655
    },
    {
      "epoch": 0.050694766649287536,
      "grad_norm": 0.19729679822921753,
      "learning_rate": 8.599999999999999e-06,
      "loss": 0.8595,
      "step": 1656
    },
    {
      "epoch": 0.05072537943108058,
      "grad_norm": 0.27182450890541077,
      "learning_rate": 8.575000000000002e-06,
      "loss": 0.7252,
      "step": 1657
    },
    {
      "epoch": 0.05075599221287363,
      "grad_norm": 0.17630839347839355,
      "learning_rate": 8.550000000000001e-06,
      "loss": 0.707,
      "step": 1658
    },
    {
      "epoch": 0.05078660499466668,
      "grad_norm": 0.18738602101802826,
      "learning_rate": 8.525e-06,
      "loss": 0.8076,
      "step": 1659
    },
    {
      "epoch": 0.05081721777645973,
      "grad_norm": 0.2189522385597229,
      "learning_rate": 8.500000000000002e-06,
      "loss": 0.7522,
      "step": 1660
    },
    {
      "epoch": 0.050847830558252774,
      "grad_norm": 0.2620421051979065,
      "learning_rate": 8.475000000000001e-06,
      "loss": 0.6793,
      "step": 1661
    },
    {
      "epoch": 0.05087844334004582,
      "grad_norm": 0.32495126128196716,
      "learning_rate": 8.45e-06,
      "loss": 0.7903,
      "step": 1662
    },
    {
      "epoch": 0.05090905612183887,
      "grad_norm": 0.2128186672925949,
      "learning_rate": 8.425000000000001e-06,
      "loss": 0.8208,
      "step": 1663
    },
    {
      "epoch": 0.05093966890363192,
      "grad_norm": 0.19781242311000824,
      "learning_rate": 8.400000000000001e-06,
      "loss": 0.7979,
      "step": 1664
    },
    {
      "epoch": 0.050970281685424966,
      "grad_norm": 0.13403639197349548,
      "learning_rate": 8.375e-06,
      "loss": 0.6671,
      "step": 1665
    },
    {
      "epoch": 0.05100089446721801,
      "grad_norm": 0.18206468224525452,
      "learning_rate": 8.350000000000001e-06,
      "loss": 0.7505,
      "step": 1666
    },
    {
      "epoch": 0.051031507249011065,
      "grad_norm": 0.24645686149597168,
      "learning_rate": 8.325e-06,
      "loss": 0.7681,
      "step": 1667
    },
    {
      "epoch": 0.05106212003080411,
      "grad_norm": 0.37239405512809753,
      "learning_rate": 8.3e-06,
      "loss": 0.6633,
      "step": 1668
    },
    {
      "epoch": 0.05109273281259716,
      "grad_norm": 0.17945677042007446,
      "learning_rate": 8.275000000000001e-06,
      "loss": 0.7967,
      "step": 1669
    },
    {
      "epoch": 0.051123345594390204,
      "grad_norm": 0.23387126624584198,
      "learning_rate": 8.25e-06,
      "loss": 0.6386,
      "step": 1670
    },
    {
      "epoch": 0.05115395837618326,
      "grad_norm": 0.29536131024360657,
      "learning_rate": 8.225e-06,
      "loss": 0.6656,
      "step": 1671
    },
    {
      "epoch": 0.051184571157976304,
      "grad_norm": 0.6070420742034912,
      "learning_rate": 8.200000000000001e-06,
      "loss": 0.7388,
      "step": 1672
    },
    {
      "epoch": 0.05121518393976935,
      "grad_norm": 0.26393070816993713,
      "learning_rate": 8.175e-06,
      "loss": 0.8115,
      "step": 1673
    },
    {
      "epoch": 0.0512457967215624,
      "grad_norm": 0.2342115193605423,
      "learning_rate": 8.15e-06,
      "loss": 0.6063,
      "step": 1674
    },
    {
      "epoch": 0.05127640950335545,
      "grad_norm": 0.21809308230876923,
      "learning_rate": 8.125000000000001e-06,
      "loss": 0.7021,
      "step": 1675
    },
    {
      "epoch": 0.051307022285148496,
      "grad_norm": 0.19079121947288513,
      "learning_rate": 8.1e-06,
      "loss": 0.8438,
      "step": 1676
    },
    {
      "epoch": 0.05133763506694154,
      "grad_norm": 0.34489575028419495,
      "learning_rate": 8.075000000000001e-06,
      "loss": 0.733,
      "step": 1677
    },
    {
      "epoch": 0.051368247848734595,
      "grad_norm": 0.19377005100250244,
      "learning_rate": 8.050000000000001e-06,
      "loss": 0.7164,
      "step": 1678
    },
    {
      "epoch": 0.05139886063052764,
      "grad_norm": 0.21816489100456238,
      "learning_rate": 8.025e-06,
      "loss": 0.7873,
      "step": 1679
    },
    {
      "epoch": 0.05142947341232069,
      "grad_norm": 0.16981734335422516,
      "learning_rate": 8.000000000000001e-06,
      "loss": 0.7002,
      "step": 1680
    },
    {
      "epoch": 0.051460086194113734,
      "grad_norm": 0.1259424388408661,
      "learning_rate": 7.975e-06,
      "loss": 0.676,
      "step": 1681
    },
    {
      "epoch": 0.05149069897590679,
      "grad_norm": 0.21944841742515564,
      "learning_rate": 7.95e-06,
      "loss": 0.8016,
      "step": 1682
    },
    {
      "epoch": 0.051521311757699834,
      "grad_norm": 0.16056184470653534,
      "learning_rate": 7.925000000000001e-06,
      "loss": 0.705,
      "step": 1683
    },
    {
      "epoch": 0.05155192453949288,
      "grad_norm": 0.5676759481430054,
      "learning_rate": 7.9e-06,
      "loss": 0.7015,
      "step": 1684
    },
    {
      "epoch": 0.051582537321285926,
      "grad_norm": 0.17534229159355164,
      "learning_rate": 7.875e-06,
      "loss": 0.7044,
      "step": 1685
    },
    {
      "epoch": 0.05161315010307898,
      "grad_norm": 0.2128453105688095,
      "learning_rate": 7.850000000000001e-06,
      "loss": 0.7282,
      "step": 1686
    },
    {
      "epoch": 0.051643762884872026,
      "grad_norm": 0.1641882210969925,
      "learning_rate": 7.825e-06,
      "loss": 0.7676,
      "step": 1687
    },
    {
      "epoch": 0.05167437566666507,
      "grad_norm": 0.3296613395214081,
      "learning_rate": 7.8e-06,
      "loss": 0.8055,
      "step": 1688
    },
    {
      "epoch": 0.05170498844845812,
      "grad_norm": 0.22692061960697174,
      "learning_rate": 7.775000000000001e-06,
      "loss": 0.6009,
      "step": 1689
    },
    {
      "epoch": 0.05173560123025117,
      "grad_norm": 0.23220714926719666,
      "learning_rate": 7.75e-06,
      "loss": 0.6812,
      "step": 1690
    },
    {
      "epoch": 0.05176621401204422,
      "grad_norm": 0.22598305344581604,
      "learning_rate": 7.725e-06,
      "loss": 0.7887,
      "step": 1691
    },
    {
      "epoch": 0.051796826793837264,
      "grad_norm": 0.22877754271030426,
      "learning_rate": 7.7e-06,
      "loss": 0.6659,
      "step": 1692
    },
    {
      "epoch": 0.05182743957563031,
      "grad_norm": 0.16403557360172272,
      "learning_rate": 7.675e-06,
      "loss": 0.6716,
      "step": 1693
    },
    {
      "epoch": 0.05185805235742336,
      "grad_norm": 2.35122013092041,
      "learning_rate": 7.65e-06,
      "loss": 0.7006,
      "step": 1694
    },
    {
      "epoch": 0.05188866513921641,
      "grad_norm": 0.20687636733055115,
      "learning_rate": 7.625e-06,
      "loss": 0.7085,
      "step": 1695
    },
    {
      "epoch": 0.051919277921009456,
      "grad_norm": 0.2407185435295105,
      "learning_rate": 7.6e-06,
      "loss": 0.6266,
      "step": 1696
    },
    {
      "epoch": 0.0519498907028025,
      "grad_norm": 0.2524780035018921,
      "learning_rate": 7.575e-06,
      "loss": 0.7501,
      "step": 1697
    },
    {
      "epoch": 0.051980503484595555,
      "grad_norm": 0.17073781788349152,
      "learning_rate": 7.55e-06,
      "loss": 0.5893,
      "step": 1698
    },
    {
      "epoch": 0.0520111162663886,
      "grad_norm": 0.17331229150295258,
      "learning_rate": 7.525e-06,
      "loss": 0.7823,
      "step": 1699
    },
    {
      "epoch": 0.05204172904818165,
      "grad_norm": 0.21335388720035553,
      "learning_rate": 7.5e-06,
      "loss": 0.8481,
      "step": 1700
    },
    {
      "epoch": 0.052072341829974694,
      "grad_norm": 0.1802944839000702,
      "learning_rate": 7.4750000000000004e-06,
      "loss": 0.8386,
      "step": 1701
    },
    {
      "epoch": 0.05210295461176775,
      "grad_norm": 0.4171488285064697,
      "learning_rate": 7.45e-06,
      "loss": 0.625,
      "step": 1702
    },
    {
      "epoch": 0.052133567393560794,
      "grad_norm": 0.19102302193641663,
      "learning_rate": 7.425e-06,
      "loss": 0.7525,
      "step": 1703
    },
    {
      "epoch": 0.05216418017535384,
      "grad_norm": 0.17875568568706512,
      "learning_rate": 7.4e-06,
      "loss": 0.592,
      "step": 1704
    },
    {
      "epoch": 0.052194792957146886,
      "grad_norm": 0.21885529160499573,
      "learning_rate": 7.375e-06,
      "loss": 0.8422,
      "step": 1705
    },
    {
      "epoch": 0.05222540573893994,
      "grad_norm": 0.2097679078578949,
      "learning_rate": 7.35e-06,
      "loss": 0.8075,
      "step": 1706
    },
    {
      "epoch": 0.052256018520732986,
      "grad_norm": 0.4510006010532379,
      "learning_rate": 7.325e-06,
      "loss": 0.8563,
      "step": 1707
    },
    {
      "epoch": 0.05228663130252603,
      "grad_norm": 0.20790322124958038,
      "learning_rate": 7.2999999999999996e-06,
      "loss": 0.6765,
      "step": 1708
    },
    {
      "epoch": 0.05231724408431908,
      "grad_norm": 0.1703735888004303,
      "learning_rate": 7.275e-06,
      "loss": 0.5852,
      "step": 1709
    },
    {
      "epoch": 0.05234785686611213,
      "grad_norm": 0.14204388856887817,
      "learning_rate": 7.25e-06,
      "loss": 0.7149,
      "step": 1710
    },
    {
      "epoch": 0.05237846964790518,
      "grad_norm": 0.1893712729215622,
      "learning_rate": 7.2249999999999994e-06,
      "loss": 0.7563,
      "step": 1711
    },
    {
      "epoch": 0.052409082429698224,
      "grad_norm": 0.21546171605587006,
      "learning_rate": 7.2e-06,
      "loss": 0.7282,
      "step": 1712
    },
    {
      "epoch": 0.05243969521149127,
      "grad_norm": 0.33784613013267517,
      "learning_rate": 7.175e-06,
      "loss": 0.7939,
      "step": 1713
    },
    {
      "epoch": 0.052470307993284324,
      "grad_norm": 0.18855346739292145,
      "learning_rate": 7.15e-06,
      "loss": 0.7412,
      "step": 1714
    },
    {
      "epoch": 0.05250092077507737,
      "grad_norm": 0.15056855976581573,
      "learning_rate": 7.1249999999999995e-06,
      "loss": 0.8282,
      "step": 1715
    },
    {
      "epoch": 0.052531533556870416,
      "grad_norm": 0.19531558454036713,
      "learning_rate": 7.1e-06,
      "loss": 0.8728,
      "step": 1716
    },
    {
      "epoch": 0.05256214633866346,
      "grad_norm": 0.16257528960704803,
      "learning_rate": 7.075e-06,
      "loss": 0.5997,
      "step": 1717
    },
    {
      "epoch": 0.052592759120456516,
      "grad_norm": 0.23577460646629333,
      "learning_rate": 7.049999999999999e-06,
      "loss": 0.8128,
      "step": 1718
    },
    {
      "epoch": 0.05262337190224956,
      "grad_norm": 0.2041068971157074,
      "learning_rate": 7.025000000000001e-06,
      "loss": 0.7218,
      "step": 1719
    },
    {
      "epoch": 0.05265398468404261,
      "grad_norm": 0.16134090721607208,
      "learning_rate": 7.000000000000001e-06,
      "loss": 0.6937,
      "step": 1720
    },
    {
      "epoch": 0.052684597465835654,
      "grad_norm": 0.37593454122543335,
      "learning_rate": 6.975000000000001e-06,
      "loss": 0.6906,
      "step": 1721
    },
    {
      "epoch": 0.05271521024762871,
      "grad_norm": 0.1931181699037552,
      "learning_rate": 6.950000000000001e-06,
      "loss": 0.6676,
      "step": 1722
    },
    {
      "epoch": 0.052745823029421754,
      "grad_norm": 0.20231810212135315,
      "learning_rate": 6.925000000000001e-06,
      "loss": 0.8702,
      "step": 1723
    },
    {
      "epoch": 0.0527764358112148,
      "grad_norm": 0.17015162110328674,
      "learning_rate": 6.900000000000001e-06,
      "loss": 0.8129,
      "step": 1724
    },
    {
      "epoch": 0.052807048593007846,
      "grad_norm": 0.2000030130147934,
      "learning_rate": 6.875000000000001e-06,
      "loss": 0.6763,
      "step": 1725
    },
    {
      "epoch": 0.0528376613748009,
      "grad_norm": 0.1939508318901062,
      "learning_rate": 6.8500000000000005e-06,
      "loss": 0.8759,
      "step": 1726
    },
    {
      "epoch": 0.052868274156593946,
      "grad_norm": 0.16820907592773438,
      "learning_rate": 6.825000000000001e-06,
      "loss": 0.6232,
      "step": 1727
    },
    {
      "epoch": 0.05289888693838699,
      "grad_norm": 0.28471896052360535,
      "learning_rate": 6.800000000000001e-06,
      "loss": 0.7008,
      "step": 1728
    },
    {
      "epoch": 0.05292949972018004,
      "grad_norm": 0.17785529792308807,
      "learning_rate": 6.775000000000001e-06,
      "loss": 0.7169,
      "step": 1729
    },
    {
      "epoch": 0.05296011250197309,
      "grad_norm": 0.24167223274707794,
      "learning_rate": 6.750000000000001e-06,
      "loss": 0.7305,
      "step": 1730
    },
    {
      "epoch": 0.05299072528376614,
      "grad_norm": 0.19206897914409637,
      "learning_rate": 6.725000000000001e-06,
      "loss": 0.7458,
      "step": 1731
    },
    {
      "epoch": 0.053021338065559184,
      "grad_norm": 0.41176724433898926,
      "learning_rate": 6.700000000000001e-06,
      "loss": 0.7322,
      "step": 1732
    },
    {
      "epoch": 0.05305195084735223,
      "grad_norm": 0.1697763204574585,
      "learning_rate": 6.6750000000000005e-06,
      "loss": 0.7107,
      "step": 1733
    },
    {
      "epoch": 0.053082563629145284,
      "grad_norm": 0.14381657540798187,
      "learning_rate": 6.650000000000001e-06,
      "loss": 0.7723,
      "step": 1734
    },
    {
      "epoch": 0.05311317641093833,
      "grad_norm": 0.34905487298965454,
      "learning_rate": 6.625000000000001e-06,
      "loss": 0.6621,
      "step": 1735
    },
    {
      "epoch": 0.053143789192731376,
      "grad_norm": 0.2939896881580353,
      "learning_rate": 6.6e-06,
      "loss": 0.6311,
      "step": 1736
    },
    {
      "epoch": 0.05317440197452442,
      "grad_norm": 0.23655951023101807,
      "learning_rate": 6.5750000000000006e-06,
      "loss": 0.8286,
      "step": 1737
    },
    {
      "epoch": 0.053205014756317476,
      "grad_norm": 0.2007512003183365,
      "learning_rate": 6.550000000000001e-06,
      "loss": 0.8502,
      "step": 1738
    },
    {
      "epoch": 0.05323562753811052,
      "grad_norm": 0.18162930011749268,
      "learning_rate": 6.525e-06,
      "loss": 0.8821,
      "step": 1739
    },
    {
      "epoch": 0.05326624031990357,
      "grad_norm": 0.22441232204437256,
      "learning_rate": 6.5000000000000004e-06,
      "loss": 0.8163,
      "step": 1740
    },
    {
      "epoch": 0.053296853101696615,
      "grad_norm": 0.1651020348072052,
      "learning_rate": 6.475000000000001e-06,
      "loss": 0.6233,
      "step": 1741
    },
    {
      "epoch": 0.05332746588348967,
      "grad_norm": 0.2931329309940338,
      "learning_rate": 6.45e-06,
      "loss": 0.8147,
      "step": 1742
    },
    {
      "epoch": 0.053358078665282714,
      "grad_norm": 0.17724351584911346,
      "learning_rate": 6.425e-06,
      "loss": 0.7467,
      "step": 1743
    },
    {
      "epoch": 0.05338869144707576,
      "grad_norm": 0.2628931403160095,
      "learning_rate": 6.4000000000000006e-06,
      "loss": 0.7077,
      "step": 1744
    },
    {
      "epoch": 0.05341930422886881,
      "grad_norm": 0.1988299936056137,
      "learning_rate": 6.375000000000001e-06,
      "loss": 0.8624,
      "step": 1745
    },
    {
      "epoch": 0.05344991701066186,
      "grad_norm": 0.3903224468231201,
      "learning_rate": 6.35e-06,
      "loss": 0.817,
      "step": 1746
    },
    {
      "epoch": 0.053480529792454906,
      "grad_norm": 0.2470809817314148,
      "learning_rate": 6.3250000000000004e-06,
      "loss": 0.6565,
      "step": 1747
    },
    {
      "epoch": 0.05351114257424795,
      "grad_norm": 0.26171353459358215,
      "learning_rate": 6.300000000000001e-06,
      "loss": 0.747,
      "step": 1748
    },
    {
      "epoch": 0.053541755356041,
      "grad_norm": 0.24998386204242706,
      "learning_rate": 6.275e-06,
      "loss": 0.7437,
      "step": 1749
    },
    {
      "epoch": 0.05357236813783405,
      "grad_norm": 0.16469882428646088,
      "learning_rate": 6.25e-06,
      "loss": 0.7004,
      "step": 1750
    },
    {
      "epoch": 0.0536029809196271,
      "grad_norm": 0.1687631458044052,
      "learning_rate": 6.2250000000000005e-06,
      "loss": 0.7491,
      "step": 1751
    },
    {
      "epoch": 0.053633593701420144,
      "grad_norm": 0.2632650136947632,
      "learning_rate": 6.2e-06,
      "loss": 0.7584,
      "step": 1752
    },
    {
      "epoch": 0.05366420648321319,
      "grad_norm": 0.3997511863708496,
      "learning_rate": 6.175e-06,
      "loss": 0.7422,
      "step": 1753
    },
    {
      "epoch": 0.053694819265006244,
      "grad_norm": 0.1457836627960205,
      "learning_rate": 6.15e-06,
      "loss": 0.6759,
      "step": 1754
    },
    {
      "epoch": 0.05372543204679929,
      "grad_norm": 0.20162567496299744,
      "learning_rate": 6.125e-06,
      "loss": 0.5826,
      "step": 1755
    },
    {
      "epoch": 0.053756044828592336,
      "grad_norm": 0.1821262538433075,
      "learning_rate": 6.1e-06,
      "loss": 0.686,
      "step": 1756
    },
    {
      "epoch": 0.05378665761038539,
      "grad_norm": 0.19438721239566803,
      "learning_rate": 6.075e-06,
      "loss": 0.7979,
      "step": 1757
    },
    {
      "epoch": 0.053817270392178436,
      "grad_norm": 0.16508685052394867,
      "learning_rate": 6.0500000000000005e-06,
      "loss": 0.6363,
      "step": 1758
    },
    {
      "epoch": 0.05384788317397148,
      "grad_norm": 0.14570090174674988,
      "learning_rate": 6.025e-06,
      "loss": 0.6943,
      "step": 1759
    },
    {
      "epoch": 0.05387849595576453,
      "grad_norm": 0.31953689455986023,
      "learning_rate": 6e-06,
      "loss": 0.7096,
      "step": 1760
    },
    {
      "epoch": 0.05390910873755758,
      "grad_norm": 0.2912423312664032,
      "learning_rate": 5.975e-06,
      "loss": 0.7557,
      "step": 1761
    },
    {
      "epoch": 0.05393972151935063,
      "grad_norm": 0.297395795583725,
      "learning_rate": 5.95e-06,
      "loss": 0.8632,
      "step": 1762
    },
    {
      "epoch": 0.053970334301143674,
      "grad_norm": 0.16705255210399628,
      "learning_rate": 5.925e-06,
      "loss": 0.717,
      "step": 1763
    },
    {
      "epoch": 0.05400094708293672,
      "grad_norm": 0.1990862339735031,
      "learning_rate": 5.9e-06,
      "loss": 0.7145,
      "step": 1764
    },
    {
      "epoch": 0.054031559864729774,
      "grad_norm": 0.17376422882080078,
      "learning_rate": 5.875e-06,
      "loss": 0.7793,
      "step": 1765
    },
    {
      "epoch": 0.05406217264652282,
      "grad_norm": 0.1474568247795105,
      "learning_rate": 5.850000000000001e-06,
      "loss": 0.7252,
      "step": 1766
    },
    {
      "epoch": 0.054092785428315866,
      "grad_norm": 0.2529583275318146,
      "learning_rate": 5.825000000000001e-06,
      "loss": 0.7506,
      "step": 1767
    },
    {
      "epoch": 0.05412339821010891,
      "grad_norm": 0.23975840210914612,
      "learning_rate": 5.8e-06,
      "loss": 0.7058,
      "step": 1768
    },
    {
      "epoch": 0.054154010991901966,
      "grad_norm": 0.18080109357833862,
      "learning_rate": 5.775000000000001e-06,
      "loss": 0.8061,
      "step": 1769
    },
    {
      "epoch": 0.05418462377369501,
      "grad_norm": 0.352898508310318,
      "learning_rate": 5.750000000000001e-06,
      "loss": 0.7531,
      "step": 1770
    },
    {
      "epoch": 0.05421523655548806,
      "grad_norm": 0.1498080939054489,
      "learning_rate": 5.725e-06,
      "loss": 0.6654,
      "step": 1771
    },
    {
      "epoch": 0.054245849337281105,
      "grad_norm": 0.1784876137971878,
      "learning_rate": 5.7000000000000005e-06,
      "loss": 0.7602,
      "step": 1772
    },
    {
      "epoch": 0.05427646211907416,
      "grad_norm": 0.1589439958333969,
      "learning_rate": 5.675000000000001e-06,
      "loss": 0.8144,
      "step": 1773
    },
    {
      "epoch": 0.054307074900867204,
      "grad_norm": 0.19767868518829346,
      "learning_rate": 5.65e-06,
      "loss": 0.7556,
      "step": 1774
    },
    {
      "epoch": 0.05433768768266025,
      "grad_norm": 0.2499147206544876,
      "learning_rate": 5.625e-06,
      "loss": 0.7407,
      "step": 1775
    },
    {
      "epoch": 0.0543683004644533,
      "grad_norm": 0.2228129357099533,
      "learning_rate": 5.600000000000001e-06,
      "loss": 0.7453,
      "step": 1776
    },
    {
      "epoch": 0.05439891324624635,
      "grad_norm": 0.36121177673339844,
      "learning_rate": 5.575e-06,
      "loss": 0.8074,
      "step": 1777
    },
    {
      "epoch": 0.054429526028039396,
      "grad_norm": 0.14970862865447998,
      "learning_rate": 5.55e-06,
      "loss": 0.7048,
      "step": 1778
    },
    {
      "epoch": 0.05446013880983244,
      "grad_norm": 0.28590700030326843,
      "learning_rate": 5.5250000000000005e-06,
      "loss": 0.8697,
      "step": 1779
    },
    {
      "epoch": 0.05449075159162549,
      "grad_norm": 0.12230058759450912,
      "learning_rate": 5.500000000000001e-06,
      "loss": 0.5194,
      "step": 1780
    },
    {
      "epoch": 0.05452136437341854,
      "grad_norm": 0.27105942368507385,
      "learning_rate": 5.475e-06,
      "loss": 0.7038,
      "step": 1781
    },
    {
      "epoch": 0.05455197715521159,
      "grad_norm": 0.27199503779411316,
      "learning_rate": 5.45e-06,
      "loss": 0.8372,
      "step": 1782
    },
    {
      "epoch": 0.054582589937004634,
      "grad_norm": 0.13204067945480347,
      "learning_rate": 5.4250000000000006e-06,
      "loss": 0.6656,
      "step": 1783
    },
    {
      "epoch": 0.05461320271879768,
      "grad_norm": 0.4038136601448059,
      "learning_rate": 5.4e-06,
      "loss": 0.6367,
      "step": 1784
    },
    {
      "epoch": 0.054643815500590734,
      "grad_norm": 0.14178359508514404,
      "learning_rate": 5.375e-06,
      "loss": 0.7852,
      "step": 1785
    },
    {
      "epoch": 0.05467442828238378,
      "grad_norm": 0.23617903888225555,
      "learning_rate": 5.3500000000000004e-06,
      "loss": 0.8096,
      "step": 1786
    },
    {
      "epoch": 0.054705041064176826,
      "grad_norm": 0.292989581823349,
      "learning_rate": 5.325e-06,
      "loss": 0.7708,
      "step": 1787
    },
    {
      "epoch": 0.05473565384596987,
      "grad_norm": 0.3514353334903717,
      "learning_rate": 5.3e-06,
      "loss": 0.8144,
      "step": 1788
    },
    {
      "epoch": 0.054766266627762926,
      "grad_norm": 0.24606868624687195,
      "learning_rate": 5.275e-06,
      "loss": 0.8282,
      "step": 1789
    },
    {
      "epoch": 0.05479687940955597,
      "grad_norm": 0.19965842366218567,
      "learning_rate": 5.25e-06,
      "loss": 0.7818,
      "step": 1790
    },
    {
      "epoch": 0.05482749219134902,
      "grad_norm": 0.20567180216312408,
      "learning_rate": 5.225e-06,
      "loss": 0.7794,
      "step": 1791
    },
    {
      "epoch": 0.054858104973142065,
      "grad_norm": 0.24173671007156372,
      "learning_rate": 5.2e-06,
      "loss": 0.6748,
      "step": 1792
    },
    {
      "epoch": 0.05488871775493512,
      "grad_norm": 0.18126225471496582,
      "learning_rate": 5.175e-06,
      "loss": 0.7244,
      "step": 1793
    },
    {
      "epoch": 0.054919330536728164,
      "grad_norm": 0.35086899995803833,
      "learning_rate": 5.15e-06,
      "loss": 0.7335,
      "step": 1794
    },
    {
      "epoch": 0.05494994331852121,
      "grad_norm": 0.26201528310775757,
      "learning_rate": 5.125e-06,
      "loss": 0.7325,
      "step": 1795
    },
    {
      "epoch": 0.05498055610031426,
      "grad_norm": 0.19221776723861694,
      "learning_rate": 5.1e-06,
      "loss": 0.6095,
      "step": 1796
    },
    {
      "epoch": 0.05501116888210731,
      "grad_norm": 2.6318180561065674,
      "learning_rate": 5.0750000000000005e-06,
      "loss": 0.7599,
      "step": 1797
    },
    {
      "epoch": 0.055041781663900356,
      "grad_norm": 0.1661502718925476,
      "learning_rate": 5.050000000000001e-06,
      "loss": 0.7045,
      "step": 1798
    },
    {
      "epoch": 0.0550723944456934,
      "grad_norm": 0.21585451066493988,
      "learning_rate": 5.025e-06,
      "loss": 0.8838,
      "step": 1799
    },
    {
      "epoch": 0.05510300722748645,
      "grad_norm": 1.78583562374115,
      "learning_rate": 5e-06,
      "loss": 0.6489,
      "step": 1800
    },
    {
      "epoch": 0.0551336200092795,
      "grad_norm": 0.3068593442440033,
      "learning_rate": 4.975000000000001e-06,
      "loss": 0.8527,
      "step": 1801
    },
    {
      "epoch": 0.05516423279107255,
      "grad_norm": 0.18784654140472412,
      "learning_rate": 4.950000000000001e-06,
      "loss": 0.7595,
      "step": 1802
    },
    {
      "epoch": 0.055194845572865595,
      "grad_norm": 0.3054613173007965,
      "learning_rate": 4.925e-06,
      "loss": 0.8068,
      "step": 1803
    },
    {
      "epoch": 0.05522545835465864,
      "grad_norm": 0.16008980572223663,
      "learning_rate": 4.9000000000000005e-06,
      "loss": 0.7203,
      "step": 1804
    },
    {
      "epoch": 0.055256071136451694,
      "grad_norm": 0.2382369339466095,
      "learning_rate": 4.875000000000001e-06,
      "loss": 0.8153,
      "step": 1805
    },
    {
      "epoch": 0.05528668391824474,
      "grad_norm": 0.21501265466213226,
      "learning_rate": 4.85e-06,
      "loss": 0.6401,
      "step": 1806
    },
    {
      "epoch": 0.05531729670003779,
      "grad_norm": 0.3021475076675415,
      "learning_rate": 4.825e-06,
      "loss": 0.621,
      "step": 1807
    },
    {
      "epoch": 0.05534790948183083,
      "grad_norm": 0.1342448741197586,
      "learning_rate": 4.800000000000001e-06,
      "loss": 0.6619,
      "step": 1808
    },
    {
      "epoch": 0.055378522263623886,
      "grad_norm": 0.17561277747154236,
      "learning_rate": 4.775e-06,
      "loss": 0.7191,
      "step": 1809
    },
    {
      "epoch": 0.05540913504541693,
      "grad_norm": 0.15056774020195007,
      "learning_rate": 4.75e-06,
      "loss": 0.6937,
      "step": 1810
    },
    {
      "epoch": 0.05543974782720998,
      "grad_norm": 0.1654406189918518,
      "learning_rate": 4.7250000000000005e-06,
      "loss": 0.7897,
      "step": 1811
    },
    {
      "epoch": 0.055470360609003025,
      "grad_norm": 0.14608435332775116,
      "learning_rate": 4.7e-06,
      "loss": 0.6175,
      "step": 1812
    },
    {
      "epoch": 0.05550097339079608,
      "grad_norm": 0.26280805468559265,
      "learning_rate": 4.675e-06,
      "loss": 0.8024,
      "step": 1813
    },
    {
      "epoch": 0.055531586172589124,
      "grad_norm": 0.17181113362312317,
      "learning_rate": 4.65e-06,
      "loss": 0.7371,
      "step": 1814
    },
    {
      "epoch": 0.05556219895438217,
      "grad_norm": 0.26489022374153137,
      "learning_rate": 4.625e-06,
      "loss": 0.8855,
      "step": 1815
    },
    {
      "epoch": 0.05559281173617522,
      "grad_norm": 0.1889534592628479,
      "learning_rate": 4.6e-06,
      "loss": 0.6309,
      "step": 1816
    },
    {
      "epoch": 0.05562342451796827,
      "grad_norm": 0.2193518877029419,
      "learning_rate": 4.575e-06,
      "loss": 0.5929,
      "step": 1817
    },
    {
      "epoch": 0.055654037299761316,
      "grad_norm": 0.3430089056491852,
      "learning_rate": 4.5500000000000005e-06,
      "loss": 0.7536,
      "step": 1818
    },
    {
      "epoch": 0.05568465008155436,
      "grad_norm": 0.149757519364357,
      "learning_rate": 4.525e-06,
      "loss": 0.656,
      "step": 1819
    },
    {
      "epoch": 0.05571526286334741,
      "grad_norm": 0.27429482340812683,
      "learning_rate": 4.5e-06,
      "loss": 0.7431,
      "step": 1820
    },
    {
      "epoch": 0.05574587564514046,
      "grad_norm": 0.17854250967502594,
      "learning_rate": 4.475e-06,
      "loss": 0.7217,
      "step": 1821
    },
    {
      "epoch": 0.05577648842693351,
      "grad_norm": 0.1632390320301056,
      "learning_rate": 4.45e-06,
      "loss": 0.6993,
      "step": 1822
    },
    {
      "epoch": 0.055807101208726555,
      "grad_norm": 0.211224764585495,
      "learning_rate": 4.425e-06,
      "loss": 0.8422,
      "step": 1823
    },
    {
      "epoch": 0.0558377139905196,
      "grad_norm": 0.31507962942123413,
      "learning_rate": 4.4e-06,
      "loss": 0.5683,
      "step": 1824
    },
    {
      "epoch": 0.055868326772312654,
      "grad_norm": 0.33798840641975403,
      "learning_rate": 4.375e-06,
      "loss": 0.7355,
      "step": 1825
    },
    {
      "epoch": 0.0558989395541057,
      "grad_norm": 0.16777919232845306,
      "learning_rate": 4.35e-06,
      "loss": 0.705,
      "step": 1826
    },
    {
      "epoch": 0.05592955233589875,
      "grad_norm": 0.20378972589969635,
      "learning_rate": 4.325e-06,
      "loss": 0.6812,
      "step": 1827
    },
    {
      "epoch": 0.05596016511769179,
      "grad_norm": 0.1542394757270813,
      "learning_rate": 4.2999999999999995e-06,
      "loss": 0.6676,
      "step": 1828
    },
    {
      "epoch": 0.055990777899484846,
      "grad_norm": 0.3252175748348236,
      "learning_rate": 4.2750000000000006e-06,
      "loss": 0.8682,
      "step": 1829
    },
    {
      "epoch": 0.05602139068127789,
      "grad_norm": 0.18716713786125183,
      "learning_rate": 4.250000000000001e-06,
      "loss": 0.6965,
      "step": 1830
    },
    {
      "epoch": 0.05605200346307094,
      "grad_norm": 0.21057115495204926,
      "learning_rate": 4.225e-06,
      "loss": 0.7508,
      "step": 1831
    },
    {
      "epoch": 0.056082616244863985,
      "grad_norm": 0.2618989944458008,
      "learning_rate": 4.2000000000000004e-06,
      "loss": 0.7724,
      "step": 1832
    },
    {
      "epoch": 0.05611322902665704,
      "grad_norm": 0.15817677974700928,
      "learning_rate": 4.175000000000001e-06,
      "loss": 0.5826,
      "step": 1833
    },
    {
      "epoch": 0.056143841808450085,
      "grad_norm": 0.14318108558654785,
      "learning_rate": 4.15e-06,
      "loss": 0.6787,
      "step": 1834
    },
    {
      "epoch": 0.05617445459024313,
      "grad_norm": 0.15108223259449005,
      "learning_rate": 4.125e-06,
      "loss": 0.6976,
      "step": 1835
    },
    {
      "epoch": 0.05620506737203618,
      "grad_norm": 0.18957237899303436,
      "learning_rate": 4.1000000000000006e-06,
      "loss": 0.7004,
      "step": 1836
    },
    {
      "epoch": 0.05623568015382923,
      "grad_norm": 0.24269287288188934,
      "learning_rate": 4.075e-06,
      "loss": 0.671,
      "step": 1837
    },
    {
      "epoch": 0.05626629293562228,
      "grad_norm": 0.20656317472457886,
      "learning_rate": 4.05e-06,
      "loss": 0.6725,
      "step": 1838
    },
    {
      "epoch": 0.05629690571741532,
      "grad_norm": 0.23046916723251343,
      "learning_rate": 4.0250000000000004e-06,
      "loss": 0.7247,
      "step": 1839
    },
    {
      "epoch": 0.056327518499208376,
      "grad_norm": 0.13774670660495758,
      "learning_rate": 4.000000000000001e-06,
      "loss": 0.5267,
      "step": 1840
    },
    {
      "epoch": 0.05635813128100142,
      "grad_norm": 0.16444531083106995,
      "learning_rate": 3.975e-06,
      "loss": 0.8193,
      "step": 1841
    },
    {
      "epoch": 0.05638874406279447,
      "grad_norm": 0.3549332320690155,
      "learning_rate": 3.95e-06,
      "loss": 0.6791,
      "step": 1842
    },
    {
      "epoch": 0.056419356844587515,
      "grad_norm": 0.24899324774742126,
      "learning_rate": 3.9250000000000005e-06,
      "loss": 0.7479,
      "step": 1843
    },
    {
      "epoch": 0.05644996962638057,
      "grad_norm": 0.1483343541622162,
      "learning_rate": 3.9e-06,
      "loss": 0.6967,
      "step": 1844
    },
    {
      "epoch": 0.056480582408173614,
      "grad_norm": 0.5083380341529846,
      "learning_rate": 3.875e-06,
      "loss": 0.6838,
      "step": 1845
    },
    {
      "epoch": 0.05651119518996666,
      "grad_norm": 2.453470468521118,
      "learning_rate": 3.85e-06,
      "loss": 0.6527,
      "step": 1846
    },
    {
      "epoch": 0.05654180797175971,
      "grad_norm": 0.1482744663953781,
      "learning_rate": 3.825e-06,
      "loss": 0.6956,
      "step": 1847
    },
    {
      "epoch": 0.05657242075355276,
      "grad_norm": 0.17126566171646118,
      "learning_rate": 3.8e-06,
      "loss": 0.7343,
      "step": 1848
    },
    {
      "epoch": 0.056603033535345806,
      "grad_norm": 0.23918463289737701,
      "learning_rate": 3.775e-06,
      "loss": 0.8351,
      "step": 1849
    },
    {
      "epoch": 0.05663364631713885,
      "grad_norm": 0.404384046792984,
      "learning_rate": 3.75e-06,
      "loss": 0.7803,
      "step": 1850
    },
    {
      "epoch": 0.0566642590989319,
      "grad_norm": 0.22967982292175293,
      "learning_rate": 3.725e-06,
      "loss": 0.7942,
      "step": 1851
    },
    {
      "epoch": 0.05669487188072495,
      "grad_norm": 0.12506280839443207,
      "learning_rate": 3.7e-06,
      "loss": 0.6805,
      "step": 1852
    },
    {
      "epoch": 0.056725484662518,
      "grad_norm": 0.17211754620075226,
      "learning_rate": 3.675e-06,
      "loss": 0.6938,
      "step": 1853
    },
    {
      "epoch": 0.056756097444311045,
      "grad_norm": 0.1875794380903244,
      "learning_rate": 3.6499999999999998e-06,
      "loss": 0.7026,
      "step": 1854
    },
    {
      "epoch": 0.05678671022610409,
      "grad_norm": 0.2610374093055725,
      "learning_rate": 3.625e-06,
      "loss": 0.7615,
      "step": 1855
    },
    {
      "epoch": 0.056817323007897144,
      "grad_norm": 0.3171074092388153,
      "learning_rate": 3.6e-06,
      "loss": 0.7332,
      "step": 1856
    },
    {
      "epoch": 0.05684793578969019,
      "grad_norm": 0.17313598096370697,
      "learning_rate": 3.575e-06,
      "loss": 0.8132,
      "step": 1857
    },
    {
      "epoch": 0.05687854857148324,
      "grad_norm": 0.17949628829956055,
      "learning_rate": 3.55e-06,
      "loss": 0.8159,
      "step": 1858
    },
    {
      "epoch": 0.05690916135327628,
      "grad_norm": 0.2075027972459793,
      "learning_rate": 3.5249999999999997e-06,
      "loss": 0.6272,
      "step": 1859
    },
    {
      "epoch": 0.056939774135069336,
      "grad_norm": 0.17496755719184875,
      "learning_rate": 3.5000000000000004e-06,
      "loss": 0.7917,
      "step": 1860
    },
    {
      "epoch": 0.05697038691686238,
      "grad_norm": 0.2061583250761032,
      "learning_rate": 3.4750000000000006e-06,
      "loss": 0.8488,
      "step": 1861
    },
    {
      "epoch": 0.05700099969865543,
      "grad_norm": 0.20431779325008392,
      "learning_rate": 3.4500000000000004e-06,
      "loss": 0.735,
      "step": 1862
    },
    {
      "epoch": 0.057031612480448475,
      "grad_norm": 0.18036247789859772,
      "learning_rate": 3.4250000000000002e-06,
      "loss": 0.7432,
      "step": 1863
    },
    {
      "epoch": 0.05706222526224153,
      "grad_norm": 0.20169523358345032,
      "learning_rate": 3.4000000000000005e-06,
      "loss": 0.7256,
      "step": 1864
    },
    {
      "epoch": 0.057092838044034575,
      "grad_norm": 0.1634170562028885,
      "learning_rate": 3.3750000000000003e-06,
      "loss": 0.6506,
      "step": 1865
    },
    {
      "epoch": 0.05712345082582762,
      "grad_norm": 0.26710858941078186,
      "learning_rate": 3.3500000000000005e-06,
      "loss": 0.8069,
      "step": 1866
    },
    {
      "epoch": 0.05715406360762067,
      "grad_norm": 0.1702592521905899,
      "learning_rate": 3.3250000000000004e-06,
      "loss": 0.6475,
      "step": 1867
    },
    {
      "epoch": 0.05718467638941372,
      "grad_norm": 0.31365975737571716,
      "learning_rate": 3.3e-06,
      "loss": 0.6229,
      "step": 1868
    },
    {
      "epoch": 0.05721528917120677,
      "grad_norm": 0.1414778232574463,
      "learning_rate": 3.2750000000000004e-06,
      "loss": 0.6636,
      "step": 1869
    },
    {
      "epoch": 0.05724590195299981,
      "grad_norm": 0.1525268852710724,
      "learning_rate": 3.2500000000000002e-06,
      "loss": 0.673,
      "step": 1870
    },
    {
      "epoch": 0.05727651473479286,
      "grad_norm": 0.18583688139915466,
      "learning_rate": 3.225e-06,
      "loss": 0.7393,
      "step": 1871
    },
    {
      "epoch": 0.05730712751658591,
      "grad_norm": 0.3937184810638428,
      "learning_rate": 3.2000000000000003e-06,
      "loss": 0.7009,
      "step": 1872
    },
    {
      "epoch": 0.05733774029837896,
      "grad_norm": 0.1583988070487976,
      "learning_rate": 3.175e-06,
      "loss": 0.7204,
      "step": 1873
    },
    {
      "epoch": 0.057368353080172005,
      "grad_norm": 0.208236962556839,
      "learning_rate": 3.1500000000000003e-06,
      "loss": 0.7425,
      "step": 1874
    },
    {
      "epoch": 0.05739896586196505,
      "grad_norm": 0.21602371335029602,
      "learning_rate": 3.125e-06,
      "loss": 0.7484,
      "step": 1875
    },
    {
      "epoch": 0.057429578643758104,
      "grad_norm": 0.2181994467973709,
      "learning_rate": 3.1e-06,
      "loss": 0.8847,
      "step": 1876
    },
    {
      "epoch": 0.05746019142555115,
      "grad_norm": 0.3163932263851166,
      "learning_rate": 3.075e-06,
      "loss": 0.6569,
      "step": 1877
    },
    {
      "epoch": 0.0574908042073442,
      "grad_norm": 0.2205355316400528,
      "learning_rate": 3.05e-06,
      "loss": 0.7566,
      "step": 1878
    },
    {
      "epoch": 0.05752141698913724,
      "grad_norm": 0.39185836911201477,
      "learning_rate": 3.0250000000000003e-06,
      "loss": 0.7343,
      "step": 1879
    },
    {
      "epoch": 0.057552029770930296,
      "grad_norm": 0.24171265959739685,
      "learning_rate": 3e-06,
      "loss": 0.6918,
      "step": 1880
    },
    {
      "epoch": 0.05758264255272334,
      "grad_norm": 0.13574226200580597,
      "learning_rate": 2.975e-06,
      "loss": 0.5843,
      "step": 1881
    },
    {
      "epoch": 0.05761325533451639,
      "grad_norm": 0.1396929919719696,
      "learning_rate": 2.95e-06,
      "loss": 0.6401,
      "step": 1882
    },
    {
      "epoch": 0.057643868116309435,
      "grad_norm": 0.13706500828266144,
      "learning_rate": 2.9250000000000004e-06,
      "loss": 0.7267,
      "step": 1883
    },
    {
      "epoch": 0.05767448089810249,
      "grad_norm": 0.19841693341732025,
      "learning_rate": 2.9e-06,
      "loss": 0.6807,
      "step": 1884
    },
    {
      "epoch": 0.057705093679895535,
      "grad_norm": 0.2582291066646576,
      "learning_rate": 2.8750000000000004e-06,
      "loss": 0.6901,
      "step": 1885
    },
    {
      "epoch": 0.05773570646168858,
      "grad_norm": 0.23676660656929016,
      "learning_rate": 2.8500000000000002e-06,
      "loss": 0.783,
      "step": 1886
    },
    {
      "epoch": 0.05776631924348163,
      "grad_norm": 0.22414517402648926,
      "learning_rate": 2.825e-06,
      "loss": 0.7166,
      "step": 1887
    },
    {
      "epoch": 0.05779693202527468,
      "grad_norm": 0.1739206165075302,
      "learning_rate": 2.8000000000000003e-06,
      "loss": 0.6941,
      "step": 1888
    },
    {
      "epoch": 0.05782754480706773,
      "grad_norm": 0.1685245782136917,
      "learning_rate": 2.775e-06,
      "loss": 0.8185,
      "step": 1889
    },
    {
      "epoch": 0.05785815758886077,
      "grad_norm": 0.17247501015663147,
      "learning_rate": 2.7500000000000004e-06,
      "loss": 0.739,
      "step": 1890
    },
    {
      "epoch": 0.05788877037065382,
      "grad_norm": 0.17371977865695953,
      "learning_rate": 2.725e-06,
      "loss": 0.7901,
      "step": 1891
    },
    {
      "epoch": 0.05791938315244687,
      "grad_norm": 0.16300912201404572,
      "learning_rate": 2.7e-06,
      "loss": 0.8067,
      "step": 1892
    },
    {
      "epoch": 0.05794999593423992,
      "grad_norm": 0.2127021998167038,
      "learning_rate": 2.6750000000000002e-06,
      "loss": 0.6952,
      "step": 1893
    },
    {
      "epoch": 0.057980608716032965,
      "grad_norm": 0.18358264863491058,
      "learning_rate": 2.65e-06,
      "loss": 0.8296,
      "step": 1894
    },
    {
      "epoch": 0.05801122149782601,
      "grad_norm": 0.21264488995075226,
      "learning_rate": 2.625e-06,
      "loss": 0.7709,
      "step": 1895
    },
    {
      "epoch": 0.058041834279619064,
      "grad_norm": 0.3932223320007324,
      "learning_rate": 2.6e-06,
      "loss": 0.6769,
      "step": 1896
    },
    {
      "epoch": 0.05807244706141211,
      "grad_norm": 0.18969665467739105,
      "learning_rate": 2.575e-06,
      "loss": 0.8762,
      "step": 1897
    },
    {
      "epoch": 0.05810305984320516,
      "grad_norm": 0.1924058198928833,
      "learning_rate": 2.55e-06,
      "loss": 0.7448,
      "step": 1898
    },
    {
      "epoch": 0.0581336726249982,
      "grad_norm": 0.5334935784339905,
      "learning_rate": 2.5250000000000004e-06,
      "loss": 0.8007,
      "step": 1899
    },
    {
      "epoch": 0.058164285406791257,
      "grad_norm": 0.19069750607013702,
      "learning_rate": 2.5e-06,
      "loss": 0.7973,
      "step": 1900
    },
    {
      "epoch": 0.0581948981885843,
      "grad_norm": 0.17058272659778595,
      "learning_rate": 2.4750000000000004e-06,
      "loss": 0.6711,
      "step": 1901
    },
    {
      "epoch": 0.05822551097037735,
      "grad_norm": 0.2107059210538864,
      "learning_rate": 2.4500000000000003e-06,
      "loss": 0.7221,
      "step": 1902
    },
    {
      "epoch": 0.058256123752170395,
      "grad_norm": 0.651856005191803,
      "learning_rate": 2.425e-06,
      "loss": 0.7479,
      "step": 1903
    },
    {
      "epoch": 0.05828673653396345,
      "grad_norm": 0.1832963228225708,
      "learning_rate": 2.4000000000000003e-06,
      "loss": 0.7735,
      "step": 1904
    },
    {
      "epoch": 0.058317349315756495,
      "grad_norm": 0.27906742691993713,
      "learning_rate": 2.375e-06,
      "loss": 0.6895,
      "step": 1905
    },
    {
      "epoch": 0.05834796209754954,
      "grad_norm": 0.16559183597564697,
      "learning_rate": 2.35e-06,
      "loss": 0.8907,
      "step": 1906
    },
    {
      "epoch": 0.05837857487934259,
      "grad_norm": 0.20367176830768585,
      "learning_rate": 2.325e-06,
      "loss": 0.7322,
      "step": 1907
    },
    {
      "epoch": 0.05840918766113564,
      "grad_norm": 0.21579672396183014,
      "learning_rate": 2.3e-06,
      "loss": 0.7496,
      "step": 1908
    },
    {
      "epoch": 0.05843980044292869,
      "grad_norm": 0.24877163767814636,
      "learning_rate": 2.2750000000000002e-06,
      "loss": 0.719,
      "step": 1909
    },
    {
      "epoch": 0.05847041322472173,
      "grad_norm": 0.3390607535839081,
      "learning_rate": 2.25e-06,
      "loss": 0.8355,
      "step": 1910
    },
    {
      "epoch": 0.05850102600651478,
      "grad_norm": 0.2000439465045929,
      "learning_rate": 2.225e-06,
      "loss": 0.9043,
      "step": 1911
    },
    {
      "epoch": 0.05853163878830783,
      "grad_norm": 0.2151809185743332,
      "learning_rate": 2.2e-06,
      "loss": 0.7063,
      "step": 1912
    },
    {
      "epoch": 0.05856225157010088,
      "grad_norm": 0.21695423126220703,
      "learning_rate": 2.175e-06,
      "loss": 0.7745,
      "step": 1913
    },
    {
      "epoch": 0.058592864351893925,
      "grad_norm": 0.28670534491539,
      "learning_rate": 2.1499999999999997e-06,
      "loss": 0.7995,
      "step": 1914
    },
    {
      "epoch": 0.05862347713368697,
      "grad_norm": 0.34711459279060364,
      "learning_rate": 2.1250000000000004e-06,
      "loss": 0.7639,
      "step": 1915
    },
    {
      "epoch": 0.058654089915480025,
      "grad_norm": 0.17490987479686737,
      "learning_rate": 2.1000000000000002e-06,
      "loss": 0.6583,
      "step": 1916
    },
    {
      "epoch": 0.05868470269727307,
      "grad_norm": 0.39340782165527344,
      "learning_rate": 2.075e-06,
      "loss": 0.7418,
      "step": 1917
    },
    {
      "epoch": 0.05871531547906612,
      "grad_norm": 0.14462348818778992,
      "learning_rate": 2.0500000000000003e-06,
      "loss": 0.7263,
      "step": 1918
    },
    {
      "epoch": 0.058745928260859163,
      "grad_norm": 0.7852398157119751,
      "learning_rate": 2.025e-06,
      "loss": 0.7124,
      "step": 1919
    },
    {
      "epoch": 0.05877654104265222,
      "grad_norm": 0.19998478889465332,
      "learning_rate": 2.0000000000000003e-06,
      "loss": 0.7424,
      "step": 1920
    },
    {
      "epoch": 0.05880715382444526,
      "grad_norm": 0.17928685247898102,
      "learning_rate": 1.975e-06,
      "loss": 0.6651,
      "step": 1921
    },
    {
      "epoch": 0.05883776660623831,
      "grad_norm": 0.14412914216518402,
      "learning_rate": 1.95e-06,
      "loss": 0.801,
      "step": 1922
    },
    {
      "epoch": 0.058868379388031356,
      "grad_norm": 0.29626914858818054,
      "learning_rate": 1.925e-06,
      "loss": 0.7196,
      "step": 1923
    },
    {
      "epoch": 0.05889899216982441,
      "grad_norm": 0.22694402933120728,
      "learning_rate": 1.9e-06,
      "loss": 0.709,
      "step": 1924
    },
    {
      "epoch": 0.058929604951617455,
      "grad_norm": 0.3013089597225189,
      "learning_rate": 1.875e-06,
      "loss": 0.6695,
      "step": 1925
    },
    {
      "epoch": 0.0589602177334105,
      "grad_norm": 0.2107127457857132,
      "learning_rate": 1.85e-06,
      "loss": 0.7545,
      "step": 1926
    },
    {
      "epoch": 0.058990830515203554,
      "grad_norm": 0.1711435467004776,
      "learning_rate": 1.8249999999999999e-06,
      "loss": 0.7229,
      "step": 1927
    },
    {
      "epoch": 0.0590214432969966,
      "grad_norm": 0.2251552790403366,
      "learning_rate": 1.8e-06,
      "loss": 0.7295,
      "step": 1928
    },
    {
      "epoch": 0.05905205607878965,
      "grad_norm": 0.26477035880088806,
      "learning_rate": 1.775e-06,
      "loss": 0.7931,
      "step": 1929
    },
    {
      "epoch": 0.05908266886058269,
      "grad_norm": 0.2663504183292389,
      "learning_rate": 1.7500000000000002e-06,
      "loss": 0.7105,
      "step": 1930
    },
    {
      "epoch": 0.059113281642375747,
      "grad_norm": 1.7845817804336548,
      "learning_rate": 1.7250000000000002e-06,
      "loss": 0.7766,
      "step": 1931
    },
    {
      "epoch": 0.05914389442416879,
      "grad_norm": 0.17913375794887543,
      "learning_rate": 1.7000000000000002e-06,
      "loss": 0.7828,
      "step": 1932
    },
    {
      "epoch": 0.05917450720596184,
      "grad_norm": 0.8107318878173828,
      "learning_rate": 1.6750000000000003e-06,
      "loss": 0.8404,
      "step": 1933
    },
    {
      "epoch": 0.059205119987754885,
      "grad_norm": 0.18144871294498444,
      "learning_rate": 1.65e-06,
      "loss": 0.6604,
      "step": 1934
    },
    {
      "epoch": 0.05923573276954794,
      "grad_norm": 0.16706770658493042,
      "learning_rate": 1.6250000000000001e-06,
      "loss": 0.696,
      "step": 1935
    },
    {
      "epoch": 0.059266345551340985,
      "grad_norm": 0.1416487842798233,
      "learning_rate": 1.6000000000000001e-06,
      "loss": 0.7045,
      "step": 1936
    },
    {
      "epoch": 0.05929695833313403,
      "grad_norm": 0.2333289533853531,
      "learning_rate": 1.5750000000000002e-06,
      "loss": 0.6541,
      "step": 1937
    },
    {
      "epoch": 0.05932757111492708,
      "grad_norm": 0.2506668269634247,
      "learning_rate": 1.55e-06,
      "loss": 0.6574,
      "step": 1938
    },
    {
      "epoch": 0.05935818389672013,
      "grad_norm": 0.46860405802726746,
      "learning_rate": 1.525e-06,
      "loss": 0.7022,
      "step": 1939
    },
    {
      "epoch": 0.05938879667851318,
      "grad_norm": 0.19005945324897766,
      "learning_rate": 1.5e-06,
      "loss": 0.8118,
      "step": 1940
    },
    {
      "epoch": 0.05941940946030622,
      "grad_norm": 0.34541475772857666,
      "learning_rate": 1.475e-06,
      "loss": 0.6861,
      "step": 1941
    },
    {
      "epoch": 0.05945002224209927,
      "grad_norm": 0.22724555432796478,
      "learning_rate": 1.45e-06,
      "loss": 0.7006,
      "step": 1942
    },
    {
      "epoch": 0.05948063502389232,
      "grad_norm": 0.23643815517425537,
      "learning_rate": 1.4250000000000001e-06,
      "loss": 0.7975,
      "step": 1943
    },
    {
      "epoch": 0.05951124780568537,
      "grad_norm": 0.17982187867164612,
      "learning_rate": 1.4000000000000001e-06,
      "loss": 0.6986,
      "step": 1944
    },
    {
      "epoch": 0.059541860587478415,
      "grad_norm": 0.3366946876049042,
      "learning_rate": 1.3750000000000002e-06,
      "loss": 0.7126,
      "step": 1945
    },
    {
      "epoch": 0.05957247336927146,
      "grad_norm": 0.2513495087623596,
      "learning_rate": 1.35e-06,
      "loss": 0.6216,
      "step": 1946
    },
    {
      "epoch": 0.059603086151064515,
      "grad_norm": 0.17015685141086578,
      "learning_rate": 1.325e-06,
      "loss": 0.7963,
      "step": 1947
    },
    {
      "epoch": 0.05963369893285756,
      "grad_norm": 0.17392386496067047,
      "learning_rate": 1.3e-06,
      "loss": 0.6454,
      "step": 1948
    },
    {
      "epoch": 0.05966431171465061,
      "grad_norm": 0.2926434576511383,
      "learning_rate": 1.275e-06,
      "loss": 0.6169,
      "step": 1949
    },
    {
      "epoch": 0.05969492449644365,
      "grad_norm": 0.15666463971138,
      "learning_rate": 1.25e-06,
      "loss": 0.6906,
      "step": 1950
    },
    {
      "epoch": 0.05972553727823671,
      "grad_norm": 0.2558903694152832,
      "learning_rate": 1.2250000000000001e-06,
      "loss": 0.7246,
      "step": 1951
    },
    {
      "epoch": 0.05975615006002975,
      "grad_norm": 0.2129763662815094,
      "learning_rate": 1.2000000000000002e-06,
      "loss": 0.6894,
      "step": 1952
    },
    {
      "epoch": 0.0597867628418228,
      "grad_norm": 0.19184786081314087,
      "learning_rate": 1.175e-06,
      "loss": 0.719,
      "step": 1953
    },
    {
      "epoch": 0.059817375623615845,
      "grad_norm": 0.24374374747276306,
      "learning_rate": 1.15e-06,
      "loss": 0.7517,
      "step": 1954
    },
    {
      "epoch": 0.0598479884054089,
      "grad_norm": 0.3359440863132477,
      "learning_rate": 1.125e-06,
      "loss": 0.6897,
      "step": 1955
    },
    {
      "epoch": 0.059878601187201945,
      "grad_norm": 0.1996689885854721,
      "learning_rate": 1.1e-06,
      "loss": 0.6591,
      "step": 1956
    },
    {
      "epoch": 0.05990921396899499,
      "grad_norm": 0.1618756353855133,
      "learning_rate": 1.0749999999999999e-06,
      "loss": 0.7835,
      "step": 1957
    },
    {
      "epoch": 0.05993982675078804,
      "grad_norm": 0.5065921545028687,
      "learning_rate": 1.0500000000000001e-06,
      "loss": 0.7168,
      "step": 1958
    },
    {
      "epoch": 0.05997043953258109,
      "grad_norm": 0.18873938918113708,
      "learning_rate": 1.0250000000000001e-06,
      "loss": 0.7646,
      "step": 1959
    },
    {
      "epoch": 0.06000105231437414,
      "grad_norm": 0.39164412021636963,
      "learning_rate": 1.0000000000000002e-06,
      "loss": 0.742,
      "step": 1960
    },
    {
      "epoch": 0.06003166509616718,
      "grad_norm": 0.19896982610225677,
      "learning_rate": 9.75e-07,
      "loss": 0.6628,
      "step": 1961
    },
    {
      "epoch": 0.06006227787796023,
      "grad_norm": 0.29845553636550903,
      "learning_rate": 9.5e-07,
      "loss": 0.8996,
      "step": 1962
    },
    {
      "epoch": 0.06009289065975328,
      "grad_norm": 0.27151718735694885,
      "learning_rate": 9.25e-07,
      "loss": 0.8107,
      "step": 1963
    },
    {
      "epoch": 0.06012350344154633,
      "grad_norm": 0.1456848829984665,
      "learning_rate": 9e-07,
      "loss": 0.6082,
      "step": 1964
    },
    {
      "epoch": 0.060154116223339375,
      "grad_norm": 0.14678721129894257,
      "learning_rate": 8.750000000000001e-07,
      "loss": 0.5955,
      "step": 1965
    },
    {
      "epoch": 0.06018472900513242,
      "grad_norm": 0.1972162425518036,
      "learning_rate": 8.500000000000001e-07,
      "loss": 0.6462,
      "step": 1966
    },
    {
      "epoch": 0.060215341786925475,
      "grad_norm": 0.18362252414226532,
      "learning_rate": 8.25e-07,
      "loss": 0.7145,
      "step": 1967
    },
    {
      "epoch": 0.06024595456871852,
      "grad_norm": 0.18649965524673462,
      "learning_rate": 8.000000000000001e-07,
      "loss": 0.723,
      "step": 1968
    },
    {
      "epoch": 0.06027656735051157,
      "grad_norm": 0.2920564115047455,
      "learning_rate": 7.75e-07,
      "loss": 0.6668,
      "step": 1969
    },
    {
      "epoch": 0.060307180132304614,
      "grad_norm": 0.21190553903579712,
      "learning_rate": 7.5e-07,
      "loss": 0.5679,
      "step": 1970
    },
    {
      "epoch": 0.06033779291409767,
      "grad_norm": 0.2235598862171173,
      "learning_rate": 7.25e-07,
      "loss": 0.8418,
      "step": 1971
    },
    {
      "epoch": 0.06036840569589071,
      "grad_norm": 0.38316014409065247,
      "learning_rate": 7.000000000000001e-07,
      "loss": 0.8183,
      "step": 1972
    },
    {
      "epoch": 0.06039901847768376,
      "grad_norm": 1.081520676612854,
      "learning_rate": 6.75e-07,
      "loss": 0.7925,
      "step": 1973
    },
    {
      "epoch": 0.060429631259476806,
      "grad_norm": 0.24224327504634857,
      "learning_rate": 6.5e-07,
      "loss": 0.7351,
      "step": 1974
    },
    {
      "epoch": 0.06046024404126986,
      "grad_norm": 0.1538304090499878,
      "learning_rate": 6.25e-07,
      "loss": 0.8146,
      "step": 1975
    },
    {
      "epoch": 0.060490856823062905,
      "grad_norm": 0.1940135955810547,
      "learning_rate": 6.000000000000001e-07,
      "loss": 0.777,
      "step": 1976
    },
    {
      "epoch": 0.06052146960485595,
      "grad_norm": 0.16389207541942596,
      "learning_rate": 5.75e-07,
      "loss": 0.6294,
      "step": 1977
    },
    {
      "epoch": 0.060552082386649,
      "grad_norm": 0.22328399121761322,
      "learning_rate": 5.5e-07,
      "loss": 0.8477,
      "step": 1978
    },
    {
      "epoch": 0.06058269516844205,
      "grad_norm": 0.1698482781648636,
      "learning_rate": 5.250000000000001e-07,
      "loss": 0.7227,
      "step": 1979
    },
    {
      "epoch": 0.0606133079502351,
      "grad_norm": 0.1920703798532486,
      "learning_rate": 5.000000000000001e-07,
      "loss": 0.6355,
      "step": 1980
    },
    {
      "epoch": 0.06064392073202814,
      "grad_norm": 0.6075984835624695,
      "learning_rate": 4.75e-07,
      "loss": 0.7784,
      "step": 1981
    },
    {
      "epoch": 0.06067453351382119,
      "grad_norm": 0.18547087907791138,
      "learning_rate": 4.5e-07,
      "loss": 0.6631,
      "step": 1982
    },
    {
      "epoch": 0.06070514629561424,
      "grad_norm": 0.14451444149017334,
      "learning_rate": 4.2500000000000006e-07,
      "loss": 0.6913,
      "step": 1983
    },
    {
      "epoch": 0.06073575907740729,
      "grad_norm": 0.1753396838903427,
      "learning_rate": 4.0000000000000003e-07,
      "loss": 0.7423,
      "step": 1984
    },
    {
      "epoch": 0.060766371859200335,
      "grad_norm": 0.22275547683238983,
      "learning_rate": 3.75e-07,
      "loss": 0.8772,
      "step": 1985
    },
    {
      "epoch": 0.06079698464099338,
      "grad_norm": 0.1293140947818756,
      "learning_rate": 3.5000000000000004e-07,
      "loss": 0.661,
      "step": 1986
    },
    {
      "epoch": 0.060827597422786435,
      "grad_norm": 0.7502373456954956,
      "learning_rate": 3.25e-07,
      "loss": 0.8567,
      "step": 1987
    },
    {
      "epoch": 0.06085821020457948,
      "grad_norm": 0.24594159424304962,
      "learning_rate": 3.0000000000000004e-07,
      "loss": 0.7844,
      "step": 1988
    },
    {
      "epoch": 0.06088882298637253,
      "grad_norm": 0.2009095400571823,
      "learning_rate": 2.75e-07,
      "loss": 0.8782,
      "step": 1989
    },
    {
      "epoch": 0.060919435768165574,
      "grad_norm": 0.14902116358280182,
      "learning_rate": 2.5000000000000004e-07,
      "loss": 0.6625,
      "step": 1990
    },
    {
      "epoch": 0.06095004854995863,
      "grad_norm": 0.18206657469272614,
      "learning_rate": 2.25e-07,
      "loss": 0.722,
      "step": 1991
    },
    {
      "epoch": 0.06098066133175167,
      "grad_norm": 0.14927811920642853,
      "learning_rate": 2.0000000000000002e-07,
      "loss": 0.5913,
      "step": 1992
    },
    {
      "epoch": 0.06101127411354472,
      "grad_norm": 0.23977361619472504,
      "learning_rate": 1.7500000000000002e-07,
      "loss": 0.5936,
      "step": 1993
    },
    {
      "epoch": 0.061041886895337766,
      "grad_norm": 0.28577926754951477,
      "learning_rate": 1.5000000000000002e-07,
      "loss": 0.7043,
      "step": 1994
    },
    {
      "epoch": 0.06107249967713082,
      "grad_norm": 0.2529245913028717,
      "learning_rate": 1.2500000000000002e-07,
      "loss": 0.7445,
      "step": 1995
    },
    {
      "epoch": 0.061103112458923865,
      "grad_norm": 0.3715488314628601,
      "learning_rate": 1.0000000000000001e-07,
      "loss": 0.7408,
      "step": 1996
    },
    {
      "epoch": 0.06113372524071691,
      "grad_norm": 0.14685317873954773,
      "learning_rate": 7.500000000000001e-08,
      "loss": 0.6698,
      "step": 1997
    },
    {
      "epoch": 0.06116433802250996,
      "grad_norm": 0.16792865097522736,
      "learning_rate": 5.0000000000000004e-08,
      "loss": 0.7934,
      "step": 1998
    },
    {
      "epoch": 0.06119495080430301,
      "grad_norm": 0.7086781859397888,
      "learning_rate": 2.5000000000000002e-08,
      "loss": 0.7242,
      "step": 1999
    },
    {
      "epoch": 0.06122556358609606,
      "grad_norm": 0.18056847155094147,
      "learning_rate": 0.0,
      "loss": 0.7235,
      "step": 2000
    },
    {
      "epoch": 0.06122556358609606,
      "step": 2000,
      "total_flos": 3.3909236563968e+16,
      "train_loss": 0.9592373611629009,
      "train_runtime": 5840.6966,
      "train_samples_per_second": 10.958,
      "train_steps_per_second": 0.342
    }
  ],
  "logging_steps": 1.0,
  "max_steps": 2000,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 3.3909236563968e+16,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}