{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.1365,
  "eval_steps": 500,
  "global_step": 13650,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1e-05,
      "grad_norm": 1.4574347149106512,
      "learning_rate": 3e-06,
      "loss": 10.8576,
      "step": 1
    },
    {
      "epoch": 2e-05,
      "grad_norm": 1.4482443114713344,
      "learning_rate": 6e-06,
      "loss": 10.8575,
      "step": 2
    },
    {
      "epoch": 3e-05,
      "grad_norm": 1.4642003089092823,
      "learning_rate": 9e-06,
      "loss": 10.8576,
      "step": 3
    },
    {
      "epoch": 4e-05,
      "grad_norm": 1.4485584072048276,
      "learning_rate": 1.2e-05,
      "loss": 10.8569,
      "step": 4
    },
    {
      "epoch": 5e-05,
      "grad_norm": 1.4574321657516995,
      "learning_rate": 1.5e-05,
      "loss": 10.8527,
      "step": 5
    },
    {
      "epoch": 6e-05,
      "grad_norm": 1.4585537094370684,
      "learning_rate": 1.8e-05,
      "loss": 10.8518,
      "step": 6
    },
    {
      "epoch": 7e-05,
      "grad_norm": 1.4183780170798466,
      "learning_rate": 2.1000000000000002e-05,
      "loss": 10.8383,
      "step": 7
    },
    {
      "epoch": 8e-05,
      "grad_norm": 1.286272643239374,
      "learning_rate": 2.4e-05,
      "loss": 10.8119,
      "step": 8
    },
    {
      "epoch": 9e-05,
      "grad_norm": 1.246364249616181,
      "learning_rate": 2.7e-05,
      "loss": 10.8063,
      "step": 9
    },
    {
      "epoch": 0.0001,
      "grad_norm": 1.2300728857390288,
      "learning_rate": 3e-05,
      "loss": 10.7913,
      "step": 10
    },
    {
      "epoch": 0.00011,
      "grad_norm": 1.1639358472437353,
      "learning_rate": 3.2999999999999996e-05,
      "loss": 10.7756,
      "step": 11
    },
    {
      "epoch": 0.00012,
      "grad_norm": 1.1455069336623074,
      "learning_rate": 3.6e-05,
      "loss": 10.7622,
      "step": 12
    },
    {
      "epoch": 0.00013,
      "grad_norm": 1.104711768149659,
      "learning_rate": 3.9e-05,
      "loss": 10.7415,
      "step": 13
    },
    {
      "epoch": 0.00014,
      "grad_norm": 1.0916409179063882,
      "learning_rate": 4.2000000000000004e-05,
      "loss": 10.7296,
      "step": 14
    },
    {
      "epoch": 0.00015,
      "grad_norm": 1.0723152562842644,
      "learning_rate": 4.4999999999999996e-05,
      "loss": 10.7183,
      "step": 15
    },
    {
      "epoch": 0.00016,
      "grad_norm": 1.0297043497697835,
      "learning_rate": 4.8e-05,
      "loss": 10.7,
      "step": 16
    },
    {
      "epoch": 0.00017,
      "grad_norm": 0.9882210863590011,
      "learning_rate": 5.1000000000000006e-05,
      "loss": 10.6838,
      "step": 17
    },
    {
      "epoch": 0.00018,
      "grad_norm": 0.966791538351231,
      "learning_rate": 5.4e-05,
      "loss": 10.6659,
      "step": 18
    },
    {
      "epoch": 0.00019,
      "grad_norm": 0.9358769725124266,
      "learning_rate": 5.7e-05,
      "loss": 10.6509,
      "step": 19
    },
    {
      "epoch": 0.0002,
      "grad_norm": 0.9313245306892226,
      "learning_rate": 6e-05,
      "loss": 10.638,
      "step": 20
    },
    {
      "epoch": 0.00021,
      "grad_norm": 0.9056933112957348,
      "learning_rate": 6.3e-05,
      "loss": 10.6235,
      "step": 21
    },
    {
      "epoch": 0.00022,
      "grad_norm": 0.9013277325687112,
      "learning_rate": 6.599999999999999e-05,
      "loss": 10.6069,
      "step": 22
    },
    {
      "epoch": 0.00023,
      "grad_norm": 0.8982860560109955,
      "learning_rate": 6.9e-05,
      "loss": 10.5935,
      "step": 23
    },
    {
      "epoch": 0.00024,
      "grad_norm": 0.8941575476865218,
      "learning_rate": 7.2e-05,
      "loss": 10.5802,
      "step": 24
    },
    {
      "epoch": 0.00025,
      "grad_norm": 0.8955811690787305,
      "learning_rate": 7.500000000000001e-05,
      "loss": 10.5652,
      "step": 25
    },
    {
      "epoch": 0.00026,
      "grad_norm": 0.9005417636666915,
      "learning_rate": 7.8e-05,
      "loss": 10.5507,
      "step": 26
    },
    {
      "epoch": 0.00027,
      "grad_norm": 0.8929394290279239,
      "learning_rate": 8.1e-05,
      "loss": 10.5379,
      "step": 27
    },
    {
      "epoch": 0.00028,
      "grad_norm": 0.892893092343029,
      "learning_rate": 8.400000000000001e-05,
      "loss": 10.5234,
      "step": 28
    },
    {
      "epoch": 0.00029,
      "grad_norm": 0.8951984198637418,
      "learning_rate": 8.7e-05,
      "loss": 10.5078,
      "step": 29
    },
    {
      "epoch": 0.0003,
      "grad_norm": 0.901037080772758,
      "learning_rate": 8.999999999999999e-05,
      "loss": 10.4913,
      "step": 30
    },
    {
      "epoch": 0.00031,
      "grad_norm": 0.899757879512845,
      "learning_rate": 9.3e-05,
      "loss": 10.4759,
      "step": 31
    },
    {
      "epoch": 0.00032,
      "grad_norm": 0.8963415000423225,
      "learning_rate": 9.6e-05,
      "loss": 10.4612,
      "step": 32
    },
    {
      "epoch": 0.00033,
      "grad_norm": 0.8971468155029705,
      "learning_rate": 9.900000000000001e-05,
      "loss": 10.4429,
      "step": 33
    },
    {
      "epoch": 0.00034,
      "grad_norm": 0.8964368558533553,
      "learning_rate": 0.00010200000000000001,
      "loss": 10.426,
      "step": 34
    },
    {
      "epoch": 0.00035,
      "grad_norm": 0.9007340868108898,
      "learning_rate": 0.00010500000000000002,
      "loss": 10.4083,
      "step": 35
    },
    {
      "epoch": 0.00036,
      "grad_norm": 0.8977475222905443,
      "learning_rate": 0.000108,
      "loss": 10.3895,
      "step": 36
    },
    {
      "epoch": 0.00037,
      "grad_norm": 0.8929896912849768,
      "learning_rate": 0.000111,
      "loss": 10.3721,
      "step": 37
    },
    {
      "epoch": 0.00038,
      "grad_norm": 0.8925204062685723,
      "learning_rate": 0.000114,
      "loss": 10.3515,
      "step": 38
    },
    {
      "epoch": 0.00039,
      "grad_norm": 0.8947925451707294,
      "learning_rate": 0.000117,
      "loss": 10.3314,
      "step": 39
    },
    {
      "epoch": 0.0004,
      "grad_norm": 0.8990961452390619,
      "learning_rate": 0.00012,
      "loss": 10.3088,
      "step": 40
    },
    {
      "epoch": 0.00041,
      "grad_norm": 0.8951984584897338,
      "learning_rate": 0.000123,
      "loss": 10.2891,
      "step": 41
    },
    {
      "epoch": 0.00042,
      "grad_norm": 0.8942493459254965,
      "learning_rate": 0.000126,
      "loss": 10.2679,
      "step": 42
    },
    {
      "epoch": 0.00043,
      "grad_norm": 0.8983684110980745,
      "learning_rate": 0.000129,
      "loss": 10.243,
      "step": 43
    },
    {
      "epoch": 0.00044,
      "grad_norm": 0.8985869855625094,
      "learning_rate": 0.00013199999999999998,
      "loss": 10.2206,
      "step": 44
    },
    {
      "epoch": 0.00045,
      "grad_norm": 0.899962207774676,
      "learning_rate": 0.000135,
      "loss": 10.1962,
      "step": 45
    },
    {
      "epoch": 0.00046,
      "grad_norm": 0.8914153211826606,
      "learning_rate": 0.000138,
      "loss": 10.1735,
      "step": 46
    },
    {
      "epoch": 0.00047,
      "grad_norm": 0.8980761529388176,
      "learning_rate": 0.000141,
      "loss": 10.146,
      "step": 47
    },
    {
      "epoch": 0.00048,
      "grad_norm": 0.9007540011988344,
      "learning_rate": 0.000144,
      "loss": 10.1222,
      "step": 48
    },
    {
      "epoch": 0.00049,
      "grad_norm": 0.896344611975273,
      "learning_rate": 0.000147,
      "loss": 10.0974,
      "step": 49
    },
    {
      "epoch": 0.0005,
      "grad_norm": 0.9015948414141589,
      "learning_rate": 0.00015000000000000001,
      "loss": 10.0706,
      "step": 50
    },
    {
      "epoch": 0.00051,
      "grad_norm": 0.9016102183936469,
      "learning_rate": 0.000153,
      "loss": 10.0433,
      "step": 51
    },
    {
      "epoch": 0.00052,
      "grad_norm": 0.8993585803645682,
      "learning_rate": 0.000156,
      "loss": 10.0158,
      "step": 52
    },
    {
      "epoch": 0.00053,
      "grad_norm": 0.8901736410301992,
      "learning_rate": 0.000159,
      "loss": 9.9917,
      "step": 53
    },
    {
      "epoch": 0.00054,
      "grad_norm": 0.9083328832659304,
      "learning_rate": 0.000162,
      "loss": 9.9578,
      "step": 54
    },
    {
      "epoch": 0.00055,
      "grad_norm": 0.8975570632788401,
      "learning_rate": 0.000165,
      "loss": 9.9337,
      "step": 55
    },
    {
      "epoch": 0.00056,
      "grad_norm": 0.8924582131156604,
      "learning_rate": 0.00016800000000000002,
      "loss": 9.904,
      "step": 56
    },
    {
      "epoch": 0.00057,
      "grad_norm": 0.8937117244788118,
      "learning_rate": 0.000171,
      "loss": 9.8765,
      "step": 57
    },
    {
      "epoch": 0.00058,
      "grad_norm": 0.8912667557674427,
      "learning_rate": 0.000174,
      "loss": 9.8514,
      "step": 58
    },
    {
      "epoch": 0.00059,
      "grad_norm": 0.899445317105929,
      "learning_rate": 0.000177,
      "loss": 9.819,
      "step": 59
    },
    {
      "epoch": 0.0006,
      "grad_norm": 0.8827247644946434,
      "learning_rate": 0.00017999999999999998,
      "loss": 9.7927,
      "step": 60
    },
    {
      "epoch": 0.00061,
      "grad_norm": 0.8856006114384688,
      "learning_rate": 0.000183,
      "loss": 9.7645,
      "step": 61
    },
    {
      "epoch": 0.00062,
      "grad_norm": 0.8887861476206323,
      "learning_rate": 0.000186,
      "loss": 9.7329,
      "step": 62
    },
    {
      "epoch": 0.00063,
      "grad_norm": 0.8870948257177573,
      "learning_rate": 0.000189,
      "loss": 9.7044,
      "step": 63
    },
    {
      "epoch": 0.00064,
      "grad_norm": 0.8854465518366905,
      "learning_rate": 0.000192,
      "loss": 9.6741,
      "step": 64
    },
    {
      "epoch": 0.00065,
      "grad_norm": 0.8913964558604793,
      "learning_rate": 0.00019500000000000002,
      "loss": 9.6417,
      "step": 65
    },
    {
      "epoch": 0.00066,
      "grad_norm": 0.8860459843940278,
      "learning_rate": 0.00019800000000000002,
      "loss": 9.6181,
      "step": 66
    },
    {
      "epoch": 0.00067,
      "grad_norm": 0.8855602986146246,
      "learning_rate": 0.000201,
      "loss": 9.5886,
      "step": 67
    },
    {
      "epoch": 0.00068,
      "grad_norm": 0.89182613307124,
      "learning_rate": 0.00020400000000000003,
      "loss": 9.5554,
      "step": 68
    },
    {
      "epoch": 0.00069,
      "grad_norm": 0.8788147221752699,
      "learning_rate": 0.00020700000000000002,
      "loss": 9.5276,
      "step": 69
    },
    {
      "epoch": 0.0007,
      "grad_norm": 0.89013480384513,
      "learning_rate": 0.00021000000000000004,
      "loss": 9.4911,
      "step": 70
    },
    {
      "epoch": 0.00071,
      "grad_norm": 0.8832608390392153,
      "learning_rate": 0.00021299999999999997,
      "loss": 9.4637,
      "step": 71
    },
    {
      "epoch": 0.00072,
      "grad_norm": 0.8850551460752941,
      "learning_rate": 0.000216,
      "loss": 9.4371,
      "step": 72
    },
    {
      "epoch": 0.00073,
      "grad_norm": 0.8854430272945265,
      "learning_rate": 0.00021899999999999998,
      "loss": 9.4029,
      "step": 73
    },
    {
      "epoch": 0.00074,
      "grad_norm": 0.881388508551569,
      "learning_rate": 0.000222,
      "loss": 9.3766,
      "step": 74
    },
    {
      "epoch": 0.00075,
      "grad_norm": 0.8804734413831777,
      "learning_rate": 0.000225,
      "loss": 9.3413,
      "step": 75
    },
    {
      "epoch": 0.00076,
      "grad_norm": 0.891470306594604,
      "learning_rate": 0.000228,
      "loss": 9.3136,
      "step": 76
    },
    {
      "epoch": 0.00077,
      "grad_norm": 0.8835806398314079,
      "learning_rate": 0.000231,
      "loss": 9.2843,
      "step": 77
    },
    {
      "epoch": 0.00078,
      "grad_norm": 0.8778470434340718,
      "learning_rate": 0.000234,
      "loss": 9.2534,
      "step": 78
    },
    {
      "epoch": 0.00079,
      "grad_norm": 0.8849142747864956,
      "learning_rate": 0.00023700000000000001,
      "loss": 9.2174,
      "step": 79
    },
    {
      "epoch": 0.0008,
      "grad_norm": 0.8831859916332898,
      "learning_rate": 0.00024,
      "loss": 9.1835,
      "step": 80
    },
    {
      "epoch": 0.00081,
      "grad_norm": 0.8883671155155716,
      "learning_rate": 0.00024300000000000002,
      "loss": 9.1516,
      "step": 81
    },
    {
      "epoch": 0.00082,
      "grad_norm": 0.8919005998406658,
      "learning_rate": 0.000246,
      "loss": 9.1278,
      "step": 82
    },
    {
      "epoch": 0.00083,
      "grad_norm": 0.8904109612127737,
      "learning_rate": 0.00024900000000000004,
      "loss": 9.0926,
      "step": 83
    },
    {
      "epoch": 0.00084,
      "grad_norm": 0.8922919840670517,
      "learning_rate": 0.000252,
      "loss": 9.0627,
      "step": 84
    },
    {
      "epoch": 0.00085,
      "grad_norm": 0.8851770394662277,
      "learning_rate": 0.000255,
      "loss": 9.0389,
      "step": 85
    },
    {
      "epoch": 0.00086,
      "grad_norm": 0.892287423580011,
      "learning_rate": 0.000258,
      "loss": 9.0029,
      "step": 86
    },
    {
      "epoch": 0.00087,
      "grad_norm": 0.8819038715566697,
      "learning_rate": 0.000261,
      "loss": 8.9819,
      "step": 87
    },
    {
      "epoch": 0.00088,
      "grad_norm": 0.8825670273484024,
      "learning_rate": 0.00026399999999999997,
      "loss": 8.9452,
      "step": 88
    },
    {
      "epoch": 0.00089,
      "grad_norm": 0.87734677010751,
      "learning_rate": 0.000267,
      "loss": 8.92,
      "step": 89
    },
    {
      "epoch": 0.0009,
      "grad_norm": 0.8800982265590005,
      "learning_rate": 0.00027,
      "loss": 8.8935,
      "step": 90
    },
    {
      "epoch": 0.00091,
      "grad_norm": 0.8751738333241339,
      "learning_rate": 0.000273,
      "loss": 8.8635,
      "step": 91
    },
    {
      "epoch": 0.00092,
      "grad_norm": 0.8805112062017327,
      "learning_rate": 0.000276,
      "loss": 8.8313,
      "step": 92
    },
    {
      "epoch": 0.00093,
      "grad_norm": 0.8782110873043902,
      "learning_rate": 0.000279,
      "loss": 8.8018,
      "step": 93
    },
    {
      "epoch": 0.00094,
      "grad_norm": 0.8793675998346636,
      "learning_rate": 0.000282,
      "loss": 8.7769,
      "step": 94
    },
    {
      "epoch": 0.00095,
      "grad_norm": 0.8739437022686155,
      "learning_rate": 0.000285,
      "loss": 8.7468,
      "step": 95
    },
    {
      "epoch": 0.00096,
      "grad_norm": 0.8792093745975903,
      "learning_rate": 0.000288,
      "loss": 8.7137,
      "step": 96
    },
    {
      "epoch": 0.00097,
      "grad_norm": 0.8733827683228169,
      "learning_rate": 0.000291,
      "loss": 8.6904,
      "step": 97
    },
    {
      "epoch": 0.00098,
      "grad_norm": 0.8720585520116785,
      "learning_rate": 0.000294,
      "loss": 8.6531,
      "step": 98
    },
    {
      "epoch": 0.00099,
      "grad_norm": 0.8706078195741852,
      "learning_rate": 0.000297,
      "loss": 8.6333,
      "step": 99
    },
    {
      "epoch": 0.001,
      "grad_norm": 0.868963072522692,
      "learning_rate": 0.00030000000000000003,
      "loss": 8.6078,
      "step": 100
    },
    {
      "epoch": 0.00101,
      "grad_norm": 0.8733428125714966,
      "learning_rate": 0.00030300000000000005,
      "loss": 8.5742,
      "step": 101
    },
    {
      "epoch": 0.00102,
      "grad_norm": 0.8594222895452354,
      "learning_rate": 0.000306,
      "loss": 8.5555,
      "step": 102
    },
    {
      "epoch": 0.00103,
      "grad_norm": 0.8663526773106932,
      "learning_rate": 0.000309,
      "loss": 8.5314,
      "step": 103
    },
    {
      "epoch": 0.00104,
      "grad_norm": 0.8588890352637412,
      "learning_rate": 0.000312,
      "loss": 8.5113,
      "step": 104
    },
    {
      "epoch": 0.00105,
      "grad_norm": 0.862875507176608,
      "learning_rate": 0.000315,
      "loss": 8.4848,
      "step": 105
    },
    {
      "epoch": 0.00106,
      "grad_norm": 0.858707935395144,
      "learning_rate": 0.000318,
      "loss": 8.4562,
      "step": 106
    },
    {
      "epoch": 0.00107,
      "grad_norm": 0.8477892944676333,
      "learning_rate": 0.000321,
      "loss": 8.4313,
      "step": 107
    },
    {
      "epoch": 0.00108,
      "grad_norm": 0.8523289020593828,
      "learning_rate": 0.000324,
      "loss": 8.4028,
      "step": 108
    },
    {
      "epoch": 0.00109,
      "grad_norm": 0.847730490341604,
      "learning_rate": 0.000327,
      "loss": 8.3837,
      "step": 109
    },
    {
      "epoch": 0.0011,
      "grad_norm": 0.8528691839316361,
      "learning_rate": 0.00033,
      "loss": 8.3605,
      "step": 110
    },
    {
      "epoch": 0.00111,
      "grad_norm": 0.8490568938398722,
      "learning_rate": 0.000333,
      "loss": 8.3302,
      "step": 111
    },
    {
      "epoch": 0.00112,
      "grad_norm": 0.8687159568709311,
      "learning_rate": 0.00033600000000000004,
      "loss": 8.3067,
      "step": 112
    },
    {
      "epoch": 0.00113,
      "grad_norm": 0.9514329048402193,
      "learning_rate": 0.000339,
      "loss": 8.2682,
      "step": 113
    },
    {
      "epoch": 0.00114,
      "grad_norm": 1.0232734531890686,
      "learning_rate": 0.000342,
      "loss": 8.2639,
      "step": 114
    },
    {
      "epoch": 0.00115,
      "grad_norm": 0.88782361437861,
      "learning_rate": 0.00034500000000000004,
      "loss": 8.2339,
      "step": 115
    },
    {
      "epoch": 0.00116,
      "grad_norm": 0.8477580889036649,
      "learning_rate": 0.000348,
      "loss": 8.2115,
      "step": 116
    },
    {
      "epoch": 0.00117,
      "grad_norm": 0.9025353672223668,
      "learning_rate": 0.000351,
      "loss": 8.182,
      "step": 117
    },
    {
      "epoch": 0.00118,
      "grad_norm": 0.8194804043714383,
      "learning_rate": 0.000354,
      "loss": 8.1632,
      "step": 118
    },
    {
      "epoch": 0.00119,
      "grad_norm": 0.8747187640572928,
      "learning_rate": 0.000357,
      "loss": 8.1526,
      "step": 119
    },
    {
      "epoch": 0.0012,
      "grad_norm": 0.819302051650556,
      "learning_rate": 0.00035999999999999997,
      "loss": 8.125,
      "step": 120
    },
    {
      "epoch": 0.00121,
      "grad_norm": 0.8357670289489353,
      "learning_rate": 0.000363,
      "loss": 8.0977,
      "step": 121
    },
    {
      "epoch": 0.00122,
      "grad_norm": 0.8269622916799205,
      "learning_rate": 0.000366,
      "loss": 8.0806,
      "step": 122
    },
    {
      "epoch": 0.00123,
      "grad_norm": 0.7923909888868983,
      "learning_rate": 0.000369,
      "loss": 8.0558,
      "step": 123
    },
    {
      "epoch": 0.00124,
      "grad_norm": 0.8405159164509748,
      "learning_rate": 0.000372,
      "loss": 8.0354,
      "step": 124
    },
    {
      "epoch": 0.00125,
      "grad_norm": 0.7957014508382675,
      "learning_rate": 0.000375,
      "loss": 8.0077,
      "step": 125
    },
    {
      "epoch": 0.00126,
      "grad_norm": 0.8032762193906265,
      "learning_rate": 0.000378,
      "loss": 7.9954,
      "step": 126
    },
    {
      "epoch": 0.00127,
      "grad_norm": 0.8534337354315669,
      "learning_rate": 0.000381,
      "loss": 7.9712,
      "step": 127
    },
    {
      "epoch": 0.00128,
      "grad_norm": 0.7889902179927859,
      "learning_rate": 0.000384,
      "loss": 7.9489,
      "step": 128
    },
    {
      "epoch": 0.00129,
      "grad_norm": 0.7667804338310586,
      "learning_rate": 0.00038700000000000003,
      "loss": 7.936,
      "step": 129
    },
    {
      "epoch": 0.0013,
      "grad_norm": 0.9090077886672961,
      "learning_rate": 0.00039000000000000005,
      "loss": 7.9113,
      "step": 130
    },
    {
      "epoch": 0.00131,
      "grad_norm": 1.1017726636538738,
      "learning_rate": 0.000393,
      "loss": 7.8813,
      "step": 131
    },
    {
      "epoch": 0.00132,
      "grad_norm": 0.9363739073599702,
      "learning_rate": 0.00039600000000000003,
      "loss": 7.8715,
      "step": 132
    },
    {
      "epoch": 0.00133,
      "grad_norm": 0.7265974712286192,
      "learning_rate": 0.00039900000000000005,
      "loss": 7.8436,
      "step": 133
    },
    {
      "epoch": 0.00134,
      "grad_norm": 1.085264808454749,
      "learning_rate": 0.000402,
      "loss": 7.8358,
      "step": 134
    },
    {
      "epoch": 0.00135,
      "grad_norm": 1.1024504993278263,
      "learning_rate": 0.00040500000000000003,
      "loss": 7.8094,
      "step": 135
    },
    {
      "epoch": 0.00136,
      "grad_norm": 0.7406778288782102,
      "learning_rate": 0.00040800000000000005,
      "loss": 7.7862,
      "step": 136
    },
    {
      "epoch": 0.00137,
      "grad_norm": 0.948337541042023,
      "learning_rate": 0.000411,
      "loss": 7.7688,
      "step": 137
    },
    {
      "epoch": 0.00138,
      "grad_norm": 0.9084991981726261,
      "learning_rate": 0.00041400000000000003,
      "loss": 7.7417,
      "step": 138
    },
    {
      "epoch": 0.00139,
      "grad_norm": 0.7074494223870315,
      "learning_rate": 0.00041700000000000005,
      "loss": 7.7227,
      "step": 139
    },
    {
      "epoch": 0.0014,
      "grad_norm": 0.7974190203181878,
      "learning_rate": 0.00042000000000000007,
      "loss": 7.7086,
      "step": 140
    },
    {
      "epoch": 0.00141,
      "grad_norm": 0.6861992297830735,
      "learning_rate": 0.000423,
      "loss": 7.6913,
      "step": 141
    },
    {
      "epoch": 0.00142,
      "grad_norm": 0.7349980301116962,
      "learning_rate": 0.00042599999999999995,
      "loss": 7.677,
      "step": 142
    },
    {
      "epoch": 0.00143,
      "grad_norm": 0.7684994731116016,
      "learning_rate": 0.00042899999999999997,
      "loss": 7.6484,
      "step": 143
    },
    {
      "epoch": 0.00144,
      "grad_norm": 0.6719380878952201,
      "learning_rate": 0.000432,
      "loss": 7.629,
      "step": 144
    },
    {
      "epoch": 0.00145,
      "grad_norm": 0.7001362890446781,
      "learning_rate": 0.000435,
      "loss": 7.6232,
      "step": 145
    },
    {
      "epoch": 0.00146,
      "grad_norm": 0.7384188504614574,
      "learning_rate": 0.00043799999999999997,
      "loss": 7.5849,
      "step": 146
    },
    {
      "epoch": 0.00147,
      "grad_norm": 0.6536906507985312,
      "learning_rate": 0.000441,
      "loss": 7.5755,
      "step": 147
    },
    {
      "epoch": 0.00148,
      "grad_norm": 0.6570387947682329,
      "learning_rate": 0.000444,
      "loss": 7.5632,
      "step": 148
    },
    {
      "epoch": 0.00149,
      "grad_norm": 0.7127206041028632,
      "learning_rate": 0.00044699999999999997,
      "loss": 7.5571,
      "step": 149
    },
    {
      "epoch": 0.0015,
      "grad_norm": 0.7308289900379658,
      "learning_rate": 0.00045,
      "loss": 7.5253,
      "step": 150
    },
    {
      "epoch": 0.00151,
      "grad_norm": 0.8664894863114535,
      "learning_rate": 0.000453,
      "loss": 7.505,
      "step": 151
    },
    {
      "epoch": 0.00152,
      "grad_norm": 0.8895841422117233,
      "learning_rate": 0.000456,
      "loss": 7.5034,
      "step": 152
    },
    {
      "epoch": 0.00153,
      "grad_norm": 0.662621907628313,
      "learning_rate": 0.000459,
      "loss": 7.4855,
      "step": 153
    },
    {
      "epoch": 0.00154,
      "grad_norm": 0.7176449004695503,
      "learning_rate": 0.000462,
      "loss": 7.4655,
      "step": 154
    },
    {
      "epoch": 0.00155,
      "grad_norm": 0.7407325792933065,
      "learning_rate": 0.000465,
      "loss": 7.4508,
      "step": 155
    },
    {
      "epoch": 0.00156,
      "grad_norm": 0.7037717820949844,
      "learning_rate": 0.000468,
      "loss": 7.4265,
      "step": 156
    },
    {
      "epoch": 0.00157,
      "grad_norm": 0.5688508632010355,
      "learning_rate": 0.000471,
      "loss": 7.4159,
      "step": 157
    },
    {
      "epoch": 0.00158,
      "grad_norm": 0.812094320370736,
      "learning_rate": 0.00047400000000000003,
      "loss": 7.4015,
      "step": 158
    },
    {
      "epoch": 0.00159,
      "grad_norm": 0.6851465337071334,
      "learning_rate": 0.000477,
      "loss": 7.3879,
      "step": 159
    },
    {
      "epoch": 0.0016,
      "grad_norm": 0.9553203809874583,
      "learning_rate": 0.00048,
      "loss": 7.3739,
      "step": 160
    },
    {
      "epoch": 0.00161,
      "grad_norm": 0.7328141993841643,
      "learning_rate": 0.00048300000000000003,
      "loss": 7.3497,
      "step": 161
    },
    {
      "epoch": 0.00162,
      "grad_norm": 0.6781537584586433,
      "learning_rate": 0.00048600000000000005,
      "loss": 7.3535,
      "step": 162
    },
    {
      "epoch": 0.00163,
      "grad_norm": 0.5807738383940755,
      "learning_rate": 0.0004890000000000001,
      "loss": 7.3306,
      "step": 163
    },
    {
      "epoch": 0.00164,
      "grad_norm": 0.6466438270814104,
      "learning_rate": 0.000492,
      "loss": 7.3371,
      "step": 164
    },
    {
      "epoch": 0.00165,
      "grad_norm": 0.4365334647721832,
      "learning_rate": 0.000495,
      "loss": 7.3038,
      "step": 165
    },
    {
      "epoch": 0.00166,
      "grad_norm": 0.7387692677149859,
      "learning_rate": 0.0004980000000000001,
      "loss": 7.2849,
      "step": 166
    },
    {
      "epoch": 0.00167,
      "grad_norm": 0.6489238814400348,
      "learning_rate": 0.000501,
      "loss": 7.2765,
      "step": 167
    },
    {
      "epoch": 0.00168,
      "grad_norm": 0.4943076416069544,
      "learning_rate": 0.000504,
      "loss": 7.2512,
      "step": 168
    },
    {
      "epoch": 0.00169,
      "grad_norm": 0.6623047646201438,
      "learning_rate": 0.0005070000000000001,
      "loss": 7.2485,
      "step": 169
    },
    {
      "epoch": 0.0017,
      "grad_norm": 0.5841690806336316,
      "learning_rate": 0.00051,
      "loss": 7.2331,
      "step": 170
    },
    {
      "epoch": 0.00171,
      "grad_norm": 0.455313434659774,
      "learning_rate": 0.000513,
      "loss": 7.2132,
      "step": 171
    },
    {
      "epoch": 0.00172,
      "grad_norm": 0.6012704724036779,
      "learning_rate": 0.000516,
      "loss": 7.214,
      "step": 172
    },
    {
      "epoch": 0.00173,
      "grad_norm": 0.5433283029678282,
      "learning_rate": 0.0005189999999999999,
      "loss": 7.1993,
      "step": 173
    },
    {
      "epoch": 0.00174,
      "grad_norm": 0.38251135313099477,
      "learning_rate": 0.000522,
      "loss": 7.2121,
      "step": 174
    },
    {
      "epoch": 0.00175,
      "grad_norm": 0.6483519243845758,
      "learning_rate": 0.000525,
      "loss": 7.1796,
      "step": 175
    },
    {
      "epoch": 0.00176,
      "grad_norm": 0.5106681215905619,
      "learning_rate": 0.0005279999999999999,
      "loss": 7.1531,
      "step": 176
    },
    {
      "epoch": 0.00177,
      "grad_norm": 0.3867365673342131,
      "learning_rate": 0.000531,
      "loss": 7.1566,
      "step": 177
    },
    {
      "epoch": 0.00178,
      "grad_norm": 0.6803005272577738,
      "learning_rate": 0.000534,
      "loss": 7.1525,
      "step": 178
    },
    {
      "epoch": 0.00179,
      "grad_norm": 0.44918531151657043,
      "learning_rate": 0.000537,
      "loss": 7.1407,
      "step": 179
    },
    {
      "epoch": 0.0018,
      "grad_norm": 0.39944612038572996,
      "learning_rate": 0.00054,
      "loss": 7.1121,
      "step": 180
    },
    {
      "epoch": 0.00181,
      "grad_norm": 0.5937704312371144,
      "learning_rate": 0.000543,
      "loss": 7.1037,
      "step": 181
    },
    {
      "epoch": 0.00182,
      "grad_norm": 0.3956001398507633,
      "learning_rate": 0.000546,
      "loss": 7.0976,
      "step": 182
    },
    {
      "epoch": 0.00183,
      "grad_norm": 0.36202282333499824,
      "learning_rate": 0.000549,
      "loss": 7.0864,
      "step": 183
    },
    {
      "epoch": 0.00184,
      "grad_norm": 0.564760920006622,
      "learning_rate": 0.000552,
      "loss": 7.0785,
      "step": 184
    },
    {
      "epoch": 0.00185,
      "grad_norm": 0.36333201524042624,
      "learning_rate": 0.000555,
      "loss": 7.0515,
      "step": 185
    },
    {
      "epoch": 0.00186,
      "grad_norm": 0.3517854453126823,
      "learning_rate": 0.000558,
      "loss": 7.0614,
      "step": 186
    },
    {
      "epoch": 0.00187,
      "grad_norm": 0.5413049623390372,
      "learning_rate": 0.000561,
      "loss": 7.0394,
      "step": 187
    },
    {
      "epoch": 0.00188,
      "grad_norm": 0.4452202297805183,
      "learning_rate": 0.000564,
      "loss": 7.0168,
      "step": 188
    },
    {
      "epoch": 0.00189,
      "grad_norm": 0.2888404630990515,
      "learning_rate": 0.000567,
      "loss": 7.0108,
      "step": 189
    },
    {
      "epoch": 0.0019,
      "grad_norm": 0.5069984229642092,
      "learning_rate": 0.00057,
      "loss": 6.9967,
      "step": 190
    },
    {
      "epoch": 0.00191,
      "grad_norm": 0.5049159939798347,
      "learning_rate": 0.000573,
      "loss": 6.9897,
      "step": 191
    },
    {
      "epoch": 0.00192,
      "grad_norm": 0.353459237780404,
      "learning_rate": 0.000576,
      "loss": 6.9784,
      "step": 192
    },
    {
      "epoch": 0.00193,
      "grad_norm": 0.4583730098712965,
      "learning_rate": 0.000579,
      "loss": 6.9798,
      "step": 193
    },
    {
      "epoch": 0.00194,
      "grad_norm": 0.5934016188318005,
      "learning_rate": 0.000582,
      "loss": 6.9757,
      "step": 194
    },
    {
      "epoch": 0.00195,
      "grad_norm": 0.6611510755360627,
      "learning_rate": 0.000585,
      "loss": 6.9556,
      "step": 195
    },
    {
      "epoch": 0.00196,
      "grad_norm": 0.6794801564658052,
      "learning_rate": 0.000588,
      "loss": 6.9455,
      "step": 196
    },
    {
      "epoch": 0.00197,
      "grad_norm": 0.6965025324131076,
      "learning_rate": 0.000591,
      "loss": 6.9424,
      "step": 197
    },
    {
      "epoch": 0.00198,
      "grad_norm": 1.0010424094015926,
      "learning_rate": 0.000594,
      "loss": 6.9428,
      "step": 198
    },
    {
      "epoch": 0.00199,
      "grad_norm": 0.8473959375492113,
      "learning_rate": 0.0005970000000000001,
      "loss": 6.9257,
      "step": 199
    },
    {
      "epoch": 0.002,
      "grad_norm": 0.4674164352666973,
      "learning_rate": 0.0006000000000000001,
      "loss": 6.9135,
      "step": 200
    },
    {
      "epoch": 0.00201,
      "grad_norm": 0.6130101305251902,
      "learning_rate": 0.000603,
      "loss": 6.8907,
      "step": 201
    },
    {
      "epoch": 0.00202,
      "grad_norm": 0.5766204915712545,
      "learning_rate": 0.0006060000000000001,
      "loss": 6.8786,
      "step": 202
    },
    {
      "epoch": 0.00203,
      "grad_norm": 0.42726729176690026,
      "learning_rate": 0.0006090000000000001,
      "loss": 6.8819,
      "step": 203
    },
    {
      "epoch": 0.00204,
      "grad_norm": 0.5161441207128264,
      "learning_rate": 0.000612,
      "loss": 6.8675,
      "step": 204
    },
    {
      "epoch": 0.00205,
      "grad_norm": 0.44005998133884483,
      "learning_rate": 0.000615,
      "loss": 6.852,
      "step": 205
    },
    {
      "epoch": 0.00206,
      "grad_norm": 0.4214710856814876,
      "learning_rate": 0.000618,
      "loss": 6.8455,
      "step": 206
    },
    {
      "epoch": 0.00207,
      "grad_norm": 0.3835330802523119,
      "learning_rate": 0.000621,
      "loss": 6.8358,
      "step": 207
    },
    {
      "epoch": 0.00208,
      "grad_norm": 0.4118282878888594,
      "learning_rate": 0.000624,
      "loss": 6.8325,
      "step": 208
    },
    {
      "epoch": 0.00209,
      "grad_norm": 0.35207765117623135,
      "learning_rate": 0.000627,
      "loss": 6.8108,
      "step": 209
    },
    {
      "epoch": 0.0021,
      "grad_norm": 0.35165038308098223,
      "learning_rate": 0.00063,
      "loss": 6.8001,
      "step": 210
    },
    {
      "epoch": 0.00211,
      "grad_norm": 0.2907998960376576,
      "learning_rate": 0.000633,
      "loss": 6.794,
      "step": 211
    },
    {
      "epoch": 0.00212,
      "grad_norm": 0.34183459291083,
      "learning_rate": 0.000636,
      "loss": 6.7864,
      "step": 212
    },
    {
      "epoch": 0.00213,
      "grad_norm": 0.31625023606106545,
      "learning_rate": 0.000639,
      "loss": 6.7664,
      "step": 213
    },
    {
      "epoch": 0.00214,
      "grad_norm": 0.3389167462098675,
      "learning_rate": 0.000642,
      "loss": 6.7645,
      "step": 214
    },
    {
      "epoch": 0.00215,
      "grad_norm": 0.4689783367804683,
      "learning_rate": 0.000645,
      "loss": 6.7617,
      "step": 215
    },
    {
      "epoch": 0.00216,
      "grad_norm": 0.7554849005001304,
      "learning_rate": 0.000648,
      "loss": 6.763,
      "step": 216
    },
    {
      "epoch": 0.00217,
      "grad_norm": 1.3852957930548755,
      "learning_rate": 0.000651,
      "loss": 6.7541,
      "step": 217
    },
    {
      "epoch": 0.00218,
      "grad_norm": 0.6607509792225358,
      "learning_rate": 0.000654,
      "loss": 6.7405,
      "step": 218
    },
    {
      "epoch": 0.00219,
      "grad_norm": 0.6480761266032634,
      "learning_rate": 0.000657,
      "loss": 6.7122,
      "step": 219
    },
    {
      "epoch": 0.0022,
      "grad_norm": 1.2227203355321279,
      "learning_rate": 0.00066,
      "loss": 6.7471,
      "step": 220
    },
    {
      "epoch": 0.00221,
      "grad_norm": 1.1390371544106608,
      "learning_rate": 0.0006630000000000001,
      "loss": 6.7248,
      "step": 221
    },
    {
      "epoch": 0.00222,
      "grad_norm": 0.7354052143767025,
      "learning_rate": 0.000666,
      "loss": 6.7013,
      "step": 222
    },
    {
      "epoch": 0.00223,
      "grad_norm": 0.6134943537271429,
      "learning_rate": 0.000669,
      "loss": 6.6937,
      "step": 223
    },
    {
      "epoch": 0.00224,
      "grad_norm": 0.5502679770385809,
      "learning_rate": 0.0006720000000000001,
      "loss": 6.6934,
      "step": 224
    },
    {
      "epoch": 0.00225,
      "grad_norm": 0.47738132510350817,
      "learning_rate": 0.000675,
      "loss": 6.6778,
      "step": 225
    },
    {
      "epoch": 0.00226,
      "grad_norm": 0.507114065421453,
      "learning_rate": 0.000678,
      "loss": 6.6712,
      "step": 226
    },
    {
      "epoch": 0.00227,
      "grad_norm": 0.4403172481189424,
      "learning_rate": 0.0006810000000000001,
      "loss": 6.646,
      "step": 227
    },
    {
      "epoch": 0.00228,
      "grad_norm": 0.34083753361811386,
      "learning_rate": 0.000684,
      "loss": 6.6465,
      "step": 228
    },
    {
      "epoch": 0.00229,
      "grad_norm": 0.3502366193988861,
      "learning_rate": 0.000687,
      "loss": 6.6418,
      "step": 229
    },
    {
      "epoch": 0.0023,
      "grad_norm": 0.40867670062411404,
      "learning_rate": 0.0006900000000000001,
      "loss": 6.6218,
      "step": 230
    },
    {
      "epoch": 0.00231,
      "grad_norm": 0.3512396474179769,
      "learning_rate": 0.000693,
      "loss": 6.605,
      "step": 231
    },
    {
      "epoch": 0.00232,
      "grad_norm": 0.37061772246720376,
      "learning_rate": 0.000696,
      "loss": 6.6164,
      "step": 232
    },
    {
      "epoch": 0.00233,
      "grad_norm": 0.30975845835339183,
      "learning_rate": 0.0006990000000000001,
      "loss": 6.6026,
      "step": 233
    },
    {
      "epoch": 0.00234,
      "grad_norm": 0.31056463949410484,
      "learning_rate": 0.000702,
      "loss": 6.5934,
      "step": 234
    },
    {
      "epoch": 0.00235,
      "grad_norm": 0.3416829938754332,
      "learning_rate": 0.000705,
      "loss": 6.5798,
      "step": 235
    },
    {
      "epoch": 0.00236,
      "grad_norm": 0.3580449551775802,
      "learning_rate": 0.000708,
      "loss": 6.5618,
      "step": 236
    },
    {
      "epoch": 0.00237,
      "grad_norm": 0.31186166958910727,
      "learning_rate": 0.0007109999999999999,
      "loss": 6.5709,
      "step": 237
    },
    {
      "epoch": 0.00238,
      "grad_norm": 0.35712039334009765,
      "learning_rate": 0.000714,
      "loss": 6.5601,
      "step": 238
    },
    {
      "epoch": 0.00239,
      "grad_norm": 0.4689874670209286,
      "learning_rate": 0.000717,
      "loss": 6.5431,
      "step": 239
    },
    {
      "epoch": 0.0024,
      "grad_norm": 0.8385556457838154,
      "learning_rate": 0.0007199999999999999,
      "loss": 6.5652,
      "step": 240
    },
    {
      "epoch": 0.00241,
      "grad_norm": 1.2597764356570753,
      "learning_rate": 0.000723,
      "loss": 6.5721,
      "step": 241
    },
    {
      "epoch": 0.00242,
      "grad_norm": 0.7018547016449656,
      "learning_rate": 0.000726,
      "loss": 6.5258,
      "step": 242
    },
    {
      "epoch": 0.00243,
      "grad_norm": 0.9679840331331718,
      "learning_rate": 0.000729,
      "loss": 6.5235,
      "step": 243
    },
    {
      "epoch": 0.00244,
      "grad_norm": 0.8151244219176279,
      "learning_rate": 0.000732,
      "loss": 6.5289,
      "step": 244
    },
    {
      "epoch": 0.00245,
      "grad_norm": 0.7408239035858821,
      "learning_rate": 0.000735,
      "loss": 6.5159,
      "step": 245
    },
    {
      "epoch": 0.00246,
      "grad_norm": 0.7269888029650857,
      "learning_rate": 0.000738,
      "loss": 6.5006,
      "step": 246
    },
    {
      "epoch": 0.00247,
      "grad_norm": 0.6715573633422388,
      "learning_rate": 0.000741,
      "loss": 6.4916,
      "step": 247
    },
    {
      "epoch": 0.00248,
      "grad_norm": 0.5355440416613977,
      "learning_rate": 0.000744,
      "loss": 6.4965,
      "step": 248
    },
    {
      "epoch": 0.00249,
      "grad_norm": 0.47837466185087324,
      "learning_rate": 0.000747,
      "loss": 6.474,
      "step": 249
    },
    {
      "epoch": 0.0025,
      "grad_norm": 0.5421674013984196,
      "learning_rate": 0.00075,
      "loss": 6.4781,
      "step": 250
    },
    {
      "epoch": 0.00251,
      "grad_norm": 0.5046285559759315,
      "learning_rate": 0.000753,
      "loss": 6.4394,
      "step": 251
    },
    {
      "epoch": 0.00252,
      "grad_norm": 0.4902399046559828,
      "learning_rate": 0.000756,
      "loss": 6.4478,
      "step": 252
    },
    {
      "epoch": 0.00253,
      "grad_norm": 0.6026763597870729,
      "learning_rate": 0.000759,
      "loss": 6.4417,
      "step": 253
    },
    {
      "epoch": 0.00254,
      "grad_norm": 0.6534629043130656,
      "learning_rate": 0.000762,
      "loss": 6.4357,
      "step": 254
    },
    {
      "epoch": 0.00255,
      "grad_norm": 0.5879568865224861,
      "learning_rate": 0.0007650000000000001,
      "loss": 6.4331,
      "step": 255
    },
    {
      "epoch": 0.00256,
      "grad_norm": 0.587549512889128,
      "learning_rate": 0.000768,
      "loss": 6.4121,
      "step": 256
    },
    {
      "epoch": 0.00257,
      "grad_norm": 0.4948417519702818,
      "learning_rate": 0.000771,
      "loss": 6.4144,
      "step": 257
    },
    {
      "epoch": 0.00258,
      "grad_norm": 0.500640645286767,
      "learning_rate": 0.0007740000000000001,
      "loss": 6.3915,
      "step": 258
    },
    {
      "epoch": 0.00259,
      "grad_norm": 0.46096026590523803,
      "learning_rate": 0.000777,
      "loss": 6.3904,
      "step": 259
    },
    {
      "epoch": 0.0026,
      "grad_norm": 0.36710644781296853,
      "learning_rate": 0.0007800000000000001,
      "loss": 6.3738,
      "step": 260
    },
    {
      "epoch": 0.00261,
      "grad_norm": 0.3228962429333065,
      "learning_rate": 0.0007830000000000001,
      "loss": 6.3839,
      "step": 261
    },
    {
      "epoch": 0.00262,
      "grad_norm": 0.33723485694460476,
      "learning_rate": 0.000786,
      "loss": 6.3601,
      "step": 262
    },
    {
      "epoch": 0.00263,
      "grad_norm": 0.3792844357509868,
      "learning_rate": 0.0007890000000000001,
      "loss": 6.3664,
      "step": 263
    },
    {
      "epoch": 0.00264,
      "grad_norm": 0.41999467297545995,
      "learning_rate": 0.0007920000000000001,
      "loss": 6.3505,
      "step": 264
    },
    {
      "epoch": 0.00265,
      "grad_norm": 0.5058327981670172,
      "learning_rate": 0.000795,
      "loss": 6.3427,
      "step": 265
    },
    {
      "epoch": 0.00266,
      "grad_norm": 0.728666013298328,
      "learning_rate": 0.0007980000000000001,
      "loss": 6.3445,
      "step": 266
    },
    {
      "epoch": 0.00267,
      "grad_norm": 1.051597167429085,
      "learning_rate": 0.0008010000000000001,
      "loss": 6.3504,
      "step": 267
    },
    {
      "epoch": 0.00268,
      "grad_norm": 0.8184600443662068,
      "learning_rate": 0.000804,
      "loss": 6.3425,
      "step": 268
    },
    {
      "epoch": 0.00269,
      "grad_norm": 0.9044150906220578,
      "learning_rate": 0.0008070000000000001,
      "loss": 6.3249,
      "step": 269
    },
    {
      "epoch": 0.0027,
      "grad_norm": 1.128589514160359,
      "learning_rate": 0.0008100000000000001,
      "loss": 6.3418,
      "step": 270
    },
    {
      "epoch": 0.00271,
      "grad_norm": 0.8269681270889044,
      "learning_rate": 0.000813,
      "loss": 6.3324,
      "step": 271
    },
    {
      "epoch": 0.00272,
      "grad_norm": 0.9605488749490486,
      "learning_rate": 0.0008160000000000001,
      "loss": 6.3229,
      "step": 272
    },
    {
      "epoch": 0.00273,
      "grad_norm": 1.4256959145221717,
      "learning_rate": 0.0008190000000000001,
      "loss": 6.3332,
      "step": 273
    },
    {
      "epoch": 0.00274,
      "grad_norm": 0.8746481700595169,
      "learning_rate": 0.000822,
      "loss": 6.301,
      "step": 274
    },
    {
      "epoch": 0.00275,
      "grad_norm": 0.7483839590071554,
      "learning_rate": 0.0008250000000000001,
      "loss": 6.3047,
      "step": 275
    },
    {
      "epoch": 0.00276,
      "grad_norm": 0.9683696395132253,
      "learning_rate": 0.0008280000000000001,
      "loss": 6.2965,
      "step": 276
    },
    {
      "epoch": 0.00277,
      "grad_norm": 0.9445364353770574,
      "learning_rate": 0.0008310000000000001,
      "loss": 6.3021,
      "step": 277
    },
    {
      "epoch": 0.00278,
      "grad_norm": 0.5637393663463164,
      "learning_rate": 0.0008340000000000001,
      "loss": 6.2711,
      "step": 278
    },
    {
      "epoch": 0.00279,
      "grad_norm": 0.6474632709262491,
      "learning_rate": 0.0008370000000000001,
      "loss": 6.2722,
      "step": 279
    },
    {
      "epoch": 0.0028,
      "grad_norm": 0.5477535984985658,
      "learning_rate": 0.0008400000000000001,
      "loss": 6.2548,
      "step": 280
    },
    {
      "epoch": 0.00281,
      "grad_norm": 0.5235139271800053,
      "learning_rate": 0.0008430000000000001,
      "loss": 6.2428,
      "step": 281
    },
    {
      "epoch": 0.00282,
      "grad_norm": 0.4309641347762167,
      "learning_rate": 0.000846,
      "loss": 6.2382,
      "step": 282
    },
    {
      "epoch": 0.00283,
      "grad_norm": 0.47598469419389666,
      "learning_rate": 0.0008489999999999999,
      "loss": 6.2321,
      "step": 283
    },
    {
      "epoch": 0.00284,
      "grad_norm": 0.41791767912211497,
      "learning_rate": 0.0008519999999999999,
      "loss": 6.2199,
      "step": 284
    },
    {
      "epoch": 0.00285,
      "grad_norm": 0.4068034669968041,
      "learning_rate": 0.000855,
      "loss": 6.2226,
      "step": 285
    },
    {
      "epoch": 0.00286,
      "grad_norm": 0.461449105404332,
      "learning_rate": 0.0008579999999999999,
      "loss": 6.2004,
      "step": 286
    },
    {
      "epoch": 0.00287,
      "grad_norm": 0.3833700707849029,
      "learning_rate": 0.000861,
      "loss": 6.2013,
      "step": 287
    },
    {
      "epoch": 0.00288,
      "grad_norm": 0.3960645710165419,
      "learning_rate": 0.000864,
      "loss": 6.1818,
      "step": 288
    },
    {
      "epoch": 0.00289,
      "grad_norm": 0.44431212101949524,
      "learning_rate": 0.0008669999999999999,
      "loss": 6.1961,
      "step": 289
    },
    {
      "epoch": 0.0029,
      "grad_norm": 0.5861155579490576,
      "learning_rate": 0.00087,
      "loss": 6.1786,
      "step": 290
    },
    {
      "epoch": 0.00291,
      "grad_norm": 0.6639450218120423,
      "learning_rate": 0.000873,
      "loss": 6.1739,
      "step": 291
    },
    {
      "epoch": 0.00292,
      "grad_norm": 0.8465664162859037,
      "learning_rate": 0.0008759999999999999,
      "loss": 6.1756,
      "step": 292
    },
    {
      "epoch": 0.00293,
      "grad_norm": 0.9291009677838105,
      "learning_rate": 0.000879,
      "loss": 6.1618,
      "step": 293
    },
    {
      "epoch": 0.00294,
      "grad_norm": 1.3082966000676708,
      "learning_rate": 0.000882,
      "loss": 6.1833,
      "step": 294
    },
    {
      "epoch": 0.00295,
      "grad_norm": 1.1893342417992165,
      "learning_rate": 0.0008849999999999999,
      "loss": 6.1586,
      "step": 295
    },
    {
      "epoch": 0.00296,
      "grad_norm": 0.6546841726146672,
      "learning_rate": 0.000888,
      "loss": 6.1551,
      "step": 296
    },
    {
      "epoch": 0.00297,
      "grad_norm": 0.8416245966488266,
      "learning_rate": 0.000891,
      "loss": 6.1315,
      "step": 297
    },
    {
      "epoch": 0.00298,
      "grad_norm": 0.806301942306892,
      "learning_rate": 0.0008939999999999999,
      "loss": 6.1452,
      "step": 298
    },
    {
      "epoch": 0.00299,
      "grad_norm": 1.0994469943740992,
      "learning_rate": 0.000897,
      "loss": 6.1288,
      "step": 299
    },
    {
      "epoch": 0.003,
      "grad_norm": 0.7425213774845364,
      "learning_rate": 0.0009,
      "loss": 6.123,
      "step": 300
    },
    {
      "epoch": 0.00301,
      "grad_norm": 0.6433032250398036,
      "learning_rate": 0.0009029999999999999,
      "loss": 6.1149,
      "step": 301
    },
    {
      "epoch": 0.00302,
      "grad_norm": 0.5449990789285414,
      "learning_rate": 0.000906,
      "loss": 6.0952,
      "step": 302
    },
    {
      "epoch": 0.00303,
      "grad_norm": 0.5356674479383384,
      "learning_rate": 0.000909,
      "loss": 6.1067,
      "step": 303
    },
    {
      "epoch": 0.00304,
      "grad_norm": 0.5049891146020773,
      "learning_rate": 0.000912,
      "loss": 6.1013,
      "step": 304
    },
    {
      "epoch": 0.00305,
      "grad_norm": 0.48068374440179407,
      "learning_rate": 0.000915,
      "loss": 6.0985,
      "step": 305
    },
    {
      "epoch": 0.00306,
      "grad_norm": 0.39305660615027715,
      "learning_rate": 0.000918,
      "loss": 6.0835,
      "step": 306
    },
    {
      "epoch": 0.00307,
      "grad_norm": 0.4044411514506039,
      "learning_rate": 0.000921,
      "loss": 6.0774,
      "step": 307
    },
    {
      "epoch": 0.00308,
      "grad_norm": 0.3317383414417003,
      "learning_rate": 0.000924,
      "loss": 6.0578,
      "step": 308
    },
    {
      "epoch": 0.00309,
      "grad_norm": 0.4170081186564571,
      "learning_rate": 0.000927,
      "loss": 6.0741,
      "step": 309
    },
    {
      "epoch": 0.0031,
      "grad_norm": 0.3537372632819937,
      "learning_rate": 0.00093,
      "loss": 6.0544,
      "step": 310
    },
    {
      "epoch": 0.00311,
      "grad_norm": 0.3807795877969161,
      "learning_rate": 0.000933,
      "loss": 6.0455,
      "step": 311
    },
    {
      "epoch": 0.00312,
      "grad_norm": 0.36301395919450175,
      "learning_rate": 0.000936,
      "loss": 6.025,
      "step": 312
    },
    {
      "epoch": 0.00313,
      "grad_norm": 0.456154645430038,
      "learning_rate": 0.0009390000000000001,
      "loss": 6.0399,
      "step": 313
    },
    {
      "epoch": 0.00314,
      "grad_norm": 0.6252307526263491,
      "learning_rate": 0.000942,
      "loss": 6.048,
      "step": 314
    },
    {
      "epoch": 0.00315,
      "grad_norm": 1.0054536709858048,
      "learning_rate": 0.000945,
      "loss": 6.0332,
      "step": 315
    },
    {
      "epoch": 0.00316,
      "grad_norm": 1.2671801420052267,
      "learning_rate": 0.0009480000000000001,
      "loss": 6.0322,
      "step": 316
    },
    {
      "epoch": 0.00317,
      "grad_norm": 0.5267566088823273,
      "learning_rate": 0.000951,
      "loss": 6.0198,
      "step": 317
    },
    {
      "epoch": 0.00318,
      "grad_norm": 1.2005145600955376,
      "learning_rate": 0.000954,
      "loss": 6.041,
      "step": 318
    },
    {
      "epoch": 0.00319,
      "grad_norm": 1.004603435172909,
      "learning_rate": 0.0009570000000000001,
      "loss": 6.025,
      "step": 319
    },
    {
      "epoch": 0.0032,
      "grad_norm": 1.1201777687880106,
      "learning_rate": 0.00096,
      "loss": 6.0356,
      "step": 320
    },
    {
      "epoch": 0.00321,
      "grad_norm": 1.0167944206203925,
      "learning_rate": 0.000963,
      "loss": 6.012,
      "step": 321
    },
    {
      "epoch": 0.00322,
      "grad_norm": 1.374478470085335,
      "learning_rate": 0.0009660000000000001,
      "loss": 6.025,
      "step": 322
    },
    {
      "epoch": 0.00323,
      "grad_norm": 0.7765654772896999,
      "learning_rate": 0.000969,
      "loss": 6.0057,
      "step": 323
    },
    {
      "epoch": 0.00324,
      "grad_norm": 0.6987857474960232,
      "learning_rate": 0.0009720000000000001,
      "loss": 6.0035,
      "step": 324
    },
    {
      "epoch": 0.00325,
      "grad_norm": 0.7564069291902213,
      "learning_rate": 0.0009750000000000001,
      "loss": 5.9742,
      "step": 325
    },
    {
      "epoch": 0.00326,
      "grad_norm": 0.7275905653375951,
      "learning_rate": 0.0009780000000000001,
      "loss": 5.9877,
      "step": 326
    },
    {
      "epoch": 0.00327,
      "grad_norm": 0.6984989438722892,
      "learning_rate": 0.000981,
      "loss": 5.9857,
      "step": 327
    },
    {
      "epoch": 0.00328,
      "grad_norm": 0.721382136086564,
      "learning_rate": 0.000984,
      "loss": 5.9627,
      "step": 328
    },
    {
      "epoch": 0.00329,
      "grad_norm": 0.7841656285031653,
      "learning_rate": 0.000987,
      "loss": 5.9649,
      "step": 329
    },
    {
      "epoch": 0.0033,
      "grad_norm": 0.8334397806028965,
      "learning_rate": 0.00099,
      "loss": 5.9568,
      "step": 330
    },
    {
      "epoch": 0.00331,
      "grad_norm": 1.070828806370832,
      "learning_rate": 0.0009930000000000002,
      "loss": 5.9642,
      "step": 331
    },
    {
      "epoch": 0.00332,
      "grad_norm": 1.0826703064607723,
      "learning_rate": 0.0009960000000000001,
      "loss": 5.9538,
      "step": 332
    },
    {
      "epoch": 0.00333,
      "grad_norm": 0.9458285226462658,
      "learning_rate": 0.000999,
      "loss": 5.9361,
      "step": 333
    },
    {
      "epoch": 0.00334,
      "grad_norm": 0.8783572499928525,
      "learning_rate": 0.001002,
      "loss": 5.9443,
      "step": 334
    },
    {
      "epoch": 0.00335,
      "grad_norm": 0.5654549281403035,
      "learning_rate": 0.001005,
      "loss": 5.9354,
      "step": 335
    },
    {
      "epoch": 0.00336,
      "grad_norm": 0.6516350129184051,
      "learning_rate": 0.001008,
      "loss": 5.9285,
      "step": 336
    },
    {
      "epoch": 0.00337,
      "grad_norm": 0.5946351510133966,
      "learning_rate": 0.0010110000000000002,
      "loss": 5.9026,
      "step": 337
    },
    {
      "epoch": 0.00338,
      "grad_norm": 0.46824568572142555,
      "learning_rate": 0.0010140000000000001,
      "loss": 5.9165,
      "step": 338
    },
    {
      "epoch": 0.00339,
      "grad_norm": 0.5098867793571775,
      "learning_rate": 0.0010170000000000001,
      "loss": 5.9215,
      "step": 339
    },
    {
      "epoch": 0.0034,
      "grad_norm": 0.5202026348670868,
      "learning_rate": 0.00102,
      "loss": 5.8881,
      "step": 340
    },
    {
      "epoch": 0.00341,
      "grad_norm": 0.4748523902202147,
      "learning_rate": 0.001023,
      "loss": 5.8705,
      "step": 341
    },
    {
      "epoch": 0.00342,
      "grad_norm": 0.42065780247182966,
      "learning_rate": 0.001026,
      "loss": 5.8732,
      "step": 342
    },
    {
      "epoch": 0.00343,
      "grad_norm": 0.4385189981740614,
      "learning_rate": 0.0010290000000000002,
      "loss": 5.8762,
      "step": 343
    },
    {
      "epoch": 0.00344,
      "grad_norm": 0.44131626809067537,
      "learning_rate": 0.001032,
      "loss": 5.8646,
      "step": 344
    },
    {
      "epoch": 0.00345,
      "grad_norm": 0.5602404326473228,
      "learning_rate": 0.001035,
      "loss": 5.8576,
      "step": 345
    },
    {
      "epoch": 0.00346,
      "grad_norm": 0.7338782884674983,
      "learning_rate": 0.0010379999999999999,
      "loss": 5.8684,
      "step": 346
    },
    {
      "epoch": 0.00347,
      "grad_norm": 0.8871217127911775,
      "learning_rate": 0.001041,
      "loss": 5.8429,
      "step": 347
    },
    {
      "epoch": 0.00348,
      "grad_norm": 0.7294005585257834,
      "learning_rate": 0.001044,
      "loss": 5.8498,
      "step": 348
    },
    {
      "epoch": 0.00349,
      "grad_norm": 0.672488440407315,
      "learning_rate": 0.001047,
      "loss": 5.8268,
      "step": 349
    },
    {
      "epoch": 0.0035,
      "grad_norm": 0.944274771030096,
      "learning_rate": 0.00105,
      "loss": 5.8574,
      "step": 350
    },
    {
      "epoch": 0.00351,
      "grad_norm": 0.8431353927439175,
      "learning_rate": 0.001053,
      "loss": 5.8586,
      "step": 351
    },
    {
      "epoch": 0.00352,
      "grad_norm": 0.8223595555288745,
      "learning_rate": 0.0010559999999999999,
      "loss": 5.8326,
      "step": 352
    },
    {
      "epoch": 0.00353,
      "grad_norm": 0.7412573085023916,
      "learning_rate": 0.001059,
      "loss": 5.8336,
      "step": 353
    },
    {
      "epoch": 0.00354,
      "grad_norm": 0.7219192917744488,
      "learning_rate": 0.001062,
      "loss": 5.8047,
      "step": 354
    },
    {
      "epoch": 0.00355,
      "grad_norm": 0.7332482461779326,
      "learning_rate": 0.001065,
      "loss": 5.8213,
      "step": 355
    },
    {
      "epoch": 0.00356,
      "grad_norm": 0.6300796792438244,
      "learning_rate": 0.001068,
      "loss": 5.8096,
      "step": 356
    },
    {
      "epoch": 0.00357,
      "grad_norm": 0.7796926368866208,
      "learning_rate": 0.001071,
      "loss": 5.802,
      "step": 357
    },
    {
      "epoch": 0.00358,
      "grad_norm": 1.048118676098695,
      "learning_rate": 0.001074,
      "loss": 5.8089,
      "step": 358
    },
    {
      "epoch": 0.00359,
      "grad_norm": 0.8469283052653666,
      "learning_rate": 0.001077,
      "loss": 5.7908,
      "step": 359
    },
    {
      "epoch": 0.0036,
      "grad_norm": 0.824969051981288,
      "learning_rate": 0.00108,
      "loss": 5.7948,
      "step": 360
    },
    {
      "epoch": 0.00361,
      "grad_norm": 0.8007424119602606,
      "learning_rate": 0.001083,
      "loss": 5.7876,
      "step": 361
    },
    {
      "epoch": 0.00362,
      "grad_norm": 0.7740442530973947,
      "learning_rate": 0.001086,
      "loss": 5.7713,
      "step": 362
    },
    {
      "epoch": 0.00363,
      "grad_norm": 0.8217790100779416,
      "learning_rate": 0.001089,
      "loss": 5.7673,
      "step": 363
    },
    {
      "epoch": 0.00364,
      "grad_norm": 0.9222078053619298,
      "learning_rate": 0.001092,
      "loss": 5.7813,
      "step": 364
    },
    {
      "epoch": 0.00365,
      "grad_norm": 1.3725693861639678,
      "learning_rate": 0.001095,
      "loss": 5.801,
      "step": 365
    },
    {
      "epoch": 0.00366,
      "grad_norm": 0.7809021294214551,
      "learning_rate": 0.001098,
      "loss": 5.7587,
      "step": 366
    },
    {
      "epoch": 0.00367,
      "grad_norm": 1.1843193874441642,
      "learning_rate": 0.001101,
      "loss": 5.7731,
      "step": 367
    },
    {
      "epoch": 0.00368,
      "grad_norm": 0.7875897064576857,
      "learning_rate": 0.001104,
      "loss": 5.7636,
      "step": 368
    },
    {
      "epoch": 0.00369,
      "grad_norm": 0.6858717859751121,
      "learning_rate": 0.001107,
      "loss": 5.7637,
      "step": 369
    },
    {
      "epoch": 0.0037,
      "grad_norm": 0.6252688126791845,
      "learning_rate": 0.00111,
      "loss": 5.7428,
      "step": 370
    },
    {
      "epoch": 0.00371,
      "grad_norm": 0.6219765776345385,
      "learning_rate": 0.001113,
      "loss": 5.7116,
      "step": 371
    },
    {
      "epoch": 0.00372,
      "grad_norm": 0.5998751494739524,
      "learning_rate": 0.001116,
      "loss": 5.7331,
      "step": 372
    },
    {
      "epoch": 0.00373,
      "grad_norm": 0.5714053922949759,
      "learning_rate": 0.001119,
      "loss": 5.72,
      "step": 373
    },
    {
      "epoch": 0.00374,
      "grad_norm": 0.628102864915935,
      "learning_rate": 0.001122,
      "loss": 5.7262,
      "step": 374
    },
    {
      "epoch": 0.00375,
      "grad_norm": 0.5909991857975568,
      "learning_rate": 0.0011250000000000001,
      "loss": 5.7276,
      "step": 375
    },
    {
      "epoch": 0.00376,
      "grad_norm": 0.5216592986380127,
      "learning_rate": 0.001128,
      "loss": 5.72,
      "step": 376
    },
    {
      "epoch": 0.00377,
      "grad_norm": 0.5469626358369448,
      "learning_rate": 0.001131,
      "loss": 5.7145,
      "step": 377
    },
    {
      "epoch": 0.00378,
      "grad_norm": 0.6291207985788981,
      "learning_rate": 0.001134,
      "loss": 5.7123,
      "step": 378
    },
    {
      "epoch": 0.00379,
      "grad_norm": 0.6445779804368097,
      "learning_rate": 0.001137,
      "loss": 5.698,
      "step": 379
    },
    {
      "epoch": 0.0038,
      "grad_norm": 0.7034810438425685,
      "learning_rate": 0.00114,
      "loss": 5.7177,
      "step": 380
    },
    {
      "epoch": 0.00381,
      "grad_norm": 0.9330449756794958,
      "learning_rate": 0.0011430000000000001,
      "loss": 5.6924,
      "step": 381
    },
    {
      "epoch": 0.00382,
      "grad_norm": 0.9449582673655755,
      "learning_rate": 0.001146,
      "loss": 5.6863,
      "step": 382
    },
    {
      "epoch": 0.00383,
      "grad_norm": 0.6715143882341864,
      "learning_rate": 0.001149,
      "loss": 5.6846,
      "step": 383
    },
    {
      "epoch": 0.00384,
      "grad_norm": 0.8662082921806001,
      "learning_rate": 0.001152,
      "loss": 5.699,
      "step": 384
    },
    {
      "epoch": 0.00385,
      "grad_norm": 0.8482542645994808,
      "learning_rate": 0.001155,
      "loss": 5.6818,
      "step": 385
    },
    {
      "epoch": 0.00386,
      "grad_norm": 0.8694120663581818,
      "learning_rate": 0.001158,
      "loss": 5.6914,
      "step": 386
    },
    {
      "epoch": 0.00387,
      "grad_norm": 1.382803938865885,
      "learning_rate": 0.0011610000000000001,
      "loss": 5.6813,
      "step": 387
    },
    {
      "epoch": 0.00388,
      "grad_norm": 0.9644228724095026,
      "learning_rate": 0.001164,
      "loss": 5.6689,
      "step": 388
    },
    {
      "epoch": 0.00389,
      "grad_norm": 0.7396617808819147,
      "learning_rate": 0.001167,
      "loss": 5.6709,
      "step": 389
    },
    {
      "epoch": 0.0039,
      "grad_norm": 0.5609185173455993,
      "learning_rate": 0.00117,
      "loss": 5.648,
      "step": 390
    },
    {
      "epoch": 0.00391,
      "grad_norm": 0.735416794156057,
      "learning_rate": 0.001173,
      "loss": 5.6589,
      "step": 391
    },
    {
      "epoch": 0.00392,
      "grad_norm": 0.7564046131732848,
      "learning_rate": 0.001176,
      "loss": 5.6662,
      "step": 392
    },
    {
      "epoch": 0.00393,
      "grad_norm": 0.7233997204719024,
      "learning_rate": 0.0011790000000000001,
      "loss": 5.6377,
      "step": 393
    },
    {
      "epoch": 0.00394,
      "grad_norm": 0.6288246845478384,
      "learning_rate": 0.001182,
      "loss": 5.6239,
      "step": 394
    },
    {
      "epoch": 0.00395,
      "grad_norm": 0.6696319293361586,
      "learning_rate": 0.001185,
      "loss": 5.6102,
      "step": 395
    },
    {
      "epoch": 0.00396,
      "grad_norm": 0.7214553632515296,
      "learning_rate": 0.001188,
      "loss": 5.6276,
      "step": 396
    },
    {
      "epoch": 0.00397,
      "grad_norm": 0.7104651338358826,
      "learning_rate": 0.001191,
      "loss": 5.619,
      "step": 397
    },
    {
      "epoch": 0.00398,
      "grad_norm": 0.5955487966253655,
      "learning_rate": 0.0011940000000000002,
      "loss": 5.6195,
      "step": 398
    },
    {
      "epoch": 0.00399,
      "grad_norm": 0.6061151049974988,
      "learning_rate": 0.0011970000000000001,
      "loss": 5.6211,
      "step": 399
    },
    {
      "epoch": 0.004,
      "grad_norm": 0.747975845260549,
      "learning_rate": 0.0012000000000000001,
      "loss": 5.6119,
      "step": 400
    },
    {
      "epoch": 0.00401,
      "grad_norm": 1.066378470212407,
      "learning_rate": 0.001203,
      "loss": 5.6055,
      "step": 401
    },
    {
      "epoch": 0.00402,
      "grad_norm": 1.1521859370487337,
      "learning_rate": 0.001206,
      "loss": 5.6144,
      "step": 402
    },
    {
      "epoch": 0.00403,
      "grad_norm": 1.0137206411467539,
      "learning_rate": 0.001209,
      "loss": 5.6121,
      "step": 403
    },
    {
      "epoch": 0.00404,
      "grad_norm": 1.1663989264954397,
      "learning_rate": 0.0012120000000000002,
      "loss": 5.6177,
      "step": 404
    },
    {
      "epoch": 0.00405,
      "grad_norm": 1.1363050593119097,
      "learning_rate": 0.0012150000000000002,
      "loss": 5.6098,
      "step": 405
    },
    {
      "epoch": 0.00406,
      "grad_norm": 1.0087910495777368,
      "learning_rate": 0.0012180000000000001,
      "loss": 5.5952,
      "step": 406
    },
    {
      "epoch": 0.00407,
      "grad_norm": 0.9520738669869687,
      "learning_rate": 0.0012209999999999999,
      "loss": 5.5915,
      "step": 407
    },
    {
      "epoch": 0.00408,
      "grad_norm": 1.0377478362348644,
      "learning_rate": 0.001224,
      "loss": 5.6129,
      "step": 408
    },
    {
      "epoch": 0.00409,
      "grad_norm": 1.1016437762309168,
      "learning_rate": 0.001227,
      "loss": 5.5927,
      "step": 409
    },
    {
      "epoch": 0.0041,
      "grad_norm": 0.9709841842795465,
      "learning_rate": 0.00123,
      "loss": 5.5883,
      "step": 410
    },
    {
      "epoch": 0.00411,
      "grad_norm": 0.8240534275873757,
      "learning_rate": 0.001233,
      "loss": 5.5977,
      "step": 411
    },
    {
      "epoch": 0.00412,
      "grad_norm": 0.8077831494606339,
      "learning_rate": 0.001236,
      "loss": 5.5894,
      "step": 412
    },
    {
      "epoch": 0.00413,
      "grad_norm": 0.7433003641205871,
      "learning_rate": 0.0012389999999999999,
      "loss": 5.5815,
      "step": 413
    },
    {
      "epoch": 0.00414,
      "grad_norm": 0.615571577509929,
      "learning_rate": 0.001242,
      "loss": 5.5582,
      "step": 414
    },
    {
      "epoch": 0.00415,
      "grad_norm": 0.656841702591939,
      "learning_rate": 0.001245,
      "loss": 5.5606,
      "step": 415
    },
    {
      "epoch": 0.00416,
      "grad_norm": 0.613196865035469,
      "learning_rate": 0.001248,
      "loss": 5.5418,
      "step": 416
    },
    {
      "epoch": 0.00417,
      "grad_norm": 0.48631699887544017,
      "learning_rate": 0.001251,
      "loss": 5.5683,
      "step": 417
    },
    {
      "epoch": 0.00418,
      "grad_norm": 0.4611320175617472,
      "learning_rate": 0.001254,
      "loss": 5.5401,
      "step": 418
    },
    {
      "epoch": 0.00419,
      "grad_norm": 0.5824396705507784,
      "learning_rate": 0.0012569999999999999,
      "loss": 5.5305,
      "step": 419
    },
    {
      "epoch": 0.0042,
      "grad_norm": 0.8055087238093066,
      "learning_rate": 0.00126,
      "loss": 5.5467,
      "step": 420
    },
    {
      "epoch": 0.00421,
      "grad_norm": 0.9831917562206494,
      "learning_rate": 0.001263,
      "loss": 5.5503,
      "step": 421
    },
    {
      "epoch": 0.00422,
      "grad_norm": 0.8124592037579013,
      "learning_rate": 0.001266,
      "loss": 5.5357,
      "step": 422
    },
    {
      "epoch": 0.00423,
      "grad_norm": 1.0116811247233115,
      "learning_rate": 0.001269,
      "loss": 5.5337,
      "step": 423
    },
    {
      "epoch": 0.00424,
      "grad_norm": 1.368200107021336,
      "learning_rate": 0.001272,
      "loss": 5.5595,
      "step": 424
    },
    {
      "epoch": 0.00425,
      "grad_norm": 0.9353292964204464,
      "learning_rate": 0.001275,
      "loss": 5.5445,
      "step": 425
    },
    {
      "epoch": 0.00426,
      "grad_norm": 0.9604600896045714,
      "learning_rate": 0.001278,
      "loss": 5.5493,
      "step": 426
    },
    {
      "epoch": 0.00427,
      "grad_norm": 0.9684632680550875,
      "learning_rate": 0.001281,
      "loss": 5.5328,
      "step": 427
    },
    {
      "epoch": 0.00428,
      "grad_norm": 0.9533788870858234,
      "learning_rate": 0.001284,
      "loss": 5.5058,
      "step": 428
    },
    {
      "epoch": 0.00429,
      "grad_norm": 1.113271451738206,
      "learning_rate": 0.001287,
      "loss": 5.5322,
      "step": 429
    },
    {
      "epoch": 0.0043,
      "grad_norm": 0.9492520989573756,
      "learning_rate": 0.00129,
      "loss": 5.5226,
      "step": 430
    },
    {
      "epoch": 0.00431,
      "grad_norm": 1.1309185966069144,
      "learning_rate": 0.001293,
      "loss": 5.5118,
      "step": 431
    },
    {
      "epoch": 0.00432,
      "grad_norm": 1.2851571092183034,
      "learning_rate": 0.001296,
      "loss": 5.5367,
      "step": 432
    },
    {
      "epoch": 0.00433,
      "grad_norm": 0.7219415163940603,
      "learning_rate": 0.001299,
      "loss": 5.5106,
      "step": 433
    },
    {
      "epoch": 0.00434,
      "grad_norm": 0.7943786167833424,
      "learning_rate": 0.001302,
      "loss": 5.5109,
      "step": 434
    },
    {
      "epoch": 0.00435,
      "grad_norm": 0.7514833526497924,
      "learning_rate": 0.001305,
      "loss": 5.5051,
      "step": 435
    },
    {
      "epoch": 0.00436,
      "grad_norm": 0.5499552719714149,
      "learning_rate": 0.001308,
      "loss": 5.4758,
      "step": 436
    },
    {
      "epoch": 0.00437,
      "grad_norm": 0.5803132978630802,
      "learning_rate": 0.001311,
      "loss": 5.494,
      "step": 437
    },
    {
      "epoch": 0.00438,
      "grad_norm": 0.4343754614294673,
      "learning_rate": 0.001314,
      "loss": 5.4701,
      "step": 438
    },
    {
      "epoch": 0.00439,
      "grad_norm": 0.49450539118744047,
      "learning_rate": 0.001317,
      "loss": 5.4997,
      "step": 439
    },
    {
      "epoch": 0.0044,
      "grad_norm": 0.3670496641121159,
      "learning_rate": 0.00132,
      "loss": 5.4633,
      "step": 440
    },
    {
      "epoch": 0.00441,
      "grad_norm": 0.44023069828614625,
      "learning_rate": 0.001323,
      "loss": 5.4624,
      "step": 441
    },
    {
      "epoch": 0.00442,
      "grad_norm": 0.45124022563783683,
      "learning_rate": 0.0013260000000000001,
      "loss": 5.4541,
      "step": 442
    },
    {
      "epoch": 0.00443,
      "grad_norm": 0.5170657075032803,
      "learning_rate": 0.001329,
      "loss": 5.4572,
      "step": 443
    },
    {
      "epoch": 0.00444,
      "grad_norm": 0.6406930762525409,
      "learning_rate": 0.001332,
      "loss": 5.4608,
      "step": 444
    },
    {
      "epoch": 0.00445,
      "grad_norm": 0.7431205120059805,
      "learning_rate": 0.001335,
      "loss": 5.4462,
      "step": 445
    },
    {
      "epoch": 0.00446,
      "grad_norm": 0.9849435734941417,
      "learning_rate": 0.001338,
      "loss": 5.4578,
      "step": 446
    },
    {
      "epoch": 0.00447,
      "grad_norm": 1.1031854356255555,
      "learning_rate": 0.001341,
      "loss": 5.469,
      "step": 447
    },
    {
      "epoch": 0.00448,
      "grad_norm": 0.8550264287932019,
      "learning_rate": 0.0013440000000000001,
      "loss": 5.4411,
      "step": 448
    },
    {
      "epoch": 0.00449,
      "grad_norm": 0.8569858280927948,
      "learning_rate": 0.001347,
      "loss": 5.4491,
      "step": 449
    },
    {
      "epoch": 0.0045,
      "grad_norm": 0.766625402040655,
      "learning_rate": 0.00135,
      "loss": 5.4461,
      "step": 450
    },
    {
      "epoch": 0.00451,
      "grad_norm": 0.8992860281272513,
      "learning_rate": 0.001353,
      "loss": 5.4411,
      "step": 451
    },
    {
      "epoch": 0.00452,
      "grad_norm": 0.9587820940422671,
      "learning_rate": 0.001356,
      "loss": 5.4273,
      "step": 452
    },
    {
      "epoch": 0.00453,
      "grad_norm": 0.9706881368414635,
      "learning_rate": 0.001359,
      "loss": 5.4204,
      "step": 453
    },
    {
      "epoch": 0.00454,
      "grad_norm": 1.1942708782025127,
      "learning_rate": 0.0013620000000000001,
      "loss": 5.4481,
      "step": 454
    },
    {
      "epoch": 0.00455,
      "grad_norm": 1.0651260215062406,
      "learning_rate": 0.0013650000000000001,
      "loss": 5.4483,
      "step": 455
    },
    {
      "epoch": 0.00456,
      "grad_norm": 1.0894309440916736,
      "learning_rate": 0.001368,
      "loss": 5.4405,
      "step": 456
    },
    {
      "epoch": 0.00457,
      "grad_norm": 1.3632436316792311,
      "learning_rate": 0.001371,
      "loss": 5.442,
      "step": 457
    },
    {
      "epoch": 0.00458,
      "grad_norm": 0.9802910013598249,
      "learning_rate": 0.001374,
      "loss": 5.4423,
      "step": 458
    },
    {
      "epoch": 0.00459,
      "grad_norm": 0.9681577835390196,
      "learning_rate": 0.0013770000000000002,
      "loss": 5.4314,
      "step": 459
    },
    {
      "epoch": 0.0046,
      "grad_norm": 0.955881976726698,
      "learning_rate": 0.0013800000000000002,
      "loss": 5.4146,
      "step": 460
    },
    {
      "epoch": 0.00461,
      "grad_norm": 1.072138092727722,
      "learning_rate": 0.0013830000000000001,
      "loss": 5.4186,
      "step": 461
    },
    {
      "epoch": 0.00462,
      "grad_norm": 0.8672451154734739,
      "learning_rate": 0.001386,
      "loss": 5.4193,
      "step": 462
    },
    {
      "epoch": 0.00463,
      "grad_norm": 0.9908998654710571,
      "learning_rate": 0.001389,
      "loss": 5.4011,
      "step": 463
    },
    {
      "epoch": 0.00464,
      "grad_norm": 1.1599842863140275,
      "learning_rate": 0.001392,
      "loss": 5.4321,
      "step": 464
    },
    {
      "epoch": 0.00465,
      "grad_norm": 0.698212811337685,
      "learning_rate": 0.0013950000000000002,
      "loss": 5.3861,
      "step": 465
    },
    {
      "epoch": 0.00466,
      "grad_norm": 0.6209828526031017,
      "learning_rate": 0.0013980000000000002,
      "loss": 5.4004,
      "step": 466
    },
    {
      "epoch": 0.00467,
      "grad_norm": 0.626908730655191,
      "learning_rate": 0.0014010000000000001,
      "loss": 5.3894,
      "step": 467
    },
    {
      "epoch": 0.00468,
      "grad_norm": 0.6046317726003113,
      "learning_rate": 0.001404,
      "loss": 5.3787,
      "step": 468
    },
    {
      "epoch": 0.00469,
      "grad_norm": 0.5283854127913247,
      "learning_rate": 0.001407,
      "loss": 5.3767,
      "step": 469
    },
    {
      "epoch": 0.0047,
      "grad_norm": 0.5494342704278756,
      "learning_rate": 0.00141,
      "loss": 5.3859,
      "step": 470
    },
    {
      "epoch": 0.00471,
      "grad_norm": 0.6548172150181142,
      "learning_rate": 0.001413,
      "loss": 5.3653,
      "step": 471
    },
    {
      "epoch": 0.00472,
      "grad_norm": 0.8352552132751145,
      "learning_rate": 0.001416,
      "loss": 5.3799,
      "step": 472
    },
    {
      "epoch": 0.00473,
      "grad_norm": 1.04775859687285,
      "learning_rate": 0.001419,
      "loss": 5.3985,
      "step": 473
    },
    {
      "epoch": 0.00474,
      "grad_norm": 0.9808295146566943,
      "learning_rate": 0.0014219999999999999,
      "loss": 5.3787,
      "step": 474
    },
    {
      "epoch": 0.00475,
      "grad_norm": 1.0065096729700158,
      "learning_rate": 0.001425,
      "loss": 5.3725,
      "step": 475
    },
    {
      "epoch": 0.00476,
      "grad_norm": 1.0604383614131039,
      "learning_rate": 0.001428,
      "loss": 5.4172,
      "step": 476
    },
    {
      "epoch": 0.00477,
      "grad_norm": 0.5551620314639825,
      "learning_rate": 0.001431,
      "loss": 5.3484,
      "step": 477
    },
    {
      "epoch": 0.00478,
      "grad_norm": 0.6446758952809472,
      "learning_rate": 0.001434,
      "loss": 5.3819,
      "step": 478
    },
    {
      "epoch": 0.00479,
      "grad_norm": 0.6811842486215005,
      "learning_rate": 0.001437,
      "loss": 5.3551,
      "step": 479
    },
    {
      "epoch": 0.0048,
      "grad_norm": 0.6985415352827566,
      "learning_rate": 0.0014399999999999999,
      "loss": 5.3502,
      "step": 480
    },
    {
      "epoch": 0.00481,
      "grad_norm": 0.7430626707063542,
      "learning_rate": 0.001443,
      "loss": 5.3537,
      "step": 481
    },
    {
      "epoch": 0.00482,
      "grad_norm": 0.7883555304691023,
      "learning_rate": 0.001446,
      "loss": 5.3281,
      "step": 482
    },
    {
      "epoch": 0.00483,
      "grad_norm": 0.9604272864715826,
      "learning_rate": 0.001449,
      "loss": 5.3455,
      "step": 483
    },
    {
      "epoch": 0.00484,
      "grad_norm": 0.9435245256880889,
      "learning_rate": 0.001452,
      "loss": 5.3494,
      "step": 484
    },
    {
      "epoch": 0.00485,
      "grad_norm": 0.8042883678590446,
      "learning_rate": 0.001455,
      "loss": 5.3511,
      "step": 485
    },
    {
      "epoch": 0.00486,
      "grad_norm": 0.7958734942371152,
      "learning_rate": 0.001458,
      "loss": 5.3247,
      "step": 486
    },
    {
      "epoch": 0.00487,
      "grad_norm": 0.7735191187819994,
      "learning_rate": 0.001461,
      "loss": 5.3397,
      "step": 487
    },
    {
      "epoch": 0.00488,
      "grad_norm": 1.012081783792506,
      "learning_rate": 0.001464,
      "loss": 5.3265,
      "step": 488
    },
    {
      "epoch": 0.00489,
      "grad_norm": 0.8905045147335564,
      "learning_rate": 0.001467,
      "loss": 5.3131,
      "step": 489
    },
    {
      "epoch": 0.0049,
      "grad_norm": 0.8898357576674187,
      "learning_rate": 0.00147,
      "loss": 5.3259,
      "step": 490
    },
    {
      "epoch": 0.00491,
      "grad_norm": 0.9415043482581119,
      "learning_rate": 0.001473,
      "loss": 5.3269,
      "step": 491
    },
    {
      "epoch": 0.00492,
      "grad_norm": 0.8320755719970162,
      "learning_rate": 0.001476,
      "loss": 5.3196,
      "step": 492
    },
    {
      "epoch": 0.00493,
      "grad_norm": 0.8445852574519659,
      "learning_rate": 0.001479,
      "loss": 5.3145,
      "step": 493
    },
    {
      "epoch": 0.00494,
      "grad_norm": 1.055556747775556,
      "learning_rate": 0.001482,
      "loss": 5.3274,
      "step": 494
    },
    {
      "epoch": 0.00495,
      "grad_norm": 0.8469107680360277,
      "learning_rate": 0.001485,
      "loss": 5.3268,
      "step": 495
    },
    {
      "epoch": 0.00496,
      "grad_norm": 0.7635320378283988,
      "learning_rate": 0.001488,
      "loss": 5.3083,
      "step": 496
    },
    {
      "epoch": 0.00497,
      "grad_norm": 0.8508130432624411,
      "learning_rate": 0.001491,
      "loss": 5.2927,
      "step": 497
    },
    {
      "epoch": 0.00498,
      "grad_norm": 0.6847944056515368,
      "learning_rate": 0.001494,
      "loss": 5.3134,
      "step": 498
    },
    {
      "epoch": 0.00499,
      "grad_norm": 0.73441383592788,
      "learning_rate": 0.001497,
      "loss": 5.2894,
      "step": 499
    },
    {
      "epoch": 0.005,
      "grad_norm": 0.6599139353534871,
      "learning_rate": 0.0015,
      "loss": 5.2898,
      "step": 500
    },
    {
      "epoch": 0.00501,
      "grad_norm": 0.5852764030687648,
      "learning_rate": 0.001503,
      "loss": 5.2881,
      "step": 501
    },
    {
      "epoch": 0.00502,
      "grad_norm": 0.6853021422001514,
      "learning_rate": 0.001506,
      "loss": 5.2946,
      "step": 502
    },
    {
      "epoch": 0.00503,
      "grad_norm": 0.720141729806561,
      "learning_rate": 0.0015090000000000001,
      "loss": 5.2988,
      "step": 503
    },
    {
      "epoch": 0.00504,
      "grad_norm": 0.7571759373525393,
      "learning_rate": 0.001512,
      "loss": 5.2829,
      "step": 504
    },
    {
      "epoch": 0.00505,
      "grad_norm": 0.7892633543044816,
      "learning_rate": 0.001515,
      "loss": 5.2795,
      "step": 505
    },
    {
      "epoch": 0.00506,
      "grad_norm": 0.7628738296895545,
      "learning_rate": 0.001518,
      "loss": 5.2778,
      "step": 506
    },
    {
      "epoch": 0.00507,
      "grad_norm": 0.7334519380566521,
      "learning_rate": 0.001521,
      "loss": 5.2722,
      "step": 507
    },
    {
      "epoch": 0.00508,
      "grad_norm": 0.7350882061617702,
      "learning_rate": 0.001524,
      "loss": 5.2693,
      "step": 508
    },
    {
      "epoch": 0.00509,
      "grad_norm": 0.7407389039658853,
      "learning_rate": 0.0015270000000000001,
      "loss": 5.2562,
      "step": 509
    },
    {
      "epoch": 0.0051,
      "grad_norm": 0.7104400166661097,
      "learning_rate": 0.0015300000000000001,
      "loss": 5.2696,
      "step": 510
    },
    {
      "epoch": 0.00511,
      "grad_norm": 0.8484400275698938,
      "learning_rate": 0.001533,
      "loss": 5.2541,
      "step": 511
    },
    {
      "epoch": 0.00512,
      "grad_norm": 0.7909533536283434,
      "learning_rate": 0.001536,
      "loss": 5.2475,
      "step": 512
    },
    {
      "epoch": 0.00513,
      "grad_norm": 0.9481517521047255,
      "learning_rate": 0.001539,
      "loss": 5.2684,
      "step": 513
    },
    {
      "epoch": 0.00514,
      "grad_norm": 1.405665610624422,
      "learning_rate": 0.001542,
      "loss": 5.2742,
      "step": 514
    },
    {
      "epoch": 0.00515,
      "grad_norm": 0.9237050984729354,
      "learning_rate": 0.0015450000000000001,
      "loss": 5.2636,
      "step": 515
    },
    {
      "epoch": 0.00516,
      "grad_norm": 1.1634719669204763,
      "learning_rate": 0.0015480000000000001,
      "loss": 5.2755,
      "step": 516
    },
    {
      "epoch": 0.00517,
      "grad_norm": 0.8585760471561034,
      "learning_rate": 0.001551,
      "loss": 5.2577,
      "step": 517
    },
    {
      "epoch": 0.00518,
      "grad_norm": 0.728905665920263,
      "learning_rate": 0.001554,
      "loss": 5.2422,
      "step": 518
    },
    {
      "epoch": 0.00519,
      "grad_norm": 0.841003643450708,
      "learning_rate": 0.001557,
      "loss": 5.2472,
      "step": 519
    },
    {
      "epoch": 0.0052,
      "grad_norm": 0.8542329179478354,
      "learning_rate": 0.0015600000000000002,
      "loss": 5.2573,
      "step": 520
    },
    {
      "epoch": 0.00521,
      "grad_norm": 0.9136344705581151,
      "learning_rate": 0.0015630000000000002,
      "loss": 5.2327,
      "step": 521
    },
    {
      "epoch": 0.00522,
      "grad_norm": 1.1142541660381047,
      "learning_rate": 0.0015660000000000001,
      "loss": 5.2435,
      "step": 522
    },
    {
      "epoch": 0.00523,
      "grad_norm": 1.2424782927197504,
      "learning_rate": 0.001569,
      "loss": 5.2642,
      "step": 523
    },
    {
      "epoch": 0.00524,
      "grad_norm": 0.9564855927553343,
      "learning_rate": 0.001572,
      "loss": 5.264,
      "step": 524
    },
    {
      "epoch": 0.00525,
      "grad_norm": 0.8662515649518988,
      "learning_rate": 0.001575,
      "loss": 5.2537,
      "step": 525
    },
    {
      "epoch": 0.00526,
      "grad_norm": 0.72079723918141,
      "learning_rate": 0.0015780000000000002,
      "loss": 5.2224,
      "step": 526
    },
    {
      "epoch": 0.00527,
      "grad_norm": 0.6822789926865414,
      "learning_rate": 0.0015810000000000002,
      "loss": 5.2353,
      "step": 527
    },
    {
      "epoch": 0.00528,
      "grad_norm": 0.6986034229049856,
      "learning_rate": 0.0015840000000000001,
      "loss": 5.2128,
      "step": 528
    },
    {
      "epoch": 0.00529,
      "grad_norm": 0.6935526974121139,
      "learning_rate": 0.001587,
      "loss": 5.2236,
      "step": 529
    },
    {
      "epoch": 0.0053,
      "grad_norm": 0.6635846542220551,
      "learning_rate": 0.00159,
      "loss": 5.2302,
      "step": 530
    },
    {
      "epoch": 0.00531,
      "grad_norm": 0.7197123783695031,
      "learning_rate": 0.001593,
      "loss": 5.2167,
      "step": 531
    },
    {
      "epoch": 0.00532,
      "grad_norm": 0.6862921713046577,
      "learning_rate": 0.0015960000000000002,
      "loss": 5.2205,
      "step": 532
    },
    {
      "epoch": 0.00533,
      "grad_norm": 0.672254585940336,
      "learning_rate": 0.0015990000000000002,
      "loss": 5.2165,
      "step": 533
    },
    {
      "epoch": 0.00534,
      "grad_norm": 0.6180247915886188,
      "learning_rate": 0.0016020000000000001,
      "loss": 5.2018,
      "step": 534
    },
    {
      "epoch": 0.00535,
      "grad_norm": 0.7076887136252737,
      "learning_rate": 0.001605,
      "loss": 5.2099,
      "step": 535
    },
    {
      "epoch": 0.00536,
      "grad_norm": 0.8627381010586813,
      "learning_rate": 0.001608,
      "loss": 5.2158,
      "step": 536
    },
    {
      "epoch": 0.00537,
      "grad_norm": 0.9890089503230703,
      "learning_rate": 0.0016110000000000002,
      "loss": 5.2125,
      "step": 537
    },
    {
      "epoch": 0.00538,
      "grad_norm": 1.0890684861329838,
      "learning_rate": 0.0016140000000000002,
      "loss": 5.1997,
      "step": 538
    },
    {
      "epoch": 0.00539,
      "grad_norm": 0.7898695514456295,
      "learning_rate": 0.0016170000000000002,
      "loss": 5.1885,
      "step": 539
    },
    {
      "epoch": 0.0054,
      "grad_norm": 0.7877858658336557,
      "learning_rate": 0.0016200000000000001,
      "loss": 5.1979,
      "step": 540
    },
    {
      "epoch": 0.00541,
      "grad_norm": 0.8613625325852945,
      "learning_rate": 0.001623,
      "loss": 5.1884,
      "step": 541
    },
    {
      "epoch": 0.00542,
      "grad_norm": 0.9310959295325639,
      "learning_rate": 0.001626,
      "loss": 5.2135,
      "step": 542
    },
    {
      "epoch": 0.00543,
      "grad_norm": 1.0650490028229627,
      "learning_rate": 0.0016290000000000002,
      "loss": 5.1821,
      "step": 543
    },
    {
      "epoch": 0.00544,
      "grad_norm": 1.058155288535174,
      "learning_rate": 0.0016320000000000002,
      "loss": 5.2106,
      "step": 544
    },
    {
      "epoch": 0.00545,
      "grad_norm": 0.7888748411290551,
      "learning_rate": 0.0016350000000000002,
      "loss": 5.2093,
      "step": 545
    },
    {
      "epoch": 0.00546,
      "grad_norm": 0.8327623405474364,
      "learning_rate": 0.0016380000000000001,
      "loss": 5.1821,
      "step": 546
    },
    {
      "epoch": 0.00547,
      "grad_norm": 0.9359889846839972,
      "learning_rate": 0.001641,
      "loss": 5.1867,
      "step": 547
    },
    {
      "epoch": 0.00548,
      "grad_norm": 0.8250520922796094,
      "learning_rate": 0.001644,
      "loss": 5.1781,
      "step": 548
    },
    {
      "epoch": 0.00549,
      "grad_norm": 0.8346002681162655,
      "learning_rate": 0.0016470000000000002,
      "loss": 5.1786,
      "step": 549
    },
    {
      "epoch": 0.0055,
      "grad_norm": 0.8307775357404767,
      "learning_rate": 0.0016500000000000002,
      "loss": 5.1901,
      "step": 550
    },
    {
      "epoch": 0.00551,
      "grad_norm": 0.8868587049067305,
      "learning_rate": 0.0016530000000000002,
      "loss": 5.1882,
      "step": 551
    },
    {
      "epoch": 0.00552,
      "grad_norm": 1.0021721179116465,
      "learning_rate": 0.0016560000000000001,
      "loss": 5.1887,
      "step": 552
    },
    {
      "epoch": 0.00553,
      "grad_norm": 1.0435640829012027,
      "learning_rate": 0.001659,
      "loss": 5.1828,
      "step": 553
    },
    {
      "epoch": 0.00554,
      "grad_norm": 1.091952614784726,
      "learning_rate": 0.0016620000000000003,
      "loss": 5.1843,
      "step": 554
    },
    {
      "epoch": 0.00555,
      "grad_norm": 0.7530351004511261,
      "learning_rate": 0.0016650000000000002,
      "loss": 5.1804,
      "step": 555
    },
    {
      "epoch": 0.00556,
      "grad_norm": 0.7840106393058217,
      "learning_rate": 0.0016680000000000002,
      "loss": 5.1664,
      "step": 556
    },
    {
      "epoch": 0.00557,
      "grad_norm": 0.6999816254444311,
      "learning_rate": 0.0016710000000000002,
      "loss": 5.1437,
      "step": 557
    },
    {
      "epoch": 0.00558,
      "grad_norm": 0.7845980119871422,
      "learning_rate": 0.0016740000000000001,
      "loss": 5.1792,
      "step": 558
    },
    {
      "epoch": 0.00559,
      "grad_norm": 1.002237738003299,
      "learning_rate": 0.001677,
      "loss": 5.165,
      "step": 559
    },
    {
      "epoch": 0.0056,
      "grad_norm": 1.1590360602458978,
      "learning_rate": 0.0016800000000000003,
      "loss": 5.15,
      "step": 560
    },
    {
      "epoch": 0.00561,
      "grad_norm": 0.9693350319936842,
      "learning_rate": 0.0016830000000000003,
      "loss": 5.1673,
      "step": 561
    },
    {
      "epoch": 0.00562,
      "grad_norm": 0.9337806686381701,
      "learning_rate": 0.0016860000000000002,
      "loss": 5.1656,
      "step": 562
    },
    {
      "epoch": 0.00563,
      "grad_norm": 0.7021371152666548,
      "learning_rate": 0.001689,
      "loss": 5.1554,
      "step": 563
    },
    {
      "epoch": 0.00564,
      "grad_norm": 0.761762323285238,
      "learning_rate": 0.001692,
      "loss": 5.1364,
      "step": 564
    },
    {
      "epoch": 0.00565,
      "grad_norm": 0.9962272803832337,
      "learning_rate": 0.001695,
      "loss": 5.1465,
      "step": 565
    },
    {
      "epoch": 0.00566,
      "grad_norm": 0.9270663941212276,
      "learning_rate": 0.0016979999999999999,
      "loss": 5.1454,
      "step": 566
    },
    {
      "epoch": 0.00567,
      "grad_norm": 1.0296751705367089,
      "learning_rate": 0.0017009999999999998,
      "loss": 5.1403,
      "step": 567
    },
    {
      "epoch": 0.00568,
      "grad_norm": 1.0921308967550072,
      "learning_rate": 0.0017039999999999998,
      "loss": 5.1647,
      "step": 568
    },
    {
      "epoch": 0.00569,
      "grad_norm": 0.9680118526613578,
      "learning_rate": 0.001707,
      "loss": 5.1376,
      "step": 569
    },
    {
      "epoch": 0.0057,
      "grad_norm": 1.1373894554942883,
      "learning_rate": 0.00171,
      "loss": 5.1667,
      "step": 570
    },
    {
      "epoch": 0.00571,
      "grad_norm": 0.9491793536820188,
      "learning_rate": 0.001713,
      "loss": 5.1618,
      "step": 571
    },
    {
      "epoch": 0.00572,
      "grad_norm": 1.0938883440367575,
      "learning_rate": 0.0017159999999999999,
      "loss": 5.1499,
      "step": 572
    },
    {
      "epoch": 0.00573,
      "grad_norm": 0.8953824806877001,
      "learning_rate": 0.0017189999999999998,
      "loss": 5.1459,
      "step": 573
    },
    {
      "epoch": 0.00574,
      "grad_norm": 0.8777825611555061,
      "learning_rate": 0.001722,
      "loss": 5.1467,
      "step": 574
    },
    {
      "epoch": 0.00575,
      "grad_norm": 0.9427806830230203,
      "learning_rate": 0.001725,
      "loss": 5.132,
      "step": 575
    },
    {
      "epoch": 0.00576,
      "grad_norm": 0.8846520959631657,
      "learning_rate": 0.001728,
      "loss": 5.1269,
      "step": 576
    },
    {
      "epoch": 0.00577,
      "grad_norm": 0.6344552018416748,
      "learning_rate": 0.001731,
      "loss": 5.1395,
      "step": 577
    },
    {
      "epoch": 0.00578,
      "grad_norm": 0.6771922101340231,
      "learning_rate": 0.0017339999999999999,
      "loss": 5.1199,
      "step": 578
    },
    {
      "epoch": 0.00579,
      "grad_norm": 0.5381676919488529,
      "learning_rate": 0.0017369999999999998,
      "loss": 5.1001,
      "step": 579
    },
    {
      "epoch": 0.0058,
      "grad_norm": 0.5416857271346956,
      "learning_rate": 0.00174,
      "loss": 5.1179,
      "step": 580
    },
    {
      "epoch": 0.00581,
      "grad_norm": 0.4886409554358658,
      "learning_rate": 0.001743,
      "loss": 5.1131,
      "step": 581
    },
    {
      "epoch": 0.00582,
      "grad_norm": 0.3953796837190132,
      "learning_rate": 0.001746,
      "loss": 5.0842,
      "step": 582
    },
    {
      "epoch": 0.00583,
      "grad_norm": 0.3816231764982968,
      "learning_rate": 0.001749,
      "loss": 5.1076,
      "step": 583
    },
    {
      "epoch": 0.00584,
      "grad_norm": 0.39062356918599367,
      "learning_rate": 0.0017519999999999999,
      "loss": 5.1003,
      "step": 584
    },
    {
      "epoch": 0.00585,
      "grad_norm": 0.3915858898605007,
      "learning_rate": 0.0017549999999999998,
      "loss": 5.0837,
      "step": 585
    },
    {
      "epoch": 0.00586,
      "grad_norm": 0.4063811781565053,
      "learning_rate": 0.001758,
      "loss": 5.0866,
      "step": 586
    },
    {
      "epoch": 0.00587,
      "grad_norm": 0.4274699383917251,
      "learning_rate": 0.001761,
      "loss": 5.0709,
      "step": 587
    },
    {
      "epoch": 0.00588,
      "grad_norm": 0.5008029306674315,
      "learning_rate": 0.001764,
      "loss": 5.0767,
      "step": 588
    },
    {
      "epoch": 0.00589,
      "grad_norm": 0.7229655060897183,
      "learning_rate": 0.001767,
      "loss": 5.084,
      "step": 589
    },
    {
      "epoch": 0.0059,
      "grad_norm": 0.9281114393382421,
      "learning_rate": 0.0017699999999999999,
      "loss": 5.0779,
      "step": 590
    },
    {
      "epoch": 0.00591,
      "grad_norm": 0.9689787780419555,
      "learning_rate": 0.001773,
      "loss": 5.0756,
      "step": 591
    },
    {
      "epoch": 0.00592,
      "grad_norm": 0.8435580831880815,
      "learning_rate": 0.001776,
      "loss": 5.0975,
      "step": 592
    },
    {
      "epoch": 0.00593,
      "grad_norm": 0.9986679613531879,
      "learning_rate": 0.001779,
      "loss": 5.098,
      "step": 593
    },
    {
      "epoch": 0.00594,
      "grad_norm": 1.0996840470415932,
      "learning_rate": 0.001782,
      "loss": 5.1014,
      "step": 594
    },
    {
      "epoch": 0.00595,
      "grad_norm": 0.9601369241773534,
      "learning_rate": 0.001785,
      "loss": 5.0957,
      "step": 595
    },
    {
      "epoch": 0.00596,
      "grad_norm": 0.8606352953891906,
      "learning_rate": 0.0017879999999999999,
      "loss": 5.0932,
      "step": 596
    },
    {
      "epoch": 0.00597,
      "grad_norm": 0.8716680293105395,
      "learning_rate": 0.001791,
      "loss": 5.066,
      "step": 597
    },
    {
      "epoch": 0.00598,
      "grad_norm": 0.9954805607751368,
      "learning_rate": 0.001794,
      "loss": 5.0837,
      "step": 598
    },
    {
      "epoch": 0.00599,
      "grad_norm": 1.0139742121604893,
      "learning_rate": 0.001797,
      "loss": 5.0892,
      "step": 599
    },
    {
      "epoch": 0.006,
      "grad_norm": 0.8994983668637855,
      "learning_rate": 0.0018,
      "loss": 5.083,
      "step": 600
    },
    {
      "epoch": 0.00601,
      "grad_norm": 1.012817382509115,
      "learning_rate": 0.001803,
      "loss": 5.0872,
      "step": 601
    },
    {
      "epoch": 0.00602,
      "grad_norm": 0.829792766990954,
      "learning_rate": 0.0018059999999999999,
      "loss": 5.0655,
      "step": 602
    },
    {
      "epoch": 0.00603,
      "grad_norm": 0.8978641621726422,
      "learning_rate": 0.001809,
      "loss": 5.0896,
      "step": 603
    },
    {
      "epoch": 0.00604,
      "grad_norm": 1.006526980400353,
      "learning_rate": 0.001812,
      "loss": 5.0733,
      "step": 604
    },
    {
      "epoch": 0.00605,
      "grad_norm": 0.9664384705752951,
      "learning_rate": 0.001815,
      "loss": 5.0828,
      "step": 605
    },
    {
      "epoch": 0.00606,
      "grad_norm": 1.0962791606856168,
      "learning_rate": 0.001818,
      "loss": 5.0888,
      "step": 606
    },
    {
      "epoch": 0.00607,
      "grad_norm": 0.9313257227556363,
      "learning_rate": 0.001821,
      "loss": 5.0876,
      "step": 607
    },
    {
      "epoch": 0.00608,
      "grad_norm": 0.9207616431206566,
      "learning_rate": 0.001824,
      "loss": 5.068,
      "step": 608
    },
    {
      "epoch": 0.00609,
      "grad_norm": 0.9552257534317795,
      "learning_rate": 0.001827,
      "loss": 5.0751,
      "step": 609
    },
    {
      "epoch": 0.0061,
      "grad_norm": 1.1338881088580717,
      "learning_rate": 0.00183,
      "loss": 5.062,
      "step": 610
    },
    {
      "epoch": 0.00611,
      "grad_norm": 0.907481169345242,
      "learning_rate": 0.001833,
      "loss": 5.0576,
      "step": 611
    },
    {
      "epoch": 0.00612,
      "grad_norm": 1.04757168088542,
      "learning_rate": 0.001836,
      "loss": 5.0686,
      "step": 612
    },
    {
      "epoch": 0.00613,
      "grad_norm": 0.9021638869008188,
      "learning_rate": 0.001839,
      "loss": 5.0559,
      "step": 613
    },
    {
      "epoch": 0.00614,
      "grad_norm": 0.7877209937196055,
      "learning_rate": 0.001842,
      "loss": 5.0768,
      "step": 614
    },
    {
      "epoch": 0.00615,
      "grad_norm": 0.7456491069500266,
      "learning_rate": 0.001845,
      "loss": 5.0572,
      "step": 615
    },
    {
      "epoch": 0.00616,
      "grad_norm": 0.7643816652567319,
      "learning_rate": 0.001848,
      "loss": 5.043,
      "step": 616
    },
    {
      "epoch": 0.00617,
      "grad_norm": 0.8920948602141958,
      "learning_rate": 0.001851,
      "loss": 5.0555,
      "step": 617
    },
    {
      "epoch": 0.00618,
      "grad_norm": 1.0257178323795717,
      "learning_rate": 0.001854,
      "loss": 5.0507,
      "step": 618
    },
    {
      "epoch": 0.00619,
      "grad_norm": 0.7092119630036949,
      "learning_rate": 0.001857,
      "loss": 5.0429,
      "step": 619
    },
    {
      "epoch": 0.0062,
      "grad_norm": 0.6092469798525071,
      "learning_rate": 0.00186,
      "loss": 5.0262,
      "step": 620
    },
    {
      "epoch": 0.00621,
      "grad_norm": 0.7552706683211996,
      "learning_rate": 0.001863,
      "loss": 5.032,
      "step": 621
    },
    {
      "epoch": 0.00622,
      "grad_norm": 0.6988650940233329,
      "learning_rate": 0.001866,
      "loss": 5.0245,
      "step": 622
    },
    {
      "epoch": 0.00623,
      "grad_norm": 0.6062593525325608,
      "learning_rate": 0.001869,
      "loss": 5.0091,
      "step": 623
    },
    {
      "epoch": 0.00624,
      "grad_norm": 0.555479795278756,
      "learning_rate": 0.001872,
      "loss": 5.0161,
      "step": 624
    },
    {
      "epoch": 0.00625,
      "grad_norm": 0.6339519418385815,
      "learning_rate": 0.001875,
      "loss": 5.0276,
      "step": 625
    },
    {
      "epoch": 0.00626,
      "grad_norm": 0.7820017260467441,
      "learning_rate": 0.0018780000000000001,
      "loss": 5.0084,
      "step": 626
    },
    {
      "epoch": 0.00627,
      "grad_norm": 0.8263278266088275,
      "learning_rate": 0.001881,
      "loss": 5.0003,
      "step": 627
    },
    {
      "epoch": 0.00628,
      "grad_norm": 0.5717634931424201,
      "learning_rate": 0.001884,
      "loss": 5.0204,
      "step": 628
    },
    {
      "epoch": 0.00629,
      "grad_norm": 0.5505525631139665,
      "learning_rate": 0.001887,
      "loss": 4.9928,
      "step": 629
    },
    {
      "epoch": 0.0063,
      "grad_norm": 0.47331922258372455,
      "learning_rate": 0.00189,
      "loss": 4.9837,
      "step": 630
    },
    {
      "epoch": 0.00631,
      "grad_norm": 0.4173073508747504,
      "learning_rate": 0.0018930000000000002,
      "loss": 4.9757,
      "step": 631
    },
    {
      "epoch": 0.00632,
      "grad_norm": 0.42808966750972455,
      "learning_rate": 0.0018960000000000001,
      "loss": 4.9928,
      "step": 632
    },
    {
      "epoch": 0.00633,
      "grad_norm": 0.46750467774391197,
      "learning_rate": 0.001899,
      "loss": 4.9879,
      "step": 633
    },
    {
      "epoch": 0.00634,
      "grad_norm": 0.5696215613123979,
      "learning_rate": 0.001902,
      "loss": 4.9763,
      "step": 634
    },
    {
      "epoch": 0.00635,
      "grad_norm": 0.7138673203190975,
      "learning_rate": 0.001905,
      "loss": 4.9686,
      "step": 635
    },
    {
      "epoch": 0.00636,
      "grad_norm": 0.8836310721952346,
      "learning_rate": 0.001908,
      "loss": 4.9828,
      "step": 636
    },
    {
      "epoch": 0.00637,
      "grad_norm": 0.9755328249694639,
      "learning_rate": 0.0019110000000000002,
      "loss": 5.002,
      "step": 637
    },
    {
      "epoch": 0.00638,
      "grad_norm": 1.3272264449165134,
      "learning_rate": 0.0019140000000000001,
      "loss": 5.0051,
      "step": 638
    },
    {
      "epoch": 0.00639,
      "grad_norm": 0.9140856571338883,
      "learning_rate": 0.001917,
      "loss": 4.9726,
      "step": 639
    },
    {
      "epoch": 0.0064,
      "grad_norm": 1.0466736994412218,
      "learning_rate": 0.00192,
      "loss": 4.9869,
      "step": 640
    },
    {
      "epoch": 0.00641,
      "grad_norm": 1.1161814084223103,
      "learning_rate": 0.001923,
      "loss": 5.0003,
      "step": 641
    },
    {
      "epoch": 0.00642,
      "grad_norm": 1.1352153221992676,
      "learning_rate": 0.001926,
      "loss": 4.992,
      "step": 642
    },
    {
      "epoch": 0.00643,
      "grad_norm": 1.2268384891507862,
      "learning_rate": 0.0019290000000000002,
      "loss": 5.008,
      "step": 643
    },
    {
      "epoch": 0.00644,
      "grad_norm": 0.9564122465750423,
      "learning_rate": 0.0019320000000000001,
      "loss": 4.9857,
      "step": 644
    },
    {
      "epoch": 0.00645,
      "grad_norm": 0.9066234784688915,
      "learning_rate": 0.001935,
      "loss": 4.9889,
      "step": 645
    },
    {
      "epoch": 0.00646,
      "grad_norm": 0.894776309426942,
      "learning_rate": 0.001938,
      "loss": 4.995,
      "step": 646
    },
    {
      "epoch": 0.00647,
      "grad_norm": 1.036514260058091,
      "learning_rate": 0.001941,
      "loss": 5.0081,
      "step": 647
    },
    {
      "epoch": 0.00648,
      "grad_norm": 1.0669688261896326,
      "learning_rate": 0.0019440000000000002,
      "loss": 4.9991,
      "step": 648
    },
    {
      "epoch": 0.00649,
      "grad_norm": 1.0027602192299327,
      "learning_rate": 0.0019470000000000002,
      "loss": 4.9834,
      "step": 649
    },
    {
      "epoch": 0.0065,
      "grad_norm": 1.2453243505592877,
      "learning_rate": 0.0019500000000000001,
      "loss": 5.0183,
      "step": 650
    },
    {
      "epoch": 0.00651,
      "grad_norm": 0.9810465710779535,
      "learning_rate": 0.001953,
      "loss": 4.9899,
      "step": 651
    },
    {
      "epoch": 0.00652,
      "grad_norm": 1.0529607937914427,
      "learning_rate": 0.0019560000000000003,
      "loss": 4.9855,
      "step": 652
    },
    {
      "epoch": 0.00653,
      "grad_norm": 0.8367844014470341,
      "learning_rate": 0.0019590000000000002,
      "loss": 4.9719,
      "step": 653
    },
    {
      "epoch": 0.00654,
      "grad_norm": 0.9556352679930769,
      "learning_rate": 0.001962,
      "loss": 4.96,
      "step": 654
    },
    {
      "epoch": 0.00655,
      "grad_norm": 1.0712859822115042,
      "learning_rate": 0.001965,
      "loss": 4.9811,
      "step": 655
    },
    {
      "epoch": 0.00656,
      "grad_norm": 0.8381525542412449,
      "learning_rate": 0.001968,
      "loss": 4.9628,
      "step": 656
    },
    {
      "epoch": 0.00657,
      "grad_norm": 0.853522104902103,
      "learning_rate": 0.001971,
      "loss": 4.9486,
      "step": 657
    },
    {
      "epoch": 0.00658,
      "grad_norm": 0.9276507218825019,
      "learning_rate": 0.001974,
      "loss": 4.9746,
      "step": 658
    },
    {
      "epoch": 0.00659,
      "grad_norm": 1.0041122671115763,
      "learning_rate": 0.001977,
      "loss": 4.9552,
      "step": 659
    },
    {
      "epoch": 0.0066,
      "grad_norm": 0.925609681342879,
      "learning_rate": 0.00198,
      "loss": 4.9516,
      "step": 660
    },
    {
      "epoch": 0.00661,
      "grad_norm": 0.9340897694356347,
      "learning_rate": 0.001983,
      "loss": 4.9301,
      "step": 661
    },
    {
      "epoch": 0.00662,
      "grad_norm": 1.1039924911609775,
      "learning_rate": 0.0019860000000000004,
      "loss": 4.9353,
      "step": 662
    },
    {
      "epoch": 0.00663,
      "grad_norm": 0.842271096957419,
      "learning_rate": 0.0019890000000000003,
      "loss": 4.942,
      "step": 663
    },
    {
      "epoch": 0.00664,
      "grad_norm": 0.9255021185692508,
      "learning_rate": 0.0019920000000000003,
      "loss": 4.9294,
      "step": 664
    },
    {
      "epoch": 0.00665,
      "grad_norm": 0.9624840652606003,
      "learning_rate": 0.0019950000000000002,
      "loss": 4.9033,
      "step": 665
    },
    {
      "epoch": 0.00666,
      "grad_norm": 0.8706912476713129,
      "learning_rate": 0.001998,
      "loss": 4.9275,
      "step": 666
    },
    {
      "epoch": 0.00667,
      "grad_norm": 0.9355886153293348,
      "learning_rate": 0.002001,
      "loss": 4.9186,
      "step": 667
    },
    {
      "epoch": 0.00668,
      "grad_norm": 0.9525697321085718,
      "learning_rate": 0.002004,
      "loss": 4.9422,
      "step": 668
    },
    {
      "epoch": 0.00669,
      "grad_norm": 0.9357519892866003,
      "learning_rate": 0.002007,
      "loss": 4.9423,
      "step": 669
    },
    {
      "epoch": 0.0067,
      "grad_norm": 0.8227655724451596,
      "learning_rate": 0.00201,
      "loss": 4.9287,
      "step": 670
    },
    {
      "epoch": 0.00671,
      "grad_norm": 0.6363703099565691,
      "learning_rate": 0.002013,
      "loss": 4.8975,
      "step": 671
    },
    {
      "epoch": 0.00672,
      "grad_norm": 0.5774852286623988,
      "learning_rate": 0.002016,
      "loss": 4.894,
      "step": 672
    },
    {
      "epoch": 0.00673,
      "grad_norm": 0.5472152449551199,
      "learning_rate": 0.002019,
      "loss": 4.8881,
      "step": 673
    },
    {
      "epoch": 0.00674,
      "grad_norm": 0.5267919584843693,
      "learning_rate": 0.0020220000000000004,
      "loss": 4.8857,
      "step": 674
    },
    {
      "epoch": 0.00675,
      "grad_norm": 0.4823533534454447,
      "learning_rate": 0.0020250000000000003,
      "loss": 4.8922,
      "step": 675
    },
    {
      "epoch": 0.00676,
      "grad_norm": 0.5135326218761617,
      "learning_rate": 0.0020280000000000003,
      "loss": 4.8739,
      "step": 676
    },
    {
      "epoch": 0.00677,
      "grad_norm": 0.5629070673817519,
      "learning_rate": 0.0020310000000000003,
      "loss": 4.8753,
      "step": 677
    },
    {
      "epoch": 0.00678,
      "grad_norm": 0.6636518437686225,
      "learning_rate": 0.0020340000000000002,
      "loss": 4.8844,
      "step": 678
    },
    {
      "epoch": 0.00679,
      "grad_norm": 0.7939150938071041,
      "learning_rate": 0.002037,
      "loss": 4.8694,
      "step": 679
    },
    {
      "epoch": 0.0068,
      "grad_norm": 0.6961050241971515,
      "learning_rate": 0.00204,
      "loss": 4.8681,
      "step": 680
    },
    {
      "epoch": 0.00681,
      "grad_norm": 0.55281231514776,
      "learning_rate": 0.002043,
      "loss": 4.854,
      "step": 681
    },
    {
      "epoch": 0.00682,
      "grad_norm": 0.6966375314025457,
      "learning_rate": 0.002046,
      "loss": 4.8606,
      "step": 682
    },
    {
      "epoch": 0.00683,
      "grad_norm": 0.7919918537287608,
      "learning_rate": 0.002049,
      "loss": 4.8437,
      "step": 683
    },
    {
      "epoch": 0.00684,
      "grad_norm": 1.014505658611748,
      "learning_rate": 0.002052,
      "loss": 4.868,
      "step": 684
    },
    {
      "epoch": 0.00685,
      "grad_norm": 1.1979460789898289,
      "learning_rate": 0.0020550000000000004,
      "loss": 4.8824,
      "step": 685
    },
    {
      "epoch": 0.00686,
      "grad_norm": 1.0209197434565493,
      "learning_rate": 0.0020580000000000004,
      "loss": 4.8555,
      "step": 686
    },
    {
      "epoch": 0.00687,
      "grad_norm": 0.8666785819902827,
      "learning_rate": 0.0020610000000000003,
      "loss": 4.8457,
      "step": 687
    },
    {
      "epoch": 0.00688,
      "grad_norm": 1.066277682185938,
      "learning_rate": 0.002064,
      "loss": 4.8576,
      "step": 688
    },
    {
      "epoch": 0.00689,
      "grad_norm": 0.9564373481813865,
      "learning_rate": 0.002067,
      "loss": 4.8511,
      "step": 689
    },
    {
      "epoch": 0.0069,
      "grad_norm": 1.0921039434762347,
      "learning_rate": 0.00207,
      "loss": 4.854,
      "step": 690
    },
    {
      "epoch": 0.00691,
      "grad_norm": 0.9682767243250711,
      "learning_rate": 0.0020729999999999998,
      "loss": 4.8265,
      "step": 691
    },
    {
      "epoch": 0.00692,
      "grad_norm": 1.0182149348151321,
      "learning_rate": 0.0020759999999999997,
      "loss": 4.8523,
      "step": 692
    },
    {
      "epoch": 0.00693,
      "grad_norm": 1.1559582715656997,
      "learning_rate": 0.0020789999999999997,
      "loss": 4.8626,
      "step": 693
    },
    {
      "epoch": 0.00694,
      "grad_norm": 0.8731253854999043,
      "learning_rate": 0.002082,
      "loss": 4.8314,
      "step": 694
    },
    {
      "epoch": 0.00695,
      "grad_norm": 1.02924772696324,
      "learning_rate": 0.002085,
      "loss": 4.8388,
      "step": 695
    },
    {
      "epoch": 0.00696,
      "grad_norm": 1.1148665982097032,
      "learning_rate": 0.002088,
      "loss": 4.8532,
      "step": 696
    },
    {
      "epoch": 0.00697,
      "grad_norm": 1.1306884538795905,
      "learning_rate": 0.002091,
      "loss": 4.8556,
      "step": 697
    },
    {
      "epoch": 0.00698,
      "grad_norm": 1.267565149046305,
      "learning_rate": 0.002094,
      "loss": 4.8474,
      "step": 698
    },
    {
      "epoch": 0.00699,
      "grad_norm": 0.9067921371830103,
      "learning_rate": 0.002097,
      "loss": 4.8312,
      "step": 699
    },
    {
      "epoch": 0.007,
      "grad_norm": 0.8797095491461238,
      "learning_rate": 0.0021,
      "loss": 4.8378,
      "step": 700
    },
    {
      "epoch": 0.00701,
      "grad_norm": 0.9764684557107473,
      "learning_rate": 0.002103,
      "loss": 4.8231,
      "step": 701
    },
    {
      "epoch": 0.00702,
      "grad_norm": 0.9975561372265659,
      "learning_rate": 0.002106,
      "loss": 4.8269,
      "step": 702
    },
    {
      "epoch": 0.00703,
      "grad_norm": 0.8409706105195134,
      "learning_rate": 0.0021089999999999998,
      "loss": 4.8046,
      "step": 703
    },
    {
      "epoch": 0.00704,
      "grad_norm": 0.8602349489288845,
      "learning_rate": 0.0021119999999999997,
      "loss": 4.8112,
      "step": 704
    },
    {
      "epoch": 0.00705,
      "grad_norm": 0.810283974291816,
      "learning_rate": 0.002115,
      "loss": 4.8189,
      "step": 705
    },
    {
      "epoch": 0.00706,
      "grad_norm": 0.7904979262360543,
      "learning_rate": 0.002118,
      "loss": 4.8127,
      "step": 706
    },
    {
      "epoch": 0.00707,
      "grad_norm": 0.8638007527609218,
      "learning_rate": 0.002121,
      "loss": 4.7902,
      "step": 707
    },
    {
      "epoch": 0.00708,
      "grad_norm": 0.9274642295068019,
      "learning_rate": 0.002124,
      "loss": 4.7754,
      "step": 708
    },
    {
      "epoch": 0.00709,
      "grad_norm": 0.9265048264631317,
      "learning_rate": 0.002127,
      "loss": 4.8051,
      "step": 709
    },
    {
      "epoch": 0.0071,
      "grad_norm": 1.0021235226233682,
      "learning_rate": 0.00213,
      "loss": 4.8021,
      "step": 710
    },
    {
      "epoch": 0.00711,
      "grad_norm": 0.8616225643918476,
      "learning_rate": 0.002133,
      "loss": 4.7687,
      "step": 711
    },
    {
      "epoch": 0.00712,
      "grad_norm": 1.1509118831082872,
      "learning_rate": 0.002136,
      "loss": 4.8063,
      "step": 712
    },
    {
      "epoch": 0.00713,
      "grad_norm": 0.8979386810595619,
      "learning_rate": 0.002139,
      "loss": 4.8085,
      "step": 713
    },
    {
      "epoch": 0.00714,
      "grad_norm": 0.9672478380991173,
      "learning_rate": 0.002142,
      "loss": 4.7999,
      "step": 714
    },
    {
      "epoch": 0.00715,
      "grad_norm": 1.0283238664767786,
      "learning_rate": 0.0021449999999999998,
      "loss": 4.7746,
      "step": 715
    },
    {
      "epoch": 0.00716,
      "grad_norm": 0.8031836486660412,
      "learning_rate": 0.002148,
      "loss": 4.7611,
      "step": 716
    },
    {
      "epoch": 0.00717,
      "grad_norm": 0.8079202930068127,
      "learning_rate": 0.002151,
      "loss": 4.7802,
      "step": 717
    },
    {
      "epoch": 0.00718,
      "grad_norm": 0.7625878769693025,
      "learning_rate": 0.002154,
      "loss": 4.7607,
      "step": 718
    },
    {
      "epoch": 0.00719,
      "grad_norm": 0.8833410128202536,
      "learning_rate": 0.002157,
      "loss": 4.7767,
      "step": 719
    },
    {
      "epoch": 0.0072,
      "grad_norm": 1.0099718860880083,
      "learning_rate": 0.00216,
      "loss": 4.7733,
      "step": 720
    },
    {
      "epoch": 0.00721,
      "grad_norm": 1.0037155368349988,
      "learning_rate": 0.002163,
      "loss": 4.78,
      "step": 721
    },
    {
      "epoch": 0.00722,
      "grad_norm": 0.8678480066188063,
      "learning_rate": 0.002166,
      "loss": 4.7474,
      "step": 722
    },
    {
      "epoch": 0.00723,
      "grad_norm": 0.9399210002572385,
      "learning_rate": 0.002169,
      "loss": 4.7657,
      "step": 723
    },
    {
      "epoch": 0.00724,
      "grad_norm": 0.8184829699796181,
      "learning_rate": 0.002172,
      "loss": 4.7533,
      "step": 724
    },
    {
      "epoch": 0.00725,
      "grad_norm": 0.8676839571587074,
      "learning_rate": 0.002175,
      "loss": 4.7513,
      "step": 725
    },
    {
      "epoch": 0.00726,
      "grad_norm": 0.9799992988904748,
      "learning_rate": 0.002178,
      "loss": 4.7626,
      "step": 726
    },
    {
      "epoch": 0.00727,
      "grad_norm": 1.2281779129682024,
      "learning_rate": 0.0021809999999999998,
      "loss": 4.7581,
      "step": 727
    },
    {
      "epoch": 0.00728,
      "grad_norm": 1.082945747060172,
      "learning_rate": 0.002184,
      "loss": 4.7657,
      "step": 728
    },
    {
      "epoch": 0.00729,
      "grad_norm": 1.0915510364818644,
      "learning_rate": 0.002187,
      "loss": 4.7617,
      "step": 729
    },
    {
      "epoch": 0.0073,
      "grad_norm": 1.0738468909531949,
      "learning_rate": 0.00219,
      "loss": 4.7676,
      "step": 730
    },
    {
      "epoch": 0.00731,
      "grad_norm": 1.0774407965183543,
      "learning_rate": 0.002193,
      "loss": 4.7572,
      "step": 731
    },
    {
      "epoch": 0.00732,
      "grad_norm": 0.9732910355796593,
      "learning_rate": 0.002196,
      "loss": 4.7638,
      "step": 732
    },
    {
      "epoch": 0.00733,
      "grad_norm": 1.02567545008427,
      "learning_rate": 0.002199,
      "loss": 4.7467,
      "step": 733
    },
    {
      "epoch": 0.00734,
      "grad_norm": 0.9058765241181546,
      "learning_rate": 0.002202,
      "loss": 4.7686,
      "step": 734
    },
    {
      "epoch": 0.00735,
      "grad_norm": 0.9682392169542167,
      "learning_rate": 0.002205,
      "loss": 4.7759,
      "step": 735
    },
    {
      "epoch": 0.00736,
      "grad_norm": 0.9938811207200824,
      "learning_rate": 0.002208,
      "loss": 4.7615,
      "step": 736
    },
    {
      "epoch": 0.00737,
      "grad_norm": 1.0964297344539389,
      "learning_rate": 0.002211,
      "loss": 4.7347,
      "step": 737
    },
    {
      "epoch": 0.00738,
      "grad_norm": 0.8707349769325928,
      "learning_rate": 0.002214,
      "loss": 4.7342,
      "step": 738
    },
    {
      "epoch": 0.00739,
      "grad_norm": 0.7399818233744658,
      "learning_rate": 0.0022170000000000002,
      "loss": 4.717,
      "step": 739
    },
    {
      "epoch": 0.0074,
      "grad_norm": 0.9377539124718652,
      "learning_rate": 0.00222,
      "loss": 4.7301,
      "step": 740
    },
    {
      "epoch": 0.00741,
      "grad_norm": 0.8596400455739317,
      "learning_rate": 0.002223,
      "loss": 4.699,
      "step": 741
    },
    {
      "epoch": 0.00742,
      "grad_norm": 0.6578432901740889,
      "learning_rate": 0.002226,
      "loss": 4.7249,
      "step": 742
    },
    {
      "epoch": 0.00743,
      "grad_norm": 0.6175884285032084,
      "learning_rate": 0.002229,
      "loss": 4.6843,
      "step": 743
    },
    {
      "epoch": 0.00744,
      "grad_norm": 0.6615203369086972,
      "learning_rate": 0.002232,
      "loss": 4.6918,
      "step": 744
    },
    {
      "epoch": 0.00745,
      "grad_norm": 0.6999197355703424,
      "learning_rate": 0.002235,
      "loss": 4.7005,
      "step": 745
    },
    {
      "epoch": 0.00746,
      "grad_norm": 0.7056349857734648,
      "learning_rate": 0.002238,
      "loss": 4.6964,
      "step": 746
    },
    {
      "epoch": 0.00747,
      "grad_norm": 0.6678161149510893,
      "learning_rate": 0.002241,
      "loss": 4.6817,
      "step": 747
    },
    {
      "epoch": 0.00748,
      "grad_norm": 0.6725287147155753,
      "learning_rate": 0.002244,
      "loss": 4.6915,
      "step": 748
    },
    {
      "epoch": 0.00749,
      "grad_norm": 0.7368138262221237,
      "learning_rate": 0.002247,
      "loss": 4.6725,
      "step": 749
    },
    {
      "epoch": 0.0075,
      "grad_norm": 0.7550252977049275,
      "learning_rate": 0.0022500000000000003,
      "loss": 4.7124,
      "step": 750
    },
    {
      "epoch": 0.00751,
      "grad_norm": 0.6461697616177359,
      "learning_rate": 0.0022530000000000002,
      "loss": 4.6948,
      "step": 751
    },
    {
      "epoch": 0.00752,
      "grad_norm": 0.6473605328229959,
      "learning_rate": 0.002256,
      "loss": 4.648,
      "step": 752
    },
    {
      "epoch": 0.00753,
      "grad_norm": 0.8172272904356894,
      "learning_rate": 0.002259,
      "loss": 4.6929,
      "step": 753
    },
    {
      "epoch": 0.00754,
      "grad_norm": 0.8400684755887758,
      "learning_rate": 0.002262,
      "loss": 4.7068,
      "step": 754
    },
    {
      "epoch": 0.00755,
      "grad_norm": 0.7078832518794317,
      "learning_rate": 0.002265,
      "loss": 4.6656,
      "step": 755
    },
    {
      "epoch": 0.00756,
      "grad_norm": 0.6076060117836831,
      "learning_rate": 0.002268,
      "loss": 4.6484,
      "step": 756
    },
    {
      "epoch": 0.00757,
      "grad_norm": 0.7133093934008413,
      "learning_rate": 0.002271,
      "loss": 4.6658,
      "step": 757
    },
    {
      "epoch": 0.00758,
      "grad_norm": 0.7661771348142844,
      "learning_rate": 0.002274,
      "loss": 4.6521,
      "step": 758
    },
    {
      "epoch": 0.00759,
      "grad_norm": 0.9250288948777622,
      "learning_rate": 0.002277,
      "loss": 4.6753,
      "step": 759
    },
    {
      "epoch": 0.0076,
      "grad_norm": 1.037174236565274,
      "learning_rate": 0.00228,
      "loss": 4.669,
      "step": 760
    },
    {
      "epoch": 0.00761,
      "grad_norm": 0.9678315157211191,
      "learning_rate": 0.002283,
      "loss": 4.6392,
      "step": 761
    },
    {
      "epoch": 0.00762,
      "grad_norm": 1.3728001530688312,
      "learning_rate": 0.0022860000000000003,
      "loss": 4.6453,
      "step": 762
    },
    {
      "epoch": 0.00763,
      "grad_norm": 1.0284727877786697,
      "learning_rate": 0.0022890000000000002,
      "loss": 4.6793,
      "step": 763
    },
    {
      "epoch": 0.00764,
      "grad_norm": 0.9914794664489192,
      "learning_rate": 0.002292,
      "loss": 4.6942,
      "step": 764
    },
    {
      "epoch": 0.00765,
      "grad_norm": 0.984322504117537,
      "learning_rate": 0.002295,
      "loss": 4.6765,
      "step": 765
    },
    {
      "epoch": 0.00766,
      "grad_norm": 0.9320893698991433,
      "learning_rate": 0.002298,
      "loss": 4.6792,
      "step": 766
    },
    {
      "epoch": 0.00767,
      "grad_norm": 1.0651442494276249,
      "learning_rate": 0.002301,
      "loss": 4.6823,
      "step": 767
    },
    {
      "epoch": 0.00768,
      "grad_norm": 0.9179111624711317,
      "learning_rate": 0.002304,
      "loss": 4.6817,
      "step": 768
    },
    {
      "epoch": 0.00769,
      "grad_norm": 1.026483766110404,
      "learning_rate": 0.002307,
      "loss": 4.6987,
      "step": 769
    },
    {
      "epoch": 0.0077,
      "grad_norm": 1.1653119571960542,
      "learning_rate": 0.00231,
      "loss": 4.6621,
      "step": 770
    },
    {
      "epoch": 0.00771,
      "grad_norm": 0.8477238808348645,
      "learning_rate": 0.002313,
      "loss": 4.689,
      "step": 771
    },
    {
      "epoch": 0.00772,
      "grad_norm": 0.6519421622488206,
      "learning_rate": 0.002316,
      "loss": 4.6631,
      "step": 772
    },
    {
      "epoch": 0.00773,
      "grad_norm": 0.6177861857364649,
      "learning_rate": 0.0023190000000000003,
      "loss": 4.6627,
      "step": 773
    },
    {
      "epoch": 0.00774,
      "grad_norm": 0.6901665734497584,
      "learning_rate": 0.0023220000000000003,
      "loss": 4.6775,
      "step": 774
    },
    {
      "epoch": 0.00775,
      "grad_norm": 0.7356087164350898,
      "learning_rate": 0.0023250000000000002,
      "loss": 4.6725,
      "step": 775
    },
    {
      "epoch": 0.00776,
      "grad_norm": 0.8693432194982287,
      "learning_rate": 0.002328,
      "loss": 4.6722,
      "step": 776
    },
    {
      "epoch": 0.00777,
      "grad_norm": 1.099570573598906,
      "learning_rate": 0.002331,
      "loss": 4.6596,
      "step": 777
    },
    {
      "epoch": 0.00778,
      "grad_norm": 1.0694357671416344,
      "learning_rate": 0.002334,
      "loss": 4.6725,
      "step": 778
    },
    {
      "epoch": 0.00779,
      "grad_norm": 1.0144407661707453,
      "learning_rate": 0.002337,
      "loss": 4.6345,
      "step": 779
    },
    {
      "epoch": 0.0078,
      "grad_norm": 0.9392788681741788,
      "learning_rate": 0.00234,
      "loss": 4.6579,
      "step": 780
    },
    {
      "epoch": 0.00781,
      "grad_norm": 0.9039044158767507,
      "learning_rate": 0.002343,
      "loss": 4.6528,
      "step": 781
    },
    {
      "epoch": 0.00782,
      "grad_norm": 0.9671545635863801,
      "learning_rate": 0.002346,
      "loss": 4.6552,
      "step": 782
    },
    {
      "epoch": 0.00783,
      "grad_norm": 0.9789758043675277,
      "learning_rate": 0.002349,
      "loss": 4.6404,
      "step": 783
    },
    {
      "epoch": 0.00784,
      "grad_norm": 0.7674708275760124,
      "learning_rate": 0.002352,
      "loss": 4.6449,
      "step": 784
    },
    {
      "epoch": 0.00785,
      "grad_norm": 0.6509064102772842,
      "learning_rate": 0.0023550000000000003,
      "loss": 4.6391,
      "step": 785
    },
    {
      "epoch": 0.00786,
      "grad_norm": 0.6282839162170526,
      "learning_rate": 0.0023580000000000003,
      "loss": 4.6356,
      "step": 786
    },
    {
      "epoch": 0.00787,
      "grad_norm": 0.6520823986249177,
      "learning_rate": 0.0023610000000000003,
      "loss": 4.6384,
      "step": 787
    },
    {
      "epoch": 0.00788,
      "grad_norm": 0.6693545084259133,
      "learning_rate": 0.002364,
      "loss": 4.6342,
      "step": 788
    },
    {
      "epoch": 0.00789,
      "grad_norm": 0.7061838607079715,
      "learning_rate": 0.002367,
      "loss": 4.6597,
      "step": 789
    },
    {
      "epoch": 0.0079,
      "grad_norm": 0.7001074671969121,
      "learning_rate": 0.00237,
      "loss": 4.6333,
      "step": 790
    },
    {
      "epoch": 0.00791,
      "grad_norm": 0.6934841557036142,
      "learning_rate": 0.002373,
      "loss": 4.5873,
      "step": 791
    },
    {
      "epoch": 0.00792,
      "grad_norm": 0.6555126375785874,
      "learning_rate": 0.002376,
      "loss": 4.6158,
      "step": 792
    },
    {
      "epoch": 0.00793,
      "grad_norm": 0.6198619755446345,
      "learning_rate": 0.002379,
      "loss": 4.6082,
      "step": 793
    },
    {
      "epoch": 0.00794,
      "grad_norm": 0.5540734864838481,
      "learning_rate": 0.002382,
      "loss": 4.615,
      "step": 794
    },
    {
      "epoch": 0.00795,
      "grad_norm": 0.5756469670820633,
      "learning_rate": 0.002385,
      "loss": 4.5927,
      "step": 795
    },
    {
      "epoch": 0.00796,
      "grad_norm": 0.5912602735893169,
      "learning_rate": 0.0023880000000000004,
      "loss": 4.5839,
      "step": 796
    },
    {
      "epoch": 0.00797,
      "grad_norm": 0.6477479187436139,
      "learning_rate": 0.0023910000000000003,
      "loss": 4.628,
      "step": 797
    },
    {
      "epoch": 0.00798,
      "grad_norm": 0.6568610896012951,
      "learning_rate": 0.0023940000000000003,
      "loss": 4.5975,
      "step": 798
    },
    {
      "epoch": 0.00799,
      "grad_norm": 0.8964867071559416,
      "learning_rate": 0.0023970000000000003,
      "loss": 4.6327,
      "step": 799
    },
    {
      "epoch": 0.008,
      "grad_norm": 1.1130272400690795,
      "learning_rate": 0.0024000000000000002,
      "loss": 4.6127,
      "step": 800
    },
    {
      "epoch": 0.00801,
      "grad_norm": 0.8936330318178004,
      "learning_rate": 0.002403,
      "loss": 4.624,
      "step": 801
    },
    {
      "epoch": 0.00802,
      "grad_norm": 1.279259611288336,
      "learning_rate": 0.002406,
      "loss": 4.6431,
      "step": 802
    },
    {
      "epoch": 0.00803,
      "grad_norm": 0.7588568023085343,
      "learning_rate": 0.002409,
      "loss": 4.5967,
      "step": 803
    },
    {
      "epoch": 0.00804,
      "grad_norm": 0.9911172738466991,
      "learning_rate": 0.002412,
      "loss": 4.6083,
      "step": 804
    },
    {
      "epoch": 0.00805,
      "grad_norm": 1.0297855025384943,
      "learning_rate": 0.002415,
      "loss": 4.6095,
      "step": 805
    },
    {
      "epoch": 0.00806,
      "grad_norm": 0.8656546102408385,
      "learning_rate": 0.002418,
      "loss": 4.6231,
      "step": 806
    },
    {
      "epoch": 0.00807,
      "grad_norm": 0.7922998530369505,
      "learning_rate": 0.0024210000000000004,
      "loss": 4.6144,
      "step": 807
    },
    {
      "epoch": 0.00808,
      "grad_norm": 0.7119238411669042,
      "learning_rate": 0.0024240000000000004,
      "loss": 4.5933,
      "step": 808
    },
    {
      "epoch": 0.00809,
      "grad_norm": 0.6923611180056076,
      "learning_rate": 0.0024270000000000003,
      "loss": 4.5855,
      "step": 809
    },
    {
      "epoch": 0.0081,
      "grad_norm": 0.759382946117684,
      "learning_rate": 0.0024300000000000003,
      "loss": 4.6149,
      "step": 810
    },
    {
      "epoch": 0.00811,
      "grad_norm": 0.9629068291697402,
      "learning_rate": 0.0024330000000000003,
      "loss": 4.5955,
      "step": 811
    },
    {
      "epoch": 0.00812,
      "grad_norm": 0.9897740547487952,
      "learning_rate": 0.0024360000000000002,
      "loss": 4.5793,
      "step": 812
    },
    {
      "epoch": 0.00813,
      "grad_norm": 1.0343029805479964,
      "learning_rate": 0.0024389999999999998,
      "loss": 4.6192,
      "step": 813
    },
    {
      "epoch": 0.00814,
      "grad_norm": 1.188458156645309,
      "learning_rate": 0.0024419999999999997,
      "loss": 4.6409,
      "step": 814
    },
    {
      "epoch": 0.00815,
      "grad_norm": 0.960728927074141,
      "learning_rate": 0.0024449999999999997,
      "loss": 4.6319,
      "step": 815
    },
    {
      "epoch": 0.00816,
      "grad_norm": 1.0099133377105225,
      "learning_rate": 0.002448,
      "loss": 4.6186,
      "step": 816
    },
    {
      "epoch": 0.00817,
      "grad_norm": 0.9311887346236664,
      "learning_rate": 0.002451,
      "loss": 4.6246,
      "step": 817
    },
    {
      "epoch": 0.00818,
      "grad_norm": 1.2609797944179002,
      "learning_rate": 0.002454,
      "loss": 4.625,
      "step": 818
    },
    {
      "epoch": 0.00819,
      "grad_norm": 1.0447566978787928,
      "learning_rate": 0.002457,
      "loss": 4.6401,
      "step": 819
    },
    {
      "epoch": 0.0082,
      "grad_norm": 1.1000322233283122,
      "learning_rate": 0.00246,
      "loss": 4.6239,
      "step": 820
    },
    {
      "epoch": 0.00821,
      "grad_norm": 0.8928816997822231,
      "learning_rate": 0.002463,
      "loss": 4.5932,
      "step": 821
    },
    {
      "epoch": 0.00822,
      "grad_norm": 0.9105337296182261,
      "learning_rate": 0.002466,
      "loss": 4.5996,
      "step": 822
    },
    {
      "epoch": 0.00823,
      "grad_norm": 0.6537296194510775,
      "learning_rate": 0.002469,
      "loss": 4.5905,
      "step": 823
    },
    {
      "epoch": 0.00824,
      "grad_norm": 0.6615102644633621,
      "learning_rate": 0.002472,
      "loss": 4.61,
      "step": 824
    },
    {
      "epoch": 0.00825,
      "grad_norm": 0.7605760557018463,
      "learning_rate": 0.0024749999999999998,
      "loss": 4.6148,
      "step": 825
    },
    {
      "epoch": 0.00826,
      "grad_norm": 0.9288808111624368,
      "learning_rate": 0.0024779999999999997,
      "loss": 4.5806,
      "step": 826
    },
    {
      "epoch": 0.00827,
      "grad_norm": 0.8099205132023622,
      "learning_rate": 0.002481,
      "loss": 4.6084,
      "step": 827
    },
    {
      "epoch": 0.00828,
      "grad_norm": 0.7444084275861881,
      "learning_rate": 0.002484,
      "loss": 4.5965,
      "step": 828
    },
    {
      "epoch": 0.00829,
      "grad_norm": 0.6438605280155706,
      "learning_rate": 0.002487,
      "loss": 4.5891,
      "step": 829
    },
    {
      "epoch": 0.0083,
      "grad_norm": 0.6242869251575957,
      "learning_rate": 0.00249,
      "loss": 4.5955,
      "step": 830
    },
    {
      "epoch": 0.00831,
      "grad_norm": 0.6705073637500226,
      "learning_rate": 0.002493,
      "loss": 4.576,
      "step": 831
    },
    {
      "epoch": 0.00832,
      "grad_norm": 0.7603459804613621,
      "learning_rate": 0.002496,
      "loss": 4.5759,
      "step": 832
    },
    {
      "epoch": 0.00833,
      "grad_norm": 0.7662808437283888,
      "learning_rate": 0.002499,
      "loss": 4.5696,
      "step": 833
    },
    {
      "epoch": 0.00834,
      "grad_norm": 0.8012380275176963,
      "learning_rate": 0.002502,
      "loss": 4.5743,
      "step": 834
    },
    {
      "epoch": 0.00835,
      "grad_norm": 1.0075156101089233,
      "learning_rate": 0.002505,
      "loss": 4.5783,
      "step": 835
    },
    {
      "epoch": 0.00836,
      "grad_norm": 1.0847073328294785,
      "learning_rate": 0.002508,
      "loss": 4.6155,
      "step": 836
    },
    {
      "epoch": 0.00837,
      "grad_norm": 0.8849400794535106,
      "learning_rate": 0.0025109999999999998,
      "loss": 4.5603,
      "step": 837
    },
    {
      "epoch": 0.00838,
      "grad_norm": 0.8832989831172253,
      "learning_rate": 0.0025139999999999997,
      "loss": 4.5816,
      "step": 838
    },
    {
      "epoch": 0.00839,
      "grad_norm": 0.9511730781538094,
      "learning_rate": 0.002517,
      "loss": 4.5843,
      "step": 839
    },
    {
      "epoch": 0.0084,
      "grad_norm": 1.037817208386755,
      "learning_rate": 0.00252,
      "loss": 4.5831,
      "step": 840
    },
    {
      "epoch": 0.00841,
      "grad_norm": 0.8472466492390959,
      "learning_rate": 0.002523,
      "loss": 4.5624,
      "step": 841
    },
    {
      "epoch": 0.00842,
      "grad_norm": 0.9269217528832409,
      "learning_rate": 0.002526,
      "loss": 4.5661,
      "step": 842
    },
    {
      "epoch": 0.00843,
      "grad_norm": 0.8716247137682855,
      "learning_rate": 0.002529,
      "loss": 4.5688,
      "step": 843
    },
    {
      "epoch": 0.00844,
      "grad_norm": 0.7416972066179122,
      "learning_rate": 0.002532,
      "loss": 4.585,
      "step": 844
    },
    {
      "epoch": 0.00845,
      "grad_norm": 0.6177824387251759,
      "learning_rate": 0.002535,
      "loss": 4.5442,
      "step": 845
    },
    {
      "epoch": 0.00846,
      "grad_norm": 0.5865348849804463,
      "learning_rate": 0.002538,
      "loss": 4.5656,
      "step": 846
    },
    {
      "epoch": 0.00847,
      "grad_norm": 0.5324615561823309,
      "learning_rate": 0.002541,
      "loss": 4.5442,
      "step": 847
    },
    {
      "epoch": 0.00848,
      "grad_norm": 0.5568583953268653,
      "learning_rate": 0.002544,
      "loss": 4.5407,
      "step": 848
    },
    {
      "epoch": 0.00849,
      "grad_norm": 0.600307108588631,
      "learning_rate": 0.002547,
      "loss": 4.5371,
      "step": 849
    },
    {
      "epoch": 0.0085,
      "grad_norm": 0.5343909127282478,
      "learning_rate": 0.00255,
      "loss": 4.5435,
      "step": 850
    },
    {
      "epoch": 0.00851,
      "grad_norm": 0.5790732432599173,
      "learning_rate": 0.002553,
      "loss": 4.5588,
      "step": 851
    },
    {
      "epoch": 0.00852,
      "grad_norm": 0.6406298666409433,
      "learning_rate": 0.002556,
      "loss": 4.5503,
      "step": 852
    },
    {
      "epoch": 0.00853,
      "grad_norm": 0.6711876835719848,
      "learning_rate": 0.002559,
      "loss": 4.5298,
      "step": 853
    },
    {
      "epoch": 0.00854,
      "grad_norm": 0.643558144142947,
      "learning_rate": 0.002562,
      "loss": 4.5219,
      "step": 854
    },
    {
      "epoch": 0.00855,
      "grad_norm": 0.621905866188772,
      "learning_rate": 0.002565,
      "loss": 4.5026,
      "step": 855
    },
    {
      "epoch": 0.00856,
      "grad_norm": 0.7503391003054042,
      "learning_rate": 0.002568,
      "loss": 4.5375,
      "step": 856
    },
    {
      "epoch": 0.00857,
      "grad_norm": 0.90802719077466,
      "learning_rate": 0.002571,
      "loss": 4.5344,
      "step": 857
    },
    {
      "epoch": 0.00858,
      "grad_norm": 0.9157789056738207,
      "learning_rate": 0.002574,
      "loss": 4.5308,
      "step": 858
    },
    {
      "epoch": 0.00859,
      "grad_norm": 0.8455467899517649,
      "learning_rate": 0.002577,
      "loss": 4.5208,
      "step": 859
    },
    {
      "epoch": 0.0086,
      "grad_norm": 0.929986585155377,
      "learning_rate": 0.00258,
      "loss": 4.5557,
      "step": 860
    },
    {
      "epoch": 0.00861,
      "grad_norm": 1.062443351155347,
      "learning_rate": 0.0025830000000000002,
      "loss": 4.5815,
      "step": 861
    },
    {
      "epoch": 0.00862,
      "grad_norm": 1.0476479971551458,
      "learning_rate": 0.002586,
      "loss": 4.5382,
      "step": 862
    },
    {
      "epoch": 0.00863,
      "grad_norm": 0.9516272804079478,
      "learning_rate": 0.002589,
      "loss": 4.5686,
      "step": 863
    },
    {
      "epoch": 0.00864,
      "grad_norm": 1.0775847362135182,
      "learning_rate": 0.002592,
      "loss": 4.5222,
      "step": 864
    },
    {
      "epoch": 0.00865,
      "grad_norm": 0.902924351244226,
      "learning_rate": 0.002595,
      "loss": 4.5357,
      "step": 865
    },
    {
      "epoch": 0.00866,
      "grad_norm": 0.889409265471235,
      "learning_rate": 0.002598,
      "loss": 4.5623,
      "step": 866
    },
    {
      "epoch": 0.00867,
      "grad_norm": 1.1312843257696636,
      "learning_rate": 0.002601,
      "loss": 4.5565,
      "step": 867
    },
    {
      "epoch": 0.00868,
      "grad_norm": 1.0195609890673947,
      "learning_rate": 0.002604,
      "loss": 4.5479,
      "step": 868
    },
    {
      "epoch": 0.00869,
      "grad_norm": 0.8378311515658349,
      "learning_rate": 0.002607,
      "loss": 4.5492,
      "step": 869
    },
    {
      "epoch": 0.0087,
      "grad_norm": 0.93569973985326,
      "learning_rate": 0.00261,
      "loss": 4.5413,
      "step": 870
    },
    {
      "epoch": 0.00871,
      "grad_norm": 0.9947247250751194,
      "learning_rate": 0.002613,
      "loss": 4.5608,
      "step": 871
    },
    {
      "epoch": 0.00872,
      "grad_norm": 0.9456334146879876,
      "learning_rate": 0.002616,
      "loss": 4.5489,
      "step": 872
    },
    {
      "epoch": 0.00873,
      "grad_norm": 0.9088952463307589,
      "learning_rate": 0.0026190000000000002,
      "loss": 4.5587,
      "step": 873
    },
    {
      "epoch": 0.00874,
      "grad_norm": 0.7636008668853458,
      "learning_rate": 0.002622,
      "loss": 4.5702,
      "step": 874
    },
    {
      "epoch": 0.00875,
      "grad_norm": 0.8446171091890929,
      "learning_rate": 0.002625,
      "loss": 4.5191,
      "step": 875
    },
    {
      "epoch": 0.00876,
      "grad_norm": 0.8731048122579586,
      "learning_rate": 0.002628,
      "loss": 4.551,
      "step": 876
    },
    {
      "epoch": 0.00877,
      "grad_norm": 0.9444127454444267,
      "learning_rate": 0.002631,
      "loss": 4.5701,
      "step": 877
    },
    {
      "epoch": 0.00878,
      "grad_norm": 0.9863621265162925,
      "learning_rate": 0.002634,
      "loss": 4.5108,
      "step": 878
    },
    {
      "epoch": 0.00879,
      "grad_norm": 0.7746522972030878,
      "learning_rate": 0.002637,
      "loss": 4.5293,
      "step": 879
    },
    {
      "epoch": 0.0088,
      "grad_norm": 0.7150869014843059,
      "learning_rate": 0.00264,
      "loss": 4.5368,
      "step": 880
    },
    {
      "epoch": 0.00881,
      "grad_norm": 0.866142425879982,
      "learning_rate": 0.002643,
      "loss": 4.5448,
      "step": 881
    },
    {
      "epoch": 0.00882,
      "grad_norm": 1.0565004714930601,
      "learning_rate": 0.002646,
      "loss": 4.5522,
      "step": 882
    },
    {
      "epoch": 0.00883,
      "grad_norm": 0.88323512650563,
      "learning_rate": 0.002649,
      "loss": 4.5382,
      "step": 883
    },
    {
      "epoch": 0.00884,
      "grad_norm": 0.8377440854137777,
      "learning_rate": 0.0026520000000000003,
      "loss": 4.5227,
      "step": 884
    },
    {
      "epoch": 0.00885,
      "grad_norm": 0.8587569363150891,
      "learning_rate": 0.0026550000000000002,
      "loss": 4.5189,
      "step": 885
    },
    {
      "epoch": 0.00886,
      "grad_norm": 0.9455034420832737,
      "learning_rate": 0.002658,
      "loss": 4.5539,
      "step": 886
    },
    {
      "epoch": 0.00887,
      "grad_norm": 0.8258368480900744,
      "learning_rate": 0.002661,
      "loss": 4.536,
      "step": 887
    },
    {
      "epoch": 0.00888,
      "grad_norm": 0.927023832062946,
      "learning_rate": 0.002664,
      "loss": 4.5392,
      "step": 888
    },
    {
      "epoch": 0.00889,
      "grad_norm": 0.9905400773231482,
      "learning_rate": 0.002667,
      "loss": 4.5077,
      "step": 889
    },
    {
      "epoch": 0.0089,
      "grad_norm": 1.0181625806478707,
      "learning_rate": 0.00267,
      "loss": 4.551,
      "step": 890
    },
    {
      "epoch": 0.00891,
      "grad_norm": 1.0618776306697646,
      "learning_rate": 0.002673,
      "loss": 4.5446,
      "step": 891
    },
    {
      "epoch": 0.00892,
      "grad_norm": 0.9464629097549706,
      "learning_rate": 0.002676,
      "loss": 4.5493,
      "step": 892
    },
    {
      "epoch": 0.00893,
      "grad_norm": 1.02959831042168,
      "learning_rate": 0.002679,
      "loss": 4.5321,
      "step": 893
    },
    {
      "epoch": 0.00894,
      "grad_norm": 0.8717588229222071,
      "learning_rate": 0.002682,
      "loss": 4.5126,
      "step": 894
    },
    {
      "epoch": 0.00895,
      "grad_norm": 0.7766302283006379,
      "learning_rate": 0.0026850000000000003,
      "loss": 4.5191,
      "step": 895
    },
    {
      "epoch": 0.00896,
      "grad_norm": 0.7210349653989065,
      "learning_rate": 0.0026880000000000003,
      "loss": 4.5061,
      "step": 896
    },
    {
      "epoch": 0.00897,
      "grad_norm": 0.7840425119747009,
      "learning_rate": 0.0026910000000000002,
      "loss": 4.5249,
      "step": 897
    },
    {
      "epoch": 0.00898,
      "grad_norm": 0.7907811575022647,
      "learning_rate": 0.002694,
      "loss": 4.518,
      "step": 898
    },
    {
      "epoch": 0.00899,
      "grad_norm": 0.7866169386193732,
      "learning_rate": 0.002697,
      "loss": 4.5195,
      "step": 899
    },
    {
      "epoch": 0.009,
      "grad_norm": 0.7303478296484299,
      "learning_rate": 0.0027,
      "loss": 4.5449,
      "step": 900
    },
    {
      "epoch": 0.00901,
      "grad_norm": 0.6444049592088867,
      "learning_rate": 0.002703,
      "loss": 4.4943,
      "step": 901
    },
    {
      "epoch": 0.00902,
      "grad_norm": 0.5222361304775145,
      "learning_rate": 0.002706,
      "loss": 4.498,
      "step": 902
    },
    {
      "epoch": 0.00903,
      "grad_norm": 0.6152635474063327,
      "learning_rate": 0.002709,
      "loss": 4.4967,
      "step": 903
    },
    {
      "epoch": 0.00904,
      "grad_norm": 0.6931791624762859,
      "learning_rate": 0.002712,
      "loss": 4.4818,
      "step": 904
    },
    {
      "epoch": 0.00905,
      "grad_norm": 0.6467753244335184,
      "learning_rate": 0.002715,
      "loss": 4.4929,
      "step": 905
    },
    {
      "epoch": 0.00906,
      "grad_norm": 0.6638188976320862,
      "learning_rate": 0.002718,
      "loss": 4.5049,
      "step": 906
    },
    {
      "epoch": 0.00907,
      "grad_norm": 0.503631484987646,
      "learning_rate": 0.0027210000000000003,
      "loss": 4.5121,
      "step": 907
    },
    {
      "epoch": 0.00908,
      "grad_norm": 0.4745734069368428,
      "learning_rate": 0.0027240000000000003,
      "loss": 4.481,
      "step": 908
    },
    {
      "epoch": 0.00909,
      "grad_norm": 0.5648420216556126,
      "learning_rate": 0.0027270000000000003,
      "loss": 4.5013,
      "step": 909
    },
    {
      "epoch": 0.0091,
      "grad_norm": 0.5471966544704259,
      "learning_rate": 0.0027300000000000002,
      "loss": 4.5027,
      "step": 910
    },
    {
      "epoch": 0.00911,
      "grad_norm": 0.6580432739675335,
      "learning_rate": 0.002733,
      "loss": 4.4619,
      "step": 911
    },
    {
      "epoch": 0.00912,
      "grad_norm": 0.652804172410743,
      "learning_rate": 0.002736,
      "loss": 4.4678,
      "step": 912
    },
    {
      "epoch": 0.00913,
      "grad_norm": 0.6108886726833864,
      "learning_rate": 0.002739,
      "loss": 4.4601,
      "step": 913
    },
    {
      "epoch": 0.00914,
      "grad_norm": 0.5246452629450868,
      "learning_rate": 0.002742,
      "loss": 4.4865,
      "step": 914
    },
    {
      "epoch": 0.00915,
      "grad_norm": 0.6046562570851716,
      "learning_rate": 0.002745,
      "loss": 4.4501,
      "step": 915
    },
    {
      "epoch": 0.00916,
      "grad_norm": 0.6470672223067736,
      "learning_rate": 0.002748,
      "loss": 4.4304,
      "step": 916
    },
    {
      "epoch": 0.00917,
      "grad_norm": 0.6458845430146066,
      "learning_rate": 0.002751,
      "loss": 4.4431,
      "step": 917
    },
    {
      "epoch": 0.00918,
      "grad_norm": 0.6107710101855935,
      "learning_rate": 0.0027540000000000004,
      "loss": 4.4622,
      "step": 918
    },
    {
      "epoch": 0.00919,
      "grad_norm": 0.6056157888916294,
      "learning_rate": 0.0027570000000000003,
      "loss": 4.4436,
      "step": 919
    },
    {
      "epoch": 0.0092,
      "grad_norm": 0.6858065732895877,
      "learning_rate": 0.0027600000000000003,
      "loss": 4.462,
      "step": 920
    },
    {
      "epoch": 0.00921,
      "grad_norm": 0.9391926745722488,
      "learning_rate": 0.0027630000000000003,
      "loss": 4.4556,
      "step": 921
    },
    {
      "epoch": 0.00922,
      "grad_norm": 1.1348542218598812,
      "learning_rate": 0.0027660000000000002,
      "loss": 4.5063,
      "step": 922
    },
    {
      "epoch": 0.00923,
      "grad_norm": 1.0421503736233508,
      "learning_rate": 0.002769,
      "loss": 4.4713,
      "step": 923
    },
    {
      "epoch": 0.00924,
      "grad_norm": 1.0588478925879097,
      "learning_rate": 0.002772,
      "loss": 4.4959,
      "step": 924
    },
    {
      "epoch": 0.00925,
      "grad_norm": 0.9781832929515508,
      "learning_rate": 0.002775,
      "loss": 4.4806,
      "step": 925
    },
    {
      "epoch": 0.00926,
      "grad_norm": 0.7776497655560727,
      "learning_rate": 0.002778,
      "loss": 4.488,
      "step": 926
    },
    {
      "epoch": 0.00927,
      "grad_norm": 0.7034519817521436,
      "learning_rate": 0.002781,
      "loss": 4.4448,
      "step": 927
    },
    {
      "epoch": 0.00928,
      "grad_norm": 0.9270286412247504,
      "learning_rate": 0.002784,
      "loss": 4.4965,
      "step": 928
    },
    {
      "epoch": 0.00929,
      "grad_norm": 1.0728170784278697,
      "learning_rate": 0.0027870000000000004,
      "loss": 4.4869,
      "step": 929
    },
    {
      "epoch": 0.0093,
      "grad_norm": 0.9492127377122095,
      "learning_rate": 0.0027900000000000004,
      "loss": 4.4732,
      "step": 930
    },
    {
      "epoch": 0.00931,
      "grad_norm": 0.9513900088751025,
      "learning_rate": 0.0027930000000000003,
      "loss": 4.4833,
      "step": 931
    },
    {
      "epoch": 0.00932,
      "grad_norm": 1.4536440294804005,
      "learning_rate": 0.0027960000000000003,
      "loss": 4.5168,
      "step": 932
    },
    {
      "epoch": 0.00933,
      "grad_norm": 1.132862147568844,
      "learning_rate": 0.0027990000000000003,
      "loss": 4.4769,
      "step": 933
    },
    {
      "epoch": 0.00934,
      "grad_norm": 0.8513409648274727,
      "learning_rate": 0.0028020000000000002,
      "loss": 4.4941,
      "step": 934
    },
    {
      "epoch": 0.00935,
      "grad_norm": 0.8677938211616196,
      "learning_rate": 0.002805,
      "loss": 4.5057,
      "step": 935
    },
    {
      "epoch": 0.00936,
      "grad_norm": 0.8298971693271944,
      "learning_rate": 0.002808,
      "loss": 4.5081,
      "step": 936
    },
    {
      "epoch": 0.00937,
      "grad_norm": 0.8363829119527492,
      "learning_rate": 0.002811,
      "loss": 4.521,
      "step": 937
    },
    {
      "epoch": 0.00938,
      "grad_norm": 1.0036953395826609,
      "learning_rate": 0.002814,
      "loss": 4.5006,
      "step": 938
    },
    {
      "epoch": 0.00939,
      "grad_norm": 1.0054078613176451,
      "learning_rate": 0.002817,
      "loss": 4.5073,
      "step": 939
    },
    {
      "epoch": 0.0094,
      "grad_norm": 0.8667374621688471,
      "learning_rate": 0.00282,
      "loss": 4.4958,
      "step": 940
    },
    {
      "epoch": 0.00941,
      "grad_norm": 0.9205808599892458,
      "learning_rate": 0.002823,
      "loss": 4.4733,
      "step": 941
    },
    {
      "epoch": 0.00942,
      "grad_norm": 0.9778408651584425,
      "learning_rate": 0.002826,
      "loss": 4.5008,
      "step": 942
    },
    {
      "epoch": 0.00943,
      "grad_norm": 1.082550194860624,
      "learning_rate": 0.002829,
      "loss": 4.4958,
      "step": 943
    },
    {
      "epoch": 0.00944,
      "grad_norm": 1.355725245571646,
      "learning_rate": 0.002832,
      "loss": 4.5179,
      "step": 944
    },
    {
      "epoch": 0.00945,
      "grad_norm": 0.8345488861943434,
      "learning_rate": 0.002835,
      "loss": 4.5015,
      "step": 945
    },
    {
      "epoch": 0.00946,
      "grad_norm": 1.0009931674850154,
      "learning_rate": 0.002838,
      "loss": 4.5193,
      "step": 946
    },
    {
      "epoch": 0.00947,
      "grad_norm": 1.0176650405493206,
      "learning_rate": 0.0028409999999999998,
      "loss": 4.4912,
      "step": 947
    },
    {
      "epoch": 0.00948,
      "grad_norm": 0.836896216552642,
      "learning_rate": 0.0028439999999999997,
      "loss": 4.472,
      "step": 948
    },
    {
      "epoch": 0.00949,
      "grad_norm": 0.6881971993105954,
      "learning_rate": 0.002847,
      "loss": 4.5057,
      "step": 949
    },
    {
      "epoch": 0.0095,
      "grad_norm": 0.8926787293989068,
      "learning_rate": 0.00285,
      "loss": 4.5027,
      "step": 950
    },
    {
      "epoch": 0.00951,
      "grad_norm": 1.0437772730376889,
      "learning_rate": 0.002853,
      "loss": 4.4861,
      "step": 951
    },
    {
      "epoch": 0.00952,
      "grad_norm": 0.8745743913439339,
      "learning_rate": 0.002856,
      "loss": 4.5041,
      "step": 952
    },
    {
      "epoch": 0.00953,
      "grad_norm": 0.7847706213592531,
      "learning_rate": 0.002859,
      "loss": 4.4446,
      "step": 953
    },
    {
      "epoch": 0.00954,
      "grad_norm": 0.6236105730880978,
      "learning_rate": 0.002862,
      "loss": 4.4945,
      "step": 954
    },
    {
      "epoch": 0.00955,
      "grad_norm": 0.5696186051972435,
      "learning_rate": 0.002865,
      "loss": 4.475,
      "step": 955
    },
    {
      "epoch": 0.00956,
      "grad_norm": 0.5459272314199634,
      "learning_rate": 0.002868,
      "loss": 4.4518,
      "step": 956
    },
    {
      "epoch": 0.00957,
      "grad_norm": 0.5100325019322003,
      "learning_rate": 0.002871,
      "loss": 4.4721,
      "step": 957
    },
    {
      "epoch": 0.00958,
      "grad_norm": 0.6617022302690957,
      "learning_rate": 0.002874,
      "loss": 4.4567,
      "step": 958
    },
    {
      "epoch": 0.00959,
      "grad_norm": 0.7948420381771908,
      "learning_rate": 0.002877,
      "loss": 4.4693,
      "step": 959
    },
    {
      "epoch": 0.0096,
      "grad_norm": 1.0277128972108451,
      "learning_rate": 0.0028799999999999997,
      "loss": 4.4747,
      "step": 960
    },
    {
      "epoch": 0.00961,
      "grad_norm": 0.9925275083373442,
      "learning_rate": 0.002883,
      "loss": 4.4507,
      "step": 961
    },
    {
      "epoch": 0.00962,
      "grad_norm": 1.0177847800658486,
      "learning_rate": 0.002886,
      "loss": 4.4861,
      "step": 962
    },
    {
      "epoch": 0.00963,
      "grad_norm": 1.0118802628275685,
      "learning_rate": 0.002889,
      "loss": 4.4865,
      "step": 963
    },
    {
      "epoch": 0.00964,
      "grad_norm": 1.1856958623453784,
      "learning_rate": 0.002892,
      "loss": 4.4868,
      "step": 964
    },
    {
      "epoch": 0.00965,
      "grad_norm": 0.8341279969334199,
      "learning_rate": 0.002895,
      "loss": 4.4524,
      "step": 965
    },
    {
      "epoch": 0.00966,
      "grad_norm": 0.6756655743310646,
      "learning_rate": 0.002898,
      "loss": 4.4552,
      "step": 966
    },
    {
      "epoch": 0.00967,
      "grad_norm": 0.6634929266596646,
      "learning_rate": 0.002901,
      "loss": 4.4933,
      "step": 967
    },
    {
      "epoch": 0.00968,
      "grad_norm": 0.6850072615364151,
      "learning_rate": 0.002904,
      "loss": 4.4564,
      "step": 968
    },
    {
      "epoch": 0.00969,
      "grad_norm": 0.6166935294692494,
      "learning_rate": 0.002907,
      "loss": 4.4862,
      "step": 969
    },
    {
      "epoch": 0.0097,
      "grad_norm": 0.6616720297110108,
      "learning_rate": 0.00291,
      "loss": 4.4401,
      "step": 970
    },
    {
      "epoch": 0.00971,
      "grad_norm": 0.5463010292381552,
      "learning_rate": 0.002913,
      "loss": 4.4415,
      "step": 971
    },
    {
      "epoch": 0.00972,
      "grad_norm": 0.5115285821904162,
      "learning_rate": 0.002916,
      "loss": 4.4454,
      "step": 972
    },
    {
      "epoch": 0.00973,
      "grad_norm": 0.543781975527911,
      "learning_rate": 0.002919,
      "loss": 4.4402,
      "step": 973
    },
    {
      "epoch": 0.00974,
      "grad_norm": 0.5167263203162235,
      "learning_rate": 0.002922,
      "loss": 4.4246,
      "step": 974
    },
    {
      "epoch": 0.00975,
      "grad_norm": 0.4676730441900605,
      "learning_rate": 0.002925,
      "loss": 4.4434,
      "step": 975
    },
    {
      "epoch": 0.00976,
      "grad_norm": 0.4483140513716565,
      "learning_rate": 0.002928,
      "loss": 4.4259,
      "step": 976
    },
    {
      "epoch": 0.00977,
      "grad_norm": 0.45300675248114197,
      "learning_rate": 0.002931,
      "loss": 4.4582,
      "step": 977
    },
    {
      "epoch": 0.00978,
      "grad_norm": 0.524704391185419,
      "learning_rate": 0.002934,
      "loss": 4.4335,
      "step": 978
    },
    {
      "epoch": 0.00979,
      "grad_norm": 0.6643809565846889,
      "learning_rate": 0.002937,
      "loss": 4.4266,
      "step": 979
    },
    {
      "epoch": 0.0098,
      "grad_norm": 0.9159629342753403,
      "learning_rate": 0.00294,
      "loss": 4.433,
      "step": 980
    },
    {
      "epoch": 0.00981,
      "grad_norm": 1.0134318026936866,
      "learning_rate": 0.002943,
      "loss": 4.4516,
      "step": 981
    },
    {
      "epoch": 0.00982,
      "grad_norm": 0.7532241089289973,
      "learning_rate": 0.002946,
      "loss": 4.4387,
      "step": 982
    },
    {
      "epoch": 0.00983,
      "grad_norm": 0.7539130672753217,
      "learning_rate": 0.0029490000000000002,
      "loss": 4.4372,
      "step": 983
    },
    {
      "epoch": 0.00984,
      "grad_norm": 0.7045823796689694,
      "learning_rate": 0.002952,
      "loss": 4.4213,
      "step": 984
    },
    {
      "epoch": 0.00985,
      "grad_norm": 0.6478785171714704,
      "learning_rate": 0.002955,
      "loss": 4.444,
      "step": 985
    },
    {
      "epoch": 0.00986,
      "grad_norm": 0.5640139479823427,
      "learning_rate": 0.002958,
      "loss": 4.3997,
      "step": 986
    },
    {
      "epoch": 0.00987,
      "grad_norm": 0.6145958247621988,
      "learning_rate": 0.002961,
      "loss": 4.4141,
      "step": 987
    },
    {
      "epoch": 0.00988,
      "grad_norm": 0.7121366087401472,
      "learning_rate": 0.002964,
      "loss": 4.4512,
      "step": 988
    },
    {
      "epoch": 0.00989,
      "grad_norm": 0.7474063595618389,
      "learning_rate": 0.002967,
      "loss": 4.3897,
      "step": 989
    },
    {
      "epoch": 0.0099,
      "grad_norm": 0.7608901784540225,
      "learning_rate": 0.00297,
      "loss": 4.4296,
      "step": 990
    },
    {
      "epoch": 0.00991,
      "grad_norm": 0.8136541225519112,
      "learning_rate": 0.002973,
      "loss": 4.4314,
      "step": 991
    },
    {
      "epoch": 0.00992,
      "grad_norm": 0.7865701920195308,
      "learning_rate": 0.002976,
      "loss": 4.4266,
      "step": 992
    },
    {
      "epoch": 0.00993,
      "grad_norm": 0.8315737176917932,
      "learning_rate": 0.002979,
      "loss": 4.4267,
      "step": 993
    },
    {
      "epoch": 0.00994,
      "grad_norm": 0.7821802322270756,
      "learning_rate": 0.002982,
      "loss": 4.4281,
      "step": 994
    },
    {
      "epoch": 0.00995,
      "grad_norm": 0.8705272973695986,
      "learning_rate": 0.0029850000000000002,
      "loss": 4.4331,
      "step": 995
    },
    {
      "epoch": 0.00996,
      "grad_norm": 0.9954881536889274,
      "learning_rate": 0.002988,
      "loss": 4.4408,
      "step": 996
    },
    {
      "epoch": 0.00997,
      "grad_norm": 0.9720370341600497,
      "learning_rate": 0.002991,
      "loss": 4.4354,
      "step": 997
    },
    {
      "epoch": 0.00998,
      "grad_norm": 0.7522042168889891,
      "learning_rate": 0.002994,
      "loss": 4.4331,
      "step": 998
    },
    {
      "epoch": 0.00999,
      "grad_norm": 0.9425882614336212,
      "learning_rate": 0.002997,
      "loss": 4.4261,
      "step": 999
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9689827453070083,
      "learning_rate": 0.003,
      "loss": 4.458,
      "step": 1000
    },
    {
      "epoch": 0.01001,
      "grad_norm": 0.9802085236972514,
      "learning_rate": 0.003,
      "loss": 4.4569,
      "step": 1001
    },
    {
      "epoch": 0.01002,
      "grad_norm": 0.9742392022619593,
      "learning_rate": 0.003,
      "loss": 4.4262,
      "step": 1002
    },
    {
      "epoch": 0.01003,
      "grad_norm": 0.9115330327806416,
      "learning_rate": 0.003,
      "loss": 4.4513,
      "step": 1003
    },
    {
      "epoch": 0.01004,
      "grad_norm": 0.8562148232052564,
      "learning_rate": 0.003,
      "loss": 4.4397,
      "step": 1004
    },
    {
      "epoch": 0.01005,
      "grad_norm": 0.9105663755998641,
      "learning_rate": 0.003,
      "loss": 4.4154,
      "step": 1005
    },
    {
      "epoch": 0.01006,
      "grad_norm": 1.018045955439956,
      "learning_rate": 0.003,
      "loss": 4.433,
      "step": 1006
    },
    {
      "epoch": 0.01007,
      "grad_norm": 0.9357911521230117,
      "learning_rate": 0.003,
      "loss": 4.4713,
      "step": 1007
    },
    {
      "epoch": 0.01008,
      "grad_norm": 0.9681416399703533,
      "learning_rate": 0.003,
      "loss": 4.4846,
      "step": 1008
    },
    {
      "epoch": 0.01009,
      "grad_norm": 1.030244827646589,
      "learning_rate": 0.003,
      "loss": 4.4619,
      "step": 1009
    },
    {
      "epoch": 0.0101,
      "grad_norm": 1.0010362528613534,
      "learning_rate": 0.003,
      "loss": 4.4688,
      "step": 1010
    },
    {
      "epoch": 0.01011,
      "grad_norm": 1.0284858383133122,
      "learning_rate": 0.003,
      "loss": 4.4881,
      "step": 1011
    },
    {
      "epoch": 0.01012,
      "grad_norm": 0.9778345429910184,
      "learning_rate": 0.003,
      "loss": 4.4674,
      "step": 1012
    },
    {
      "epoch": 0.01013,
      "grad_norm": 0.8705611016855861,
      "learning_rate": 0.003,
      "loss": 4.4919,
      "step": 1013
    },
    {
      "epoch": 0.01014,
      "grad_norm": 0.8083144078788229,
      "learning_rate": 0.003,
      "loss": 4.4268,
      "step": 1014
    },
    {
      "epoch": 0.01015,
      "grad_norm": 0.7155205086193928,
      "learning_rate": 0.003,
      "loss": 4.4657,
      "step": 1015
    },
    {
      "epoch": 0.01016,
      "grad_norm": 0.5844948538982189,
      "learning_rate": 0.003,
      "loss": 4.466,
      "step": 1016
    },
    {
      "epoch": 0.01017,
      "grad_norm": 0.50419422786756,
      "learning_rate": 0.003,
      "loss": 4.4019,
      "step": 1017
    },
    {
      "epoch": 0.01018,
      "grad_norm": 0.5273528303530755,
      "learning_rate": 0.003,
      "loss": 4.4545,
      "step": 1018
    },
    {
      "epoch": 0.01019,
      "grad_norm": 0.5329688106940915,
      "learning_rate": 0.003,
      "loss": 4.4315,
      "step": 1019
    },
    {
      "epoch": 0.0102,
      "grad_norm": 0.6285459843623249,
      "learning_rate": 0.003,
      "loss": 4.4292,
      "step": 1020
    },
    {
      "epoch": 0.01021,
      "grad_norm": 0.6444525489483212,
      "learning_rate": 0.003,
      "loss": 4.412,
      "step": 1021
    },
    {
      "epoch": 0.01022,
      "grad_norm": 0.6609826768689684,
      "learning_rate": 0.003,
      "loss": 4.4194,
      "step": 1022
    },
    {
      "epoch": 0.01023,
      "grad_norm": 0.6479610287689606,
      "learning_rate": 0.003,
      "loss": 4.4151,
      "step": 1023
    },
    {
      "epoch": 0.01024,
      "grad_norm": 0.7091931447524652,
      "learning_rate": 0.003,
      "loss": 4.3798,
      "step": 1024
    },
    {
      "epoch": 0.01025,
      "grad_norm": 0.7026391214213478,
      "learning_rate": 0.003,
      "loss": 4.4116,
      "step": 1025
    },
    {
      "epoch": 0.01026,
      "grad_norm": 0.6731332289892269,
      "learning_rate": 0.003,
      "loss": 4.3991,
      "step": 1026
    },
    {
      "epoch": 0.01027,
      "grad_norm": 0.6590644472165706,
      "learning_rate": 0.003,
      "loss": 4.4177,
      "step": 1027
    },
    {
      "epoch": 0.01028,
      "grad_norm": 0.7485101036485022,
      "learning_rate": 0.003,
      "loss": 4.4014,
      "step": 1028
    },
    {
      "epoch": 0.01029,
      "grad_norm": 0.7198477689690366,
      "learning_rate": 0.003,
      "loss": 4.3803,
      "step": 1029
    },
    {
      "epoch": 0.0103,
      "grad_norm": 0.5542753635749327,
      "learning_rate": 0.003,
      "loss": 4.4023,
      "step": 1030
    },
    {
      "epoch": 0.01031,
      "grad_norm": 0.5292390629019561,
      "learning_rate": 0.003,
      "loss": 4.4144,
      "step": 1031
    },
    {
      "epoch": 0.01032,
      "grad_norm": 0.500926566427153,
      "learning_rate": 0.003,
      "loss": 4.3837,
      "step": 1032
    },
    {
      "epoch": 0.01033,
      "grad_norm": 0.49056326531128164,
      "learning_rate": 0.003,
      "loss": 4.4201,
      "step": 1033
    },
    {
      "epoch": 0.01034,
      "grad_norm": 0.5309292795236984,
      "learning_rate": 0.003,
      "loss": 4.3865,
      "step": 1034
    },
    {
      "epoch": 0.01035,
      "grad_norm": 0.6084296376545847,
      "learning_rate": 0.003,
      "loss": 4.3892,
      "step": 1035
    },
    {
      "epoch": 0.01036,
      "grad_norm": 0.8615895093148164,
      "learning_rate": 0.003,
      "loss": 4.3883,
      "step": 1036
    },
    {
      "epoch": 0.01037,
      "grad_norm": 0.8936988458454226,
      "learning_rate": 0.003,
      "loss": 4.4336,
      "step": 1037
    },
    {
      "epoch": 0.01038,
      "grad_norm": 0.6948017038229403,
      "learning_rate": 0.003,
      "loss": 4.4116,
      "step": 1038
    },
    {
      "epoch": 0.01039,
      "grad_norm": 0.9114790426144561,
      "learning_rate": 0.003,
      "loss": 4.4152,
      "step": 1039
    },
    {
      "epoch": 0.0104,
      "grad_norm": 0.8448966818619524,
      "learning_rate": 0.003,
      "loss": 4.4288,
      "step": 1040
    },
    {
      "epoch": 0.01041,
      "grad_norm": 0.7995140521375168,
      "learning_rate": 0.003,
      "loss": 4.404,
      "step": 1041
    },
    {
      "epoch": 0.01042,
      "grad_norm": 0.8979223927667839,
      "learning_rate": 0.003,
      "loss": 4.4357,
      "step": 1042
    },
    {
      "epoch": 0.01043,
      "grad_norm": 0.7233892787514891,
      "learning_rate": 0.003,
      "loss": 4.3775,
      "step": 1043
    },
    {
      "epoch": 0.01044,
      "grad_norm": 0.782866039824708,
      "learning_rate": 0.003,
      "loss": 4.4144,
      "step": 1044
    },
    {
      "epoch": 0.01045,
      "grad_norm": 0.841755171402396,
      "learning_rate": 0.003,
      "loss": 4.4353,
      "step": 1045
    },
    {
      "epoch": 0.01046,
      "grad_norm": 0.780348729633882,
      "learning_rate": 0.003,
      "loss": 4.4043,
      "step": 1046
    },
    {
      "epoch": 0.01047,
      "grad_norm": 0.8623775519511051,
      "learning_rate": 0.003,
      "loss": 4.4135,
      "step": 1047
    },
    {
      "epoch": 0.01048,
      "grad_norm": 0.9009461265034386,
      "learning_rate": 0.003,
      "loss": 4.4049,
      "step": 1048
    },
    {
      "epoch": 0.01049,
      "grad_norm": 0.739794637514069,
      "learning_rate": 0.003,
      "loss": 4.3786,
      "step": 1049
    },
    {
      "epoch": 0.0105,
      "grad_norm": 0.6259101207085414,
      "learning_rate": 0.003,
      "loss": 4.4154,
      "step": 1050
    },
    {
      "epoch": 0.01051,
      "grad_norm": 0.6694575857098493,
      "learning_rate": 0.003,
      "loss": 4.393,
      "step": 1051
    },
    {
      "epoch": 0.01052,
      "grad_norm": 0.7141380180592181,
      "learning_rate": 0.003,
      "loss": 4.4052,
      "step": 1052
    },
    {
      "epoch": 0.01053,
      "grad_norm": 0.7943106393078887,
      "learning_rate": 0.003,
      "loss": 4.4143,
      "step": 1053
    },
    {
      "epoch": 0.01054,
      "grad_norm": 0.7478655479661611,
      "learning_rate": 0.003,
      "loss": 4.4077,
      "step": 1054
    },
    {
      "epoch": 0.01055,
      "grad_norm": 0.707966249364238,
      "learning_rate": 0.003,
      "loss": 4.3861,
      "step": 1055
    },
    {
      "epoch": 0.01056,
      "grad_norm": 0.6358438963919717,
      "learning_rate": 0.003,
      "loss": 4.3823,
      "step": 1056
    },
    {
      "epoch": 0.01057,
      "grad_norm": 0.5691063929797074,
      "learning_rate": 0.003,
      "loss": 4.3849,
      "step": 1057
    },
    {
      "epoch": 0.01058,
      "grad_norm": 0.5109096197008457,
      "learning_rate": 0.003,
      "loss": 4.3737,
      "step": 1058
    },
    {
      "epoch": 0.01059,
      "grad_norm": 0.419033279294021,
      "learning_rate": 0.003,
      "loss": 4.3749,
      "step": 1059
    },
    {
      "epoch": 0.0106,
      "grad_norm": 0.4506837031966603,
      "learning_rate": 0.003,
      "loss": 4.3654,
      "step": 1060
    },
    {
      "epoch": 0.01061,
      "grad_norm": 0.42662194113596513,
      "learning_rate": 0.003,
      "loss": 4.361,
      "step": 1061
    },
    {
      "epoch": 0.01062,
      "grad_norm": 0.5018523046776621,
      "learning_rate": 0.003,
      "loss": 4.3568,
      "step": 1062
    },
    {
      "epoch": 0.01063,
      "grad_norm": 0.6580287488917459,
      "learning_rate": 0.003,
      "loss": 4.3552,
      "step": 1063
    },
    {
      "epoch": 0.01064,
      "grad_norm": 0.831793270479749,
      "learning_rate": 0.003,
      "loss": 4.3844,
      "step": 1064
    },
    {
      "epoch": 0.01065,
      "grad_norm": 1.0701105543701324,
      "learning_rate": 0.003,
      "loss": 4.3903,
      "step": 1065
    },
    {
      "epoch": 0.01066,
      "grad_norm": 0.943731953453328,
      "learning_rate": 0.003,
      "loss": 4.4118,
      "step": 1066
    },
    {
      "epoch": 0.01067,
      "grad_norm": 0.9236025360095688,
      "learning_rate": 0.003,
      "loss": 4.4136,
      "step": 1067
    },
    {
      "epoch": 0.01068,
      "grad_norm": 1.1282330525862354,
      "learning_rate": 0.003,
      "loss": 4.4118,
      "step": 1068
    },
    {
      "epoch": 0.01069,
      "grad_norm": 0.840410887973158,
      "learning_rate": 0.003,
      "loss": 4.4054,
      "step": 1069
    },
    {
      "epoch": 0.0107,
      "grad_norm": 0.733631994312363,
      "learning_rate": 0.003,
      "loss": 4.4566,
      "step": 1070
    },
    {
      "epoch": 0.01071,
      "grad_norm": 0.7799052277350242,
      "learning_rate": 0.003,
      "loss": 4.3774,
      "step": 1071
    },
    {
      "epoch": 0.01072,
      "grad_norm": 0.7960071687741883,
      "learning_rate": 0.003,
      "loss": 4.4136,
      "step": 1072
    },
    {
      "epoch": 0.01073,
      "grad_norm": 0.6975430946737453,
      "learning_rate": 0.003,
      "loss": 4.4079,
      "step": 1073
    },
    {
      "epoch": 0.01074,
      "grad_norm": 0.6813769264433432,
      "learning_rate": 0.003,
      "loss": 4.37,
      "step": 1074
    },
    {
      "epoch": 0.01075,
      "grad_norm": 0.5971783233576602,
      "learning_rate": 0.003,
      "loss": 4.4063,
      "step": 1075
    },
    {
      "epoch": 0.01076,
      "grad_norm": 0.5510713740534053,
      "learning_rate": 0.003,
      "loss": 4.3967,
      "step": 1076
    },
    {
      "epoch": 0.01077,
      "grad_norm": 0.5595097233894012,
      "learning_rate": 0.003,
      "loss": 4.3917,
      "step": 1077
    },
    {
      "epoch": 0.01078,
      "grad_norm": 0.6230934184673731,
      "learning_rate": 0.003,
      "loss": 4.3491,
      "step": 1078
    },
    {
      "epoch": 0.01079,
      "grad_norm": 0.7178707958532615,
      "learning_rate": 0.003,
      "loss": 4.3825,
      "step": 1079
    },
    {
      "epoch": 0.0108,
      "grad_norm": 0.7484447643520958,
      "learning_rate": 0.003,
      "loss": 4.3665,
      "step": 1080
    },
    {
      "epoch": 0.01081,
      "grad_norm": 0.780034137982597,
      "learning_rate": 0.003,
      "loss": 4.3727,
      "step": 1081
    },
    {
      "epoch": 0.01082,
      "grad_norm": 0.7403701975238451,
      "learning_rate": 0.003,
      "loss": 4.3894,
      "step": 1082
    },
    {
      "epoch": 0.01083,
      "grad_norm": 0.6305537452051372,
      "learning_rate": 0.003,
      "loss": 4.3733,
      "step": 1083
    },
    {
      "epoch": 0.01084,
      "grad_norm": 0.7435200213630843,
      "learning_rate": 0.003,
      "loss": 4.3825,
      "step": 1084
    },
    {
      "epoch": 0.01085,
      "grad_norm": 0.7405129376016774,
      "learning_rate": 0.003,
      "loss": 4.3735,
      "step": 1085
    },
    {
      "epoch": 0.01086,
      "grad_norm": 0.6037526750507048,
      "learning_rate": 0.003,
      "loss": 4.3449,
      "step": 1086
    },
    {
      "epoch": 0.01087,
      "grad_norm": 0.5349574840042758,
      "learning_rate": 0.003,
      "loss": 4.3549,
      "step": 1087
    },
    {
      "epoch": 0.01088,
      "grad_norm": 0.5555341529780033,
      "learning_rate": 0.003,
      "loss": 4.3917,
      "step": 1088
    },
    {
      "epoch": 0.01089,
      "grad_norm": 0.6254527345250118,
      "learning_rate": 0.003,
      "loss": 4.3599,
      "step": 1089
    },
    {
      "epoch": 0.0109,
      "grad_norm": 0.6828156926551208,
      "learning_rate": 0.003,
      "loss": 4.3884,
      "step": 1090
    },
    {
      "epoch": 0.01091,
      "grad_norm": 0.5753021180468998,
      "learning_rate": 0.003,
      "loss": 4.3549,
      "step": 1091
    },
    {
      "epoch": 0.01092,
      "grad_norm": 0.4077341561870379,
      "learning_rate": 0.003,
      "loss": 4.3436,
      "step": 1092
    },
    {
      "epoch": 0.01093,
      "grad_norm": 0.47615819716416974,
      "learning_rate": 0.003,
      "loss": 4.3664,
      "step": 1093
    },
    {
      "epoch": 0.01094,
      "grad_norm": 0.5845002574061751,
      "learning_rate": 0.003,
      "loss": 4.3553,
      "step": 1094
    },
    {
      "epoch": 0.01095,
      "grad_norm": 0.8583788887394415,
      "learning_rate": 0.003,
      "loss": 4.3944,
      "step": 1095
    },
    {
      "epoch": 0.01096,
      "grad_norm": 1.0479503783625854,
      "learning_rate": 0.003,
      "loss": 4.3824,
      "step": 1096
    },
    {
      "epoch": 0.01097,
      "grad_norm": 0.9584408455564232,
      "learning_rate": 0.003,
      "loss": 4.3695,
      "step": 1097
    },
    {
      "epoch": 0.01098,
      "grad_norm": 1.2545731298718157,
      "learning_rate": 0.003,
      "loss": 4.4068,
      "step": 1098
    },
    {
      "epoch": 0.01099,
      "grad_norm": 0.8305012272413557,
      "learning_rate": 0.003,
      "loss": 4.3706,
      "step": 1099
    },
    {
      "epoch": 0.011,
      "grad_norm": 0.7311868902259275,
      "learning_rate": 0.003,
      "loss": 4.3677,
      "step": 1100
    },
    {
      "epoch": 0.01101,
      "grad_norm": 0.6742597336847759,
      "learning_rate": 0.003,
      "loss": 4.3752,
      "step": 1101
    },
    {
      "epoch": 0.01102,
      "grad_norm": 0.6773795133108548,
      "learning_rate": 0.003,
      "loss": 4.3802,
      "step": 1102
    },
    {
      "epoch": 0.01103,
      "grad_norm": 0.6414092553075785,
      "learning_rate": 0.003,
      "loss": 4.3701,
      "step": 1103
    },
    {
      "epoch": 0.01104,
      "grad_norm": 0.6378763268399013,
      "learning_rate": 0.003,
      "loss": 4.3589,
      "step": 1104
    },
    {
      "epoch": 0.01105,
      "grad_norm": 0.6837675260878959,
      "learning_rate": 0.003,
      "loss": 4.3856,
      "step": 1105
    },
    {
      "epoch": 0.01106,
      "grad_norm": 0.667950958790211,
      "learning_rate": 0.003,
      "loss": 4.3574,
      "step": 1106
    },
    {
      "epoch": 0.01107,
      "grad_norm": 0.6082482122734244,
      "learning_rate": 0.003,
      "loss": 4.3623,
      "step": 1107
    },
    {
      "epoch": 0.01108,
      "grad_norm": 0.5983800302348057,
      "learning_rate": 0.003,
      "loss": 4.3313,
      "step": 1108
    },
    {
      "epoch": 0.01109,
      "grad_norm": 0.6046679697278923,
      "learning_rate": 0.003,
      "loss": 4.3476,
      "step": 1109
    },
    {
      "epoch": 0.0111,
      "grad_norm": 0.6353880163110637,
      "learning_rate": 0.003,
      "loss": 4.3675,
      "step": 1110
    },
    {
      "epoch": 0.01111,
      "grad_norm": 0.7049628214149826,
      "learning_rate": 0.003,
      "loss": 4.3473,
      "step": 1111
    },
    {
      "epoch": 0.01112,
      "grad_norm": 0.758960503794264,
      "learning_rate": 0.003,
      "loss": 4.369,
      "step": 1112
    },
    {
      "epoch": 0.01113,
      "grad_norm": 0.6091686772357772,
      "learning_rate": 0.003,
      "loss": 4.3335,
      "step": 1113
    },
    {
      "epoch": 0.01114,
      "grad_norm": 0.628169460357088,
      "learning_rate": 0.003,
      "loss": 4.3462,
      "step": 1114
    },
    {
      "epoch": 0.01115,
      "grad_norm": 0.7453432538813878,
      "learning_rate": 0.003,
      "loss": 4.3779,
      "step": 1115
    },
    {
      "epoch": 0.01116,
      "grad_norm": 0.9959163456029771,
      "learning_rate": 0.003,
      "loss": 4.3771,
      "step": 1116
    },
    {
      "epoch": 0.01117,
      "grad_norm": 0.9566018042437344,
      "learning_rate": 0.003,
      "loss": 4.3915,
      "step": 1117
    },
    {
      "epoch": 0.01118,
      "grad_norm": 0.6794219206934775,
      "learning_rate": 0.003,
      "loss": 4.372,
      "step": 1118
    },
    {
      "epoch": 0.01119,
      "grad_norm": 0.6027310157842346,
      "learning_rate": 0.003,
      "loss": 4.375,
      "step": 1119
    },
    {
      "epoch": 0.0112,
      "grad_norm": 0.6064414406921254,
      "learning_rate": 0.003,
      "loss": 4.3588,
      "step": 1120
    },
    {
      "epoch": 0.01121,
      "grad_norm": 0.643465354938861,
      "learning_rate": 0.003,
      "loss": 4.3691,
      "step": 1121
    },
    {
      "epoch": 0.01122,
      "grad_norm": 0.8439491151148678,
      "learning_rate": 0.003,
      "loss": 4.3539,
      "step": 1122
    },
    {
      "epoch": 0.01123,
      "grad_norm": 0.857123487934385,
      "learning_rate": 0.003,
      "loss": 4.345,
      "step": 1123
    },
    {
      "epoch": 0.01124,
      "grad_norm": 0.7412296015988188,
      "learning_rate": 0.003,
      "loss": 4.3865,
      "step": 1124
    },
    {
      "epoch": 0.01125,
      "grad_norm": 0.8143761816040683,
      "learning_rate": 0.003,
      "loss": 4.3583,
      "step": 1125
    },
    {
      "epoch": 0.01126,
      "grad_norm": 0.741767094028342,
      "learning_rate": 0.003,
      "loss": 4.3618,
      "step": 1126
    },
    {
      "epoch": 0.01127,
      "grad_norm": 0.737420855232079,
      "learning_rate": 0.003,
      "loss": 4.3948,
      "step": 1127
    },
    {
      "epoch": 0.01128,
      "grad_norm": 0.7656500975813709,
      "learning_rate": 0.003,
      "loss": 4.372,
      "step": 1128
    },
    {
      "epoch": 0.01129,
      "grad_norm": 0.8248659664813461,
      "learning_rate": 0.003,
      "loss": 4.3685,
      "step": 1129
    },
    {
      "epoch": 0.0113,
      "grad_norm": 0.7756542452356111,
      "learning_rate": 0.003,
      "loss": 4.3447,
      "step": 1130
    },
    {
      "epoch": 0.01131,
      "grad_norm": 0.7965849423705411,
      "learning_rate": 0.003,
      "loss": 4.3615,
      "step": 1131
    },
    {
      "epoch": 0.01132,
      "grad_norm": 0.718692737781693,
      "learning_rate": 0.003,
      "loss": 4.3824,
      "step": 1132
    },
    {
      "epoch": 0.01133,
      "grad_norm": 0.6500905111846983,
      "learning_rate": 0.003,
      "loss": 4.34,
      "step": 1133
    },
    {
      "epoch": 0.01134,
      "grad_norm": 0.6052834019155894,
      "learning_rate": 0.003,
      "loss": 4.3525,
      "step": 1134
    },
    {
      "epoch": 0.01135,
      "grad_norm": 0.57728723025885,
      "learning_rate": 0.003,
      "loss": 4.3767,
      "step": 1135
    },
    {
      "epoch": 0.01136,
      "grad_norm": 0.6622614298653483,
      "learning_rate": 0.003,
      "loss": 4.3514,
      "step": 1136
    },
    {
      "epoch": 0.01137,
      "grad_norm": 0.6945355696134266,
      "learning_rate": 0.003,
      "loss": 4.3784,
      "step": 1137
    },
    {
      "epoch": 0.01138,
      "grad_norm": 0.7423434054596434,
      "learning_rate": 0.003,
      "loss": 4.3361,
      "step": 1138
    },
    {
      "epoch": 0.01139,
      "grad_norm": 0.7678127530736037,
      "learning_rate": 0.003,
      "loss": 4.3463,
      "step": 1139
    },
    {
      "epoch": 0.0114,
      "grad_norm": 0.7470822118949633,
      "learning_rate": 0.003,
      "loss": 4.3322,
      "step": 1140
    },
    {
      "epoch": 0.01141,
      "grad_norm": 0.903198095826591,
      "learning_rate": 0.003,
      "loss": 4.3608,
      "step": 1141
    },
    {
      "epoch": 0.01142,
      "grad_norm": 1.0797547624524322,
      "learning_rate": 0.003,
      "loss": 4.3612,
      "step": 1142
    },
    {
      "epoch": 0.01143,
      "grad_norm": 0.7877548466868329,
      "learning_rate": 0.003,
      "loss": 4.3542,
      "step": 1143
    },
    {
      "epoch": 0.01144,
      "grad_norm": 0.8147882944795127,
      "learning_rate": 0.003,
      "loss": 4.3253,
      "step": 1144
    },
    {
      "epoch": 0.01145,
      "grad_norm": 0.7148112106554612,
      "learning_rate": 0.003,
      "loss": 4.3655,
      "step": 1145
    },
    {
      "epoch": 0.01146,
      "grad_norm": 0.6580426381015894,
      "learning_rate": 0.003,
      "loss": 4.3445,
      "step": 1146
    },
    {
      "epoch": 0.01147,
      "grad_norm": 0.7448256416265617,
      "learning_rate": 0.003,
      "loss": 4.3367,
      "step": 1147
    },
    {
      "epoch": 0.01148,
      "grad_norm": 0.6968832253815664,
      "learning_rate": 0.003,
      "loss": 4.3386,
      "step": 1148
    },
    {
      "epoch": 0.01149,
      "grad_norm": 0.627190656091196,
      "learning_rate": 0.003,
      "loss": 4.3527,
      "step": 1149
    },
    {
      "epoch": 0.0115,
      "grad_norm": 0.6739971655272956,
      "learning_rate": 0.003,
      "loss": 4.3445,
      "step": 1150
    },
    {
      "epoch": 0.01151,
      "grad_norm": 0.7518825421443525,
      "learning_rate": 0.003,
      "loss": 4.3445,
      "step": 1151
    },
    {
      "epoch": 0.01152,
      "grad_norm": 0.7628403894732813,
      "learning_rate": 0.003,
      "loss": 4.3529,
      "step": 1152
    },
    {
      "epoch": 0.01153,
      "grad_norm": 0.726109999889872,
      "learning_rate": 0.003,
      "loss": 4.3668,
      "step": 1153
    },
    {
      "epoch": 0.01154,
      "grad_norm": 0.7747132516080261,
      "learning_rate": 0.003,
      "loss": 4.3438,
      "step": 1154
    },
    {
      "epoch": 0.01155,
      "grad_norm": 0.8180276502719757,
      "learning_rate": 0.003,
      "loss": 4.3369,
      "step": 1155
    },
    {
      "epoch": 0.01156,
      "grad_norm": 0.7757994684934312,
      "learning_rate": 0.003,
      "loss": 4.3405,
      "step": 1156
    },
    {
      "epoch": 0.01157,
      "grad_norm": 0.7233155240278132,
      "learning_rate": 0.003,
      "loss": 4.3659,
      "step": 1157
    },
    {
      "epoch": 0.01158,
      "grad_norm": 0.6552894912204377,
      "learning_rate": 0.003,
      "loss": 4.332,
      "step": 1158
    },
    {
      "epoch": 0.01159,
      "grad_norm": 0.7654422010962233,
      "learning_rate": 0.003,
      "loss": 4.3316,
      "step": 1159
    },
    {
      "epoch": 0.0116,
      "grad_norm": 0.7129248873282286,
      "learning_rate": 0.003,
      "loss": 4.3462,
      "step": 1160
    },
    {
      "epoch": 0.01161,
      "grad_norm": 0.728340877307682,
      "learning_rate": 0.003,
      "loss": 4.3213,
      "step": 1161
    },
    {
      "epoch": 0.01162,
      "grad_norm": 0.6672912532139038,
      "learning_rate": 0.003,
      "loss": 4.3344,
      "step": 1162
    },
    {
      "epoch": 0.01163,
      "grad_norm": 0.6087688372398936,
      "learning_rate": 0.003,
      "loss": 4.3512,
      "step": 1163
    },
    {
      "epoch": 0.01164,
      "grad_norm": 0.5012921787632111,
      "learning_rate": 0.003,
      "loss": 4.3329,
      "step": 1164
    },
    {
      "epoch": 0.01165,
      "grad_norm": 0.4972192156782153,
      "learning_rate": 0.003,
      "loss": 4.3277,
      "step": 1165
    },
    {
      "epoch": 0.01166,
      "grad_norm": 0.48804976467805194,
      "learning_rate": 0.003,
      "loss": 4.3262,
      "step": 1166
    },
    {
      "epoch": 0.01167,
      "grad_norm": 0.5481538887564317,
      "learning_rate": 0.003,
      "loss": 4.2952,
      "step": 1167
    },
    {
      "epoch": 0.01168,
      "grad_norm": 0.6090792597366076,
      "learning_rate": 0.003,
      "loss": 4.3026,
      "step": 1168
    },
    {
      "epoch": 0.01169,
      "grad_norm": 0.653085333922602,
      "learning_rate": 0.003,
      "loss": 4.347,
      "step": 1169
    },
    {
      "epoch": 0.0117,
      "grad_norm": 0.62436544437129,
      "learning_rate": 0.003,
      "loss": 4.3309,
      "step": 1170
    },
    {
      "epoch": 0.01171,
      "grad_norm": 0.654802486268078,
      "learning_rate": 0.003,
      "loss": 4.3143,
      "step": 1171
    },
    {
      "epoch": 0.01172,
      "grad_norm": 0.6246624927886145,
      "learning_rate": 0.003,
      "loss": 4.3038,
      "step": 1172
    },
    {
      "epoch": 0.01173,
      "grad_norm": 0.6813303457409844,
      "learning_rate": 0.003,
      "loss": 4.3382,
      "step": 1173
    },
    {
      "epoch": 0.01174,
      "grad_norm": 0.7721136771646261,
      "learning_rate": 0.003,
      "loss": 4.3203,
      "step": 1174
    },
    {
      "epoch": 0.01175,
      "grad_norm": 0.823782632958531,
      "learning_rate": 0.003,
      "loss": 4.3152,
      "step": 1175
    },
    {
      "epoch": 0.01176,
      "grad_norm": 0.9763749462159719,
      "learning_rate": 0.003,
      "loss": 4.3509,
      "step": 1176
    },
    {
      "epoch": 0.01177,
      "grad_norm": 1.0894015492716702,
      "learning_rate": 0.003,
      "loss": 4.3444,
      "step": 1177
    },
    {
      "epoch": 0.01178,
      "grad_norm": 0.8156537670820067,
      "learning_rate": 0.003,
      "loss": 4.3473,
      "step": 1178
    },
    {
      "epoch": 0.01179,
      "grad_norm": 0.8529362760715462,
      "learning_rate": 0.003,
      "loss": 4.3276,
      "step": 1179
    },
    {
      "epoch": 0.0118,
      "grad_norm": 0.857392382910556,
      "learning_rate": 0.003,
      "loss": 4.395,
      "step": 1180
    },
    {
      "epoch": 0.01181,
      "grad_norm": 0.8128436446210868,
      "learning_rate": 0.003,
      "loss": 4.3809,
      "step": 1181
    },
    {
      "epoch": 0.01182,
      "grad_norm": 0.911584010746237,
      "learning_rate": 0.003,
      "loss": 4.3433,
      "step": 1182
    },
    {
      "epoch": 0.01183,
      "grad_norm": 0.8460613119287651,
      "learning_rate": 0.003,
      "loss": 4.337,
      "step": 1183
    },
    {
      "epoch": 0.01184,
      "grad_norm": 0.7730717809982315,
      "learning_rate": 0.003,
      "loss": 4.3753,
      "step": 1184
    },
    {
      "epoch": 0.01185,
      "grad_norm": 0.7718355908910234,
      "learning_rate": 0.003,
      "loss": 4.3449,
      "step": 1185
    },
    {
      "epoch": 0.01186,
      "grad_norm": 0.749856576054878,
      "learning_rate": 0.003,
      "loss": 4.3669,
      "step": 1186
    },
    {
      "epoch": 0.01187,
      "grad_norm": 0.6703031442863907,
      "learning_rate": 0.003,
      "loss": 4.33,
      "step": 1187
    },
    {
      "epoch": 0.01188,
      "grad_norm": 0.6597409636847196,
      "learning_rate": 0.003,
      "loss": 4.3387,
      "step": 1188
    },
    {
      "epoch": 0.01189,
      "grad_norm": 0.7528540145175824,
      "learning_rate": 0.003,
      "loss": 4.3431,
      "step": 1189
    },
    {
      "epoch": 0.0119,
      "grad_norm": 0.7990197433249405,
      "learning_rate": 0.003,
      "loss": 4.3273,
      "step": 1190
    },
    {
      "epoch": 0.01191,
      "grad_norm": 0.8012185403960035,
      "learning_rate": 0.003,
      "loss": 4.3081,
      "step": 1191
    },
    {
      "epoch": 0.01192,
      "grad_norm": 0.7571323847445259,
      "learning_rate": 0.003,
      "loss": 4.3381,
      "step": 1192
    },
    {
      "epoch": 0.01193,
      "grad_norm": 0.6624809456477357,
      "learning_rate": 0.003,
      "loss": 4.3371,
      "step": 1193
    },
    {
      "epoch": 0.01194,
      "grad_norm": 0.7303795208901569,
      "learning_rate": 0.003,
      "loss": 4.3395,
      "step": 1194
    },
    {
      "epoch": 0.01195,
      "grad_norm": 0.7841704264970735,
      "learning_rate": 0.003,
      "loss": 4.3211,
      "step": 1195
    },
    {
      "epoch": 0.01196,
      "grad_norm": 0.8218420512118195,
      "learning_rate": 0.003,
      "loss": 4.3286,
      "step": 1196
    },
    {
      "epoch": 0.01197,
      "grad_norm": 0.6346054129298183,
      "learning_rate": 0.003,
      "loss": 4.2874,
      "step": 1197
    },
    {
      "epoch": 0.01198,
      "grad_norm": 0.648198494061273,
      "learning_rate": 0.003,
      "loss": 4.3575,
      "step": 1198
    },
    {
      "epoch": 0.01199,
      "grad_norm": 0.719551428234481,
      "learning_rate": 0.003,
      "loss": 4.3378,
      "step": 1199
    },
    {
      "epoch": 0.012,
      "grad_norm": 0.7355036739818444,
      "learning_rate": 0.003,
      "loss": 4.3295,
      "step": 1200
    },
    {
      "epoch": 0.01201,
      "grad_norm": 0.9984238489578499,
      "learning_rate": 0.003,
      "loss": 4.3657,
      "step": 1201
    },
    {
      "epoch": 0.01202,
      "grad_norm": 1.2422658156142912,
      "learning_rate": 0.003,
      "loss": 4.3719,
      "step": 1202
    },
    {
      "epoch": 0.01203,
      "grad_norm": 0.7461879644958674,
      "learning_rate": 0.003,
      "loss": 4.3511,
      "step": 1203
    },
    {
      "epoch": 0.01204,
      "grad_norm": 0.8846740490522026,
      "learning_rate": 0.003,
      "loss": 4.3624,
      "step": 1204
    },
    {
      "epoch": 0.01205,
      "grad_norm": 0.7423867106895455,
      "learning_rate": 0.003,
      "loss": 4.3403,
      "step": 1205
    },
    {
      "epoch": 0.01206,
      "grad_norm": 0.7502877844645354,
      "learning_rate": 0.003,
      "loss": 4.3858,
      "step": 1206
    },
    {
      "epoch": 0.01207,
      "grad_norm": 0.8010490453321524,
      "learning_rate": 0.003,
      "loss": 4.3053,
      "step": 1207
    },
    {
      "epoch": 0.01208,
      "grad_norm": 0.7863992142209323,
      "learning_rate": 0.003,
      "loss": 4.3665,
      "step": 1208
    },
    {
      "epoch": 0.01209,
      "grad_norm": 0.6936314129825434,
      "learning_rate": 0.003,
      "loss": 4.3419,
      "step": 1209
    },
    {
      "epoch": 0.0121,
      "grad_norm": 0.5834058626530795,
      "learning_rate": 0.003,
      "loss": 4.3425,
      "step": 1210
    },
    {
      "epoch": 0.01211,
      "grad_norm": 0.5442454390371272,
      "learning_rate": 0.003,
      "loss": 4.3248,
      "step": 1211
    },
    {
      "epoch": 0.01212,
      "grad_norm": 0.5640531404426855,
      "learning_rate": 0.003,
      "loss": 4.3485,
      "step": 1212
    },
    {
      "epoch": 0.01213,
      "grad_norm": 0.4686052961934471,
      "learning_rate": 0.003,
      "loss": 4.3204,
      "step": 1213
    },
    {
      "epoch": 0.01214,
      "grad_norm": 0.4623933928179833,
      "learning_rate": 0.003,
      "loss": 4.3023,
      "step": 1214
    },
    {
      "epoch": 0.01215,
      "grad_norm": 0.390314877629531,
      "learning_rate": 0.003,
      "loss": 4.3075,
      "step": 1215
    },
    {
      "epoch": 0.01216,
      "grad_norm": 0.3573034139707242,
      "learning_rate": 0.003,
      "loss": 4.2995,
      "step": 1216
    },
    {
      "epoch": 0.01217,
      "grad_norm": 0.38593293619021,
      "learning_rate": 0.003,
      "loss": 4.3063,
      "step": 1217
    },
    {
      "epoch": 0.01218,
      "grad_norm": 0.40705674810377235,
      "learning_rate": 0.003,
      "loss": 4.3006,
      "step": 1218
    },
    {
      "epoch": 0.01219,
      "grad_norm": 0.45251062517773044,
      "learning_rate": 0.003,
      "loss": 4.3,
      "step": 1219
    },
    {
      "epoch": 0.0122,
      "grad_norm": 0.5627372695280709,
      "learning_rate": 0.003,
      "loss": 4.2943,
      "step": 1220
    },
    {
      "epoch": 0.01221,
      "grad_norm": 0.6302297504421508,
      "learning_rate": 0.003,
      "loss": 4.3122,
      "step": 1221
    },
    {
      "epoch": 0.01222,
      "grad_norm": 0.695146633094906,
      "learning_rate": 0.003,
      "loss": 4.2975,
      "step": 1222
    },
    {
      "epoch": 0.01223,
      "grad_norm": 0.6745684022343855,
      "learning_rate": 0.003,
      "loss": 4.2965,
      "step": 1223
    },
    {
      "epoch": 0.01224,
      "grad_norm": 0.6275537412165774,
      "learning_rate": 0.003,
      "loss": 4.2794,
      "step": 1224
    },
    {
      "epoch": 0.01225,
      "grad_norm": 0.732873035624088,
      "learning_rate": 0.003,
      "loss": 4.3084,
      "step": 1225
    },
    {
      "epoch": 0.01226,
      "grad_norm": 0.8536265776058696,
      "learning_rate": 0.003,
      "loss": 4.3061,
      "step": 1226
    },
    {
      "epoch": 0.01227,
      "grad_norm": 0.7710687106511338,
      "learning_rate": 0.003,
      "loss": 4.3371,
      "step": 1227
    },
    {
      "epoch": 0.01228,
      "grad_norm": 0.6158196915186589,
      "learning_rate": 0.003,
      "loss": 4.3122,
      "step": 1228
    },
    {
      "epoch": 0.01229,
      "grad_norm": 0.5647355795097493,
      "learning_rate": 0.003,
      "loss": 4.2952,
      "step": 1229
    },
    {
      "epoch": 0.0123,
      "grad_norm": 0.5989687141441427,
      "learning_rate": 0.003,
      "loss": 4.3304,
      "step": 1230
    },
    {
      "epoch": 0.01231,
      "grad_norm": 0.6070784312189734,
      "learning_rate": 0.003,
      "loss": 4.2993,
      "step": 1231
    },
    {
      "epoch": 0.01232,
      "grad_norm": 0.6098189672837228,
      "learning_rate": 0.003,
      "loss": 4.276,
      "step": 1232
    },
    {
      "epoch": 0.01233,
      "grad_norm": 0.6187452049200426,
      "learning_rate": 0.003,
      "loss": 4.2843,
      "step": 1233
    },
    {
      "epoch": 0.01234,
      "grad_norm": 0.6847326322735632,
      "learning_rate": 0.003,
      "loss": 4.3016,
      "step": 1234
    },
    {
      "epoch": 0.01235,
      "grad_norm": 0.7347950056629514,
      "learning_rate": 0.003,
      "loss": 4.2975,
      "step": 1235
    },
    {
      "epoch": 0.01236,
      "grad_norm": 0.8404243381213906,
      "learning_rate": 0.003,
      "loss": 4.3037,
      "step": 1236
    },
    {
      "epoch": 0.01237,
      "grad_norm": 1.1666025530795177,
      "learning_rate": 0.003,
      "loss": 4.3338,
      "step": 1237
    },
    {
      "epoch": 0.01238,
      "grad_norm": 0.7706517401085584,
      "learning_rate": 0.003,
      "loss": 4.3184,
      "step": 1238
    },
    {
      "epoch": 0.01239,
      "grad_norm": 0.8892529662707559,
      "learning_rate": 0.003,
      "loss": 4.3055,
      "step": 1239
    },
    {
      "epoch": 0.0124,
      "grad_norm": 0.9665932126023419,
      "learning_rate": 0.003,
      "loss": 4.3004,
      "step": 1240
    },
    {
      "epoch": 0.01241,
      "grad_norm": 1.0935280340663984,
      "learning_rate": 0.003,
      "loss": 4.3233,
      "step": 1241
    },
    {
      "epoch": 0.01242,
      "grad_norm": 1.117726648141726,
      "learning_rate": 0.003,
      "loss": 4.3312,
      "step": 1242
    },
    {
      "epoch": 0.01243,
      "grad_norm": 1.0553809973617114,
      "learning_rate": 0.003,
      "loss": 4.3181,
      "step": 1243
    },
    {
      "epoch": 0.01244,
      "grad_norm": 0.9192132297518948,
      "learning_rate": 0.003,
      "loss": 4.3267,
      "step": 1244
    },
    {
      "epoch": 0.01245,
      "grad_norm": 0.9382654947912115,
      "learning_rate": 0.003,
      "loss": 4.3187,
      "step": 1245
    },
    {
      "epoch": 0.01246,
      "grad_norm": 1.0892504099540619,
      "learning_rate": 0.003,
      "loss": 4.3062,
      "step": 1246
    },
    {
      "epoch": 0.01247,
      "grad_norm": 0.8993491326759976,
      "learning_rate": 0.003,
      "loss": 4.3511,
      "step": 1247
    },
    {
      "epoch": 0.01248,
      "grad_norm": 0.908268119830267,
      "learning_rate": 0.003,
      "loss": 4.3379,
      "step": 1248
    },
    {
      "epoch": 0.01249,
      "grad_norm": 0.8800131758337482,
      "learning_rate": 0.003,
      "loss": 4.3278,
      "step": 1249
    },
    {
      "epoch": 0.0125,
      "grad_norm": 0.8337196500160362,
      "learning_rate": 0.003,
      "loss": 4.3052,
      "step": 1250
    },
    {
      "epoch": 0.01251,
      "grad_norm": 0.9249131658343839,
      "learning_rate": 0.003,
      "loss": 4.3424,
      "step": 1251
    },
    {
      "epoch": 0.01252,
      "grad_norm": 0.8524650338611843,
      "learning_rate": 0.003,
      "loss": 4.3172,
      "step": 1252
    },
    {
      "epoch": 0.01253,
      "grad_norm": 0.7163285364378025,
      "learning_rate": 0.003,
      "loss": 4.2931,
      "step": 1253
    },
    {
      "epoch": 0.01254,
      "grad_norm": 0.6991567907525832,
      "learning_rate": 0.003,
      "loss": 4.3341,
      "step": 1254
    },
    {
      "epoch": 0.01255,
      "grad_norm": 0.6949992430981787,
      "learning_rate": 0.003,
      "loss": 4.2856,
      "step": 1255
    },
    {
      "epoch": 0.01256,
      "grad_norm": 0.7100922258125638,
      "learning_rate": 0.003,
      "loss": 4.3118,
      "step": 1256
    },
    {
      "epoch": 0.01257,
      "grad_norm": 0.6612217096643498,
      "learning_rate": 0.003,
      "loss": 4.2941,
      "step": 1257
    },
    {
      "epoch": 0.01258,
      "grad_norm": 0.7747579760551054,
      "learning_rate": 0.003,
      "loss": 4.3107,
      "step": 1258
    },
    {
      "epoch": 0.01259,
      "grad_norm": 1.124107981669201,
      "learning_rate": 0.003,
      "loss": 4.3222,
      "step": 1259
    },
    {
      "epoch": 0.0126,
      "grad_norm": 0.9528743820284102,
      "learning_rate": 0.003,
      "loss": 4.3627,
      "step": 1260
    },
    {
      "epoch": 0.01261,
      "grad_norm": 0.842570464638209,
      "learning_rate": 0.003,
      "loss": 4.3486,
      "step": 1261
    },
    {
      "epoch": 0.01262,
      "grad_norm": 0.8393239407149024,
      "learning_rate": 0.003,
      "loss": 4.3397,
      "step": 1262
    },
    {
      "epoch": 0.01263,
      "grad_norm": 0.8299261180456607,
      "learning_rate": 0.003,
      "loss": 4.3362,
      "step": 1263
    },
    {
      "epoch": 0.01264,
      "grad_norm": 0.8141230470068299,
      "learning_rate": 0.003,
      "loss": 4.3187,
      "step": 1264
    },
    {
      "epoch": 0.01265,
      "grad_norm": 1.0141550421015684,
      "learning_rate": 0.003,
      "loss": 4.3395,
      "step": 1265
    },
    {
      "epoch": 0.01266,
      "grad_norm": 0.7612600647582426,
      "learning_rate": 0.003,
      "loss": 4.3297,
      "step": 1266
    },
    {
      "epoch": 0.01267,
      "grad_norm": 0.6187340841665547,
      "learning_rate": 0.003,
      "loss": 4.3447,
      "step": 1267
    },
    {
      "epoch": 0.01268,
      "grad_norm": 0.5822458314315527,
      "learning_rate": 0.003,
      "loss": 4.3275,
      "step": 1268
    },
    {
      "epoch": 0.01269,
      "grad_norm": 0.4492593229301203,
      "learning_rate": 0.003,
      "loss": 4.2855,
      "step": 1269
    },
    {
      "epoch": 0.0127,
      "grad_norm": 0.5016783253235925,
      "learning_rate": 0.003,
      "loss": 4.3213,
      "step": 1270
    },
    {
      "epoch": 0.01271,
      "grad_norm": 0.44977006545763915,
      "learning_rate": 0.003,
      "loss": 4.297,
      "step": 1271
    },
    {
      "epoch": 0.01272,
      "grad_norm": 0.43028280089396154,
      "learning_rate": 0.003,
      "loss": 4.291,
      "step": 1272
    },
    {
      "epoch": 0.01273,
      "grad_norm": 0.4520162301705406,
      "learning_rate": 0.003,
      "loss": 4.3158,
      "step": 1273
    },
    {
      "epoch": 0.01274,
      "grad_norm": 0.451409910695501,
      "learning_rate": 0.003,
      "loss": 4.3063,
      "step": 1274
    },
    {
      "epoch": 0.01275,
      "grad_norm": 0.4633201534438662,
      "learning_rate": 0.003,
      "loss": 4.2756,
      "step": 1275
    },
    {
      "epoch": 0.01276,
      "grad_norm": 0.4818283038515168,
      "learning_rate": 0.003,
      "loss": 4.2804,
      "step": 1276
    },
    {
      "epoch": 0.01277,
      "grad_norm": 0.5612989347677225,
      "learning_rate": 0.003,
      "loss": 4.2895,
      "step": 1277
    },
    {
      "epoch": 0.01278,
      "grad_norm": 0.6134565833773333,
      "learning_rate": 0.003,
      "loss": 4.2966,
      "step": 1278
    },
    {
      "epoch": 0.01279,
      "grad_norm": 0.548830310802857,
      "learning_rate": 0.003,
      "loss": 4.2778,
      "step": 1279
    },
    {
      "epoch": 0.0128,
      "grad_norm": 0.5192954436192417,
      "learning_rate": 0.003,
      "loss": 4.2736,
      "step": 1280
    },
    {
      "epoch": 0.01281,
      "grad_norm": 0.7079203023923951,
      "learning_rate": 0.003,
      "loss": 4.2645,
      "step": 1281
    },
    {
      "epoch": 0.01282,
      "grad_norm": 0.8640412732426497,
      "learning_rate": 0.003,
      "loss": 4.3216,
      "step": 1282
    },
    {
      "epoch": 0.01283,
      "grad_norm": 0.7685939231737142,
      "learning_rate": 0.003,
      "loss": 4.2859,
      "step": 1283
    },
    {
      "epoch": 0.01284,
      "grad_norm": 0.6000861053702143,
      "learning_rate": 0.003,
      "loss": 4.2792,
      "step": 1284
    },
    {
      "epoch": 0.01285,
      "grad_norm": 0.7254306454469097,
      "learning_rate": 0.003,
      "loss": 4.2881,
      "step": 1285
    },
    {
      "epoch": 0.01286,
      "grad_norm": 0.6733987067909273,
      "learning_rate": 0.003,
      "loss": 4.3069,
      "step": 1286
    },
    {
      "epoch": 0.01287,
      "grad_norm": 0.6347430576087306,
      "learning_rate": 0.003,
      "loss": 4.2706,
      "step": 1287
    },
    {
      "epoch": 0.01288,
      "grad_norm": 0.6742039696219508,
      "learning_rate": 0.003,
      "loss": 4.2877,
      "step": 1288
    },
    {
      "epoch": 0.01289,
      "grad_norm": 0.7964754563506287,
      "learning_rate": 0.003,
      "loss": 4.2851,
      "step": 1289
    },
    {
      "epoch": 0.0129,
      "grad_norm": 0.7466700913788555,
      "learning_rate": 0.003,
      "loss": 4.3108,
      "step": 1290
    },
    {
      "epoch": 0.01291,
      "grad_norm": 0.7257835946382555,
      "learning_rate": 0.003,
      "loss": 4.2878,
      "step": 1291
    },
    {
      "epoch": 0.01292,
      "grad_norm": 0.7356576793031216,
      "learning_rate": 0.003,
      "loss": 4.3222,
      "step": 1292
    },
    {
      "epoch": 0.01293,
      "grad_norm": 0.7652258994712906,
      "learning_rate": 0.003,
      "loss": 4.312,
      "step": 1293
    },
    {
      "epoch": 0.01294,
      "grad_norm": 0.8074663392977137,
      "learning_rate": 0.003,
      "loss": 4.2851,
      "step": 1294
    },
    {
      "epoch": 0.01295,
      "grad_norm": 0.8419246282427761,
      "learning_rate": 0.003,
      "loss": 4.3171,
      "step": 1295
    },
    {
      "epoch": 0.01296,
      "grad_norm": 0.9067886743130651,
      "learning_rate": 0.003,
      "loss": 4.2936,
      "step": 1296
    },
    {
      "epoch": 0.01297,
      "grad_norm": 0.9867944428505203,
      "learning_rate": 0.003,
      "loss": 4.3168,
      "step": 1297
    },
    {
      "epoch": 0.01298,
      "grad_norm": 0.9008581643124973,
      "learning_rate": 0.003,
      "loss": 4.2927,
      "step": 1298
    },
    {
      "epoch": 0.01299,
      "grad_norm": 0.9659427448976995,
      "learning_rate": 0.003,
      "loss": 4.3085,
      "step": 1299
    },
    {
      "epoch": 0.013,
      "grad_norm": 1.004138764919463,
      "learning_rate": 0.003,
      "loss": 4.3011,
      "step": 1300
    },
    {
      "epoch": 0.01301,
      "grad_norm": 0.9518499756671687,
      "learning_rate": 0.003,
      "loss": 4.3191,
      "step": 1301
    },
    {
      "epoch": 0.01302,
      "grad_norm": 0.8711824338075493,
      "learning_rate": 0.003,
      "loss": 4.361,
      "step": 1302
    },
    {
      "epoch": 0.01303,
      "grad_norm": 0.8676764239234932,
      "learning_rate": 0.003,
      "loss": 4.3063,
      "step": 1303
    },
    {
      "epoch": 0.01304,
      "grad_norm": 0.7125360490503003,
      "learning_rate": 0.003,
      "loss": 4.3284,
      "step": 1304
    },
    {
      "epoch": 0.01305,
      "grad_norm": 0.6046564741110017,
      "learning_rate": 0.003,
      "loss": 4.3089,
      "step": 1305
    },
    {
      "epoch": 0.01306,
      "grad_norm": 0.5647927538938984,
      "learning_rate": 0.003,
      "loss": 4.2992,
      "step": 1306
    },
    {
      "epoch": 0.01307,
      "grad_norm": 0.5913576309809552,
      "learning_rate": 0.003,
      "loss": 4.3149,
      "step": 1307
    },
    {
      "epoch": 0.01308,
      "grad_norm": 0.5924251650199668,
      "learning_rate": 0.003,
      "loss": 4.2932,
      "step": 1308
    },
    {
      "epoch": 0.01309,
      "grad_norm": 0.507504674715556,
      "learning_rate": 0.003,
      "loss": 4.326,
      "step": 1309
    },
    {
      "epoch": 0.0131,
      "grad_norm": 0.6387323300673942,
      "learning_rate": 0.003,
      "loss": 4.2756,
      "step": 1310
    },
    {
      "epoch": 0.01311,
      "grad_norm": 0.7281996988052075,
      "learning_rate": 0.003,
      "loss": 4.3202,
      "step": 1311
    },
    {
      "epoch": 0.01312,
      "grad_norm": 0.8834351506579509,
      "learning_rate": 0.003,
      "loss": 4.2908,
      "step": 1312
    },
    {
      "epoch": 0.01313,
      "grad_norm": 0.840185011015699,
      "learning_rate": 0.003,
      "loss": 4.2898,
      "step": 1313
    },
    {
      "epoch": 0.01314,
      "grad_norm": 0.6830490768340751,
      "learning_rate": 0.003,
      "loss": 4.2745,
      "step": 1314
    },
    {
      "epoch": 0.01315,
      "grad_norm": 0.6358981723627178,
      "learning_rate": 0.003,
      "loss": 4.2992,
      "step": 1315
    },
    {
      "epoch": 0.01316,
      "grad_norm": 0.6007791887068558,
      "learning_rate": 0.003,
      "loss": 4.2829,
      "step": 1316
    },
    {
      "epoch": 0.01317,
      "grad_norm": 0.5048125779943076,
      "learning_rate": 0.003,
      "loss": 4.3051,
      "step": 1317
    },
    {
      "epoch": 0.01318,
      "grad_norm": 0.49450675942854344,
      "learning_rate": 0.003,
      "loss": 4.2837,
      "step": 1318
    },
    {
      "epoch": 0.01319,
      "grad_norm": 0.5258905508186564,
      "learning_rate": 0.003,
      "loss": 4.292,
      "step": 1319
    },
    {
      "epoch": 0.0132,
      "grad_norm": 0.5585710036479011,
      "learning_rate": 0.003,
      "loss": 4.2888,
      "step": 1320
    },
    {
      "epoch": 0.01321,
      "grad_norm": 0.5365220915283813,
      "learning_rate": 0.003,
      "loss": 4.2608,
      "step": 1321
    },
    {
      "epoch": 0.01322,
      "grad_norm": 0.5557887511106342,
      "learning_rate": 0.003,
      "loss": 4.292,
      "step": 1322
    },
    {
      "epoch": 0.01323,
      "grad_norm": 0.5531377716759448,
      "learning_rate": 0.003,
      "loss": 4.2837,
      "step": 1323
    },
    {
      "epoch": 0.01324,
      "grad_norm": 0.48156733948461256,
      "learning_rate": 0.003,
      "loss": 4.2672,
      "step": 1324
    },
    {
      "epoch": 0.01325,
      "grad_norm": 0.4219074369900267,
      "learning_rate": 0.003,
      "loss": 4.2693,
      "step": 1325
    },
    {
      "epoch": 0.01326,
      "grad_norm": 0.43890778305013683,
      "learning_rate": 0.003,
      "loss": 4.2559,
      "step": 1326
    },
    {
      "epoch": 0.01327,
      "grad_norm": 0.4164892745664214,
      "learning_rate": 0.003,
      "loss": 4.2724,
      "step": 1327
    },
    {
      "epoch": 0.01328,
      "grad_norm": 0.49229273243603294,
      "learning_rate": 0.003,
      "loss": 4.2646,
      "step": 1328
    },
    {
      "epoch": 0.01329,
      "grad_norm": 0.6121730970661665,
      "learning_rate": 0.003,
      "loss": 4.2329,
      "step": 1329
    },
    {
      "epoch": 0.0133,
      "grad_norm": 0.8848354653817555,
      "learning_rate": 0.003,
      "loss": 4.282,
      "step": 1330
    },
    {
      "epoch": 0.01331,
      "grad_norm": 1.0334105334221095,
      "learning_rate": 0.003,
      "loss": 4.2945,
      "step": 1331
    },
    {
      "epoch": 0.01332,
      "grad_norm": 0.8234894915328277,
      "learning_rate": 0.003,
      "loss": 4.2994,
      "step": 1332
    },
    {
      "epoch": 0.01333,
      "grad_norm": 0.7856986790322864,
      "learning_rate": 0.003,
      "loss": 4.2716,
      "step": 1333
    },
    {
      "epoch": 0.01334,
      "grad_norm": 0.7455452553988269,
      "learning_rate": 0.003,
      "loss": 4.2662,
      "step": 1334
    },
    {
      "epoch": 0.01335,
      "grad_norm": 0.623751194689146,
      "learning_rate": 0.003,
      "loss": 4.2787,
      "step": 1335
    },
    {
      "epoch": 0.01336,
      "grad_norm": 0.8238963440270202,
      "learning_rate": 0.003,
      "loss": 4.2839,
      "step": 1336
    },
    {
      "epoch": 0.01337,
      "grad_norm": 0.8159528784128957,
      "learning_rate": 0.003,
      "loss": 4.3035,
      "step": 1337
    },
    {
      "epoch": 0.01338,
      "grad_norm": 0.691075290250966,
      "learning_rate": 0.003,
      "loss": 4.2694,
      "step": 1338
    },
    {
      "epoch": 0.01339,
      "grad_norm": 0.6872888519447673,
      "learning_rate": 0.003,
      "loss": 4.2893,
      "step": 1339
    },
    {
      "epoch": 0.0134,
      "grad_norm": 0.6938872016813308,
      "learning_rate": 0.003,
      "loss": 4.2701,
      "step": 1340
    },
    {
      "epoch": 0.01341,
      "grad_norm": 0.7051901323106005,
      "learning_rate": 0.003,
      "loss": 4.2883,
      "step": 1341
    },
    {
      "epoch": 0.01342,
      "grad_norm": 0.7365511131614688,
      "learning_rate": 0.003,
      "loss": 4.257,
      "step": 1342
    },
    {
      "epoch": 0.01343,
      "grad_norm": 0.7564141855011662,
      "learning_rate": 0.003,
      "loss": 4.2701,
      "step": 1343
    },
    {
      "epoch": 0.01344,
      "grad_norm": 0.695387503279461,
      "learning_rate": 0.003,
      "loss": 4.2649,
      "step": 1344
    },
    {
      "epoch": 0.01345,
      "grad_norm": 0.6914588150610467,
      "learning_rate": 0.003,
      "loss": 4.3033,
      "step": 1345
    },
    {
      "epoch": 0.01346,
      "grad_norm": 0.6768362060970508,
      "learning_rate": 0.003,
      "loss": 4.277,
      "step": 1346
    },
    {
      "epoch": 0.01347,
      "grad_norm": 0.5492073753536512,
      "learning_rate": 0.003,
      "loss": 4.2633,
      "step": 1347
    },
    {
      "epoch": 0.01348,
      "grad_norm": 0.5626283903224933,
      "learning_rate": 0.003,
      "loss": 4.2635,
      "step": 1348
    },
    {
      "epoch": 0.01349,
      "grad_norm": 0.6438865489267382,
      "learning_rate": 0.003,
      "loss": 4.2856,
      "step": 1349
    },
    {
      "epoch": 0.0135,
      "grad_norm": 0.7199005579630849,
      "learning_rate": 0.003,
      "loss": 4.2796,
      "step": 1350
    },
    {
      "epoch": 0.01351,
      "grad_norm": 0.5428932527008233,
      "learning_rate": 0.003,
      "loss": 4.3038,
      "step": 1351
    },
    {
      "epoch": 0.01352,
      "grad_norm": 0.5284901745728212,
      "learning_rate": 0.003,
      "loss": 4.2779,
      "step": 1352
    },
    {
      "epoch": 0.01353,
      "grad_norm": 0.5600878945602594,
      "learning_rate": 0.003,
      "loss": 4.2635,
      "step": 1353
    },
    {
      "epoch": 0.01354,
      "grad_norm": 0.6391444612725596,
      "learning_rate": 0.003,
      "loss": 4.2525,
      "step": 1354
    },
    {
      "epoch": 0.01355,
      "grad_norm": 0.5641606375474643,
      "learning_rate": 0.003,
      "loss": 4.2634,
      "step": 1355
    },
    {
      "epoch": 0.01356,
      "grad_norm": 0.5080744885475224,
      "learning_rate": 0.003,
      "loss": 4.2545,
      "step": 1356
    },
    {
      "epoch": 0.01357,
      "grad_norm": 0.5093639445431166,
      "learning_rate": 0.003,
      "loss": 4.2441,
      "step": 1357
    },
    {
      "epoch": 0.01358,
      "grad_norm": 0.5049437394862782,
      "learning_rate": 0.003,
      "loss": 4.2383,
      "step": 1358
    },
    {
      "epoch": 0.01359,
      "grad_norm": 0.534762392832106,
      "learning_rate": 0.003,
      "loss": 4.2789,
      "step": 1359
    },
    {
      "epoch": 0.0136,
      "grad_norm": 0.5654562399234901,
      "learning_rate": 0.003,
      "loss": 4.2447,
      "step": 1360
    },
    {
      "epoch": 0.01361,
      "grad_norm": 0.7911685516807686,
      "learning_rate": 0.003,
      "loss": 4.2666,
      "step": 1361
    },
    {
      "epoch": 0.01362,
      "grad_norm": 1.133172158468294,
      "learning_rate": 0.003,
      "loss": 4.3073,
      "step": 1362
    },
    {
      "epoch": 0.01363,
      "grad_norm": 0.9631930238400119,
      "learning_rate": 0.003,
      "loss": 4.2987,
      "step": 1363
    },
    {
      "epoch": 0.01364,
      "grad_norm": 0.7443319461425727,
      "learning_rate": 0.003,
      "loss": 4.2839,
      "step": 1364
    },
    {
      "epoch": 0.01365,
      "grad_norm": 0.7368707977522249,
      "learning_rate": 0.003,
      "loss": 4.3041,
      "step": 1365
    },
    {
      "epoch": 0.01366,
      "grad_norm": 0.6506933778775009,
      "learning_rate": 0.003,
      "loss": 4.255,
      "step": 1366
    },
    {
      "epoch": 0.01367,
      "grad_norm": 0.7425630695378723,
      "learning_rate": 0.003,
      "loss": 4.3072,
      "step": 1367
    },
    {
      "epoch": 0.01368,
      "grad_norm": 0.7495522896004446,
      "learning_rate": 0.003,
      "loss": 4.2833,
      "step": 1368
    },
    {
      "epoch": 0.01369,
      "grad_norm": 0.7542187487500857,
      "learning_rate": 0.003,
      "loss": 4.2721,
      "step": 1369
    },
    {
      "epoch": 0.0137,
      "grad_norm": 0.9330809300322568,
      "learning_rate": 0.003,
      "loss": 4.3029,
      "step": 1370
    },
    {
      "epoch": 0.01371,
      "grad_norm": 1.2015218016331504,
      "learning_rate": 0.003,
      "loss": 4.3156,
      "step": 1371
    },
    {
      "epoch": 0.01372,
      "grad_norm": 1.0453049109497266,
      "learning_rate": 0.003,
      "loss": 4.2577,
      "step": 1372
    },
    {
      "epoch": 0.01373,
      "grad_norm": 1.086666562724284,
      "learning_rate": 0.003,
      "loss": 4.308,
      "step": 1373
    },
    {
      "epoch": 0.01374,
      "grad_norm": 1.0320592526365153,
      "learning_rate": 0.003,
      "loss": 4.3227,
      "step": 1374
    },
    {
      "epoch": 0.01375,
      "grad_norm": 1.1967086046120219,
      "learning_rate": 0.003,
      "loss": 4.2884,
      "step": 1375
    },
    {
      "epoch": 0.01376,
      "grad_norm": 0.8548157633990001,
      "learning_rate": 0.003,
      "loss": 4.307,
      "step": 1376
    },
    {
      "epoch": 0.01377,
      "grad_norm": 0.9102072373918407,
      "learning_rate": 0.003,
      "loss": 4.2985,
      "step": 1377
    },
    {
      "epoch": 0.01378,
      "grad_norm": 0.9843975361934545,
      "learning_rate": 0.003,
      "loss": 4.3358,
      "step": 1378
    },
    {
      "epoch": 0.01379,
      "grad_norm": 1.0283723711953892,
      "learning_rate": 0.003,
      "loss": 4.3102,
      "step": 1379
    },
    {
      "epoch": 0.0138,
      "grad_norm": 0.9006954176701093,
      "learning_rate": 0.003,
      "loss": 4.3045,
      "step": 1380
    },
    {
      "epoch": 0.01381,
      "grad_norm": 1.123717795950056,
      "learning_rate": 0.003,
      "loss": 4.3217,
      "step": 1381
    },
    {
      "epoch": 0.01382,
      "grad_norm": 1.0986902300743402,
      "learning_rate": 0.003,
      "loss": 4.3218,
      "step": 1382
    },
    {
      "epoch": 0.01383,
      "grad_norm": 0.9410643097534779,
      "learning_rate": 0.003,
      "loss": 4.3113,
      "step": 1383
    },
    {
      "epoch": 0.01384,
      "grad_norm": 0.8436673398281763,
      "learning_rate": 0.003,
      "loss": 4.3327,
      "step": 1384
    },
    {
      "epoch": 0.01385,
      "grad_norm": 0.8422095588989059,
      "learning_rate": 0.003,
      "loss": 4.3233,
      "step": 1385
    },
    {
      "epoch": 0.01386,
      "grad_norm": 0.9633211233231406,
      "learning_rate": 0.003,
      "loss": 4.3006,
      "step": 1386
    },
    {
      "epoch": 0.01387,
      "grad_norm": 1.2716300561274656,
      "learning_rate": 0.003,
      "loss": 4.3052,
      "step": 1387
    },
    {
      "epoch": 0.01388,
      "grad_norm": 0.786746450686237,
      "learning_rate": 0.003,
      "loss": 4.33,
      "step": 1388
    },
    {
      "epoch": 0.01389,
      "grad_norm": 0.763957561776965,
      "learning_rate": 0.003,
      "loss": 4.3069,
      "step": 1389
    },
    {
      "epoch": 0.0139,
      "grad_norm": 0.7874363277644975,
      "learning_rate": 0.003,
      "loss": 4.3041,
      "step": 1390
    },
    {
      "epoch": 0.01391,
      "grad_norm": 0.7493071050038139,
      "learning_rate": 0.003,
      "loss": 4.3036,
      "step": 1391
    },
    {
      "epoch": 0.01392,
      "grad_norm": 0.5930612778435612,
      "learning_rate": 0.003,
      "loss": 4.3076,
      "step": 1392
    },
    {
      "epoch": 0.01393,
      "grad_norm": 0.5012167316836135,
      "learning_rate": 0.003,
      "loss": 4.2738,
      "step": 1393
    },
    {
      "epoch": 0.01394,
      "grad_norm": 0.4948661937091383,
      "learning_rate": 0.003,
      "loss": 4.2568,
      "step": 1394
    },
    {
      "epoch": 0.01395,
      "grad_norm": 0.4622888611133768,
      "learning_rate": 0.003,
      "loss": 4.2868,
      "step": 1395
    },
    {
      "epoch": 0.01396,
      "grad_norm": 0.4205886993889684,
      "learning_rate": 0.003,
      "loss": 4.275,
      "step": 1396
    },
    {
      "epoch": 0.01397,
      "grad_norm": 0.47767051412590744,
      "learning_rate": 0.003,
      "loss": 4.2862,
      "step": 1397
    },
    {
      "epoch": 0.01398,
      "grad_norm": 0.417341308437963,
      "learning_rate": 0.003,
      "loss": 4.2647,
      "step": 1398
    },
    {
      "epoch": 0.01399,
      "grad_norm": 0.38332601666132154,
      "learning_rate": 0.003,
      "loss": 4.2835,
      "step": 1399
    },
    {
      "epoch": 0.014,
      "grad_norm": 0.49255586643940574,
      "learning_rate": 0.003,
      "loss": 4.2444,
      "step": 1400
    },
    {
      "epoch": 0.01401,
      "grad_norm": 0.8450629843994065,
      "learning_rate": 0.003,
      "loss": 4.2739,
      "step": 1401
    },
    {
      "epoch": 0.01402,
      "grad_norm": 1.0913061976723808,
      "learning_rate": 0.003,
      "loss": 4.3172,
      "step": 1402
    },
    {
      "epoch": 0.01403,
      "grad_norm": 0.6222948640126749,
      "learning_rate": 0.003,
      "loss": 4.2634,
      "step": 1403
    },
    {
      "epoch": 0.01404,
      "grad_norm": 0.6692390535634434,
      "learning_rate": 0.003,
      "loss": 4.291,
      "step": 1404
    },
    {
      "epoch": 0.01405,
      "grad_norm": 0.531565118139576,
      "learning_rate": 0.003,
      "loss": 4.2997,
      "step": 1405
    },
    {
      "epoch": 0.01406,
      "grad_norm": 0.4824477961295427,
      "learning_rate": 0.003,
      "loss": 4.2779,
      "step": 1406
    },
    {
      "epoch": 0.01407,
      "grad_norm": 0.5759838031356613,
      "learning_rate": 0.003,
      "loss": 4.2942,
      "step": 1407
    },
    {
      "epoch": 0.01408,
      "grad_norm": 0.4476558981995588,
      "learning_rate": 0.003,
      "loss": 4.245,
      "step": 1408
    },
    {
      "epoch": 0.01409,
      "grad_norm": 0.43291671999910936,
      "learning_rate": 0.003,
      "loss": 4.2753,
      "step": 1409
    },
    {
      "epoch": 0.0141,
      "grad_norm": 0.48354199581441126,
      "learning_rate": 0.003,
      "loss": 4.2565,
      "step": 1410
    },
    {
      "epoch": 0.01411,
      "grad_norm": 0.4777579756136276,
      "learning_rate": 0.003,
      "loss": 4.2302,
      "step": 1411
    },
    {
      "epoch": 0.01412,
      "grad_norm": 0.5224654831649898,
      "learning_rate": 0.003,
      "loss": 4.2693,
      "step": 1412
    },
    {
      "epoch": 0.01413,
      "grad_norm": 0.5669597734894755,
      "learning_rate": 0.003,
      "loss": 4.2639,
      "step": 1413
    },
    {
      "epoch": 0.01414,
      "grad_norm": 0.5645597477136786,
      "learning_rate": 0.003,
      "loss": 4.2453,
      "step": 1414
    },
    {
      "epoch": 0.01415,
      "grad_norm": 0.5887759339552011,
      "learning_rate": 0.003,
      "loss": 4.238,
      "step": 1415
    },
    {
      "epoch": 0.01416,
      "grad_norm": 0.6104838140849604,
      "learning_rate": 0.003,
      "loss": 4.2641,
      "step": 1416
    },
    {
      "epoch": 0.01417,
      "grad_norm": 0.6908167320390044,
      "learning_rate": 0.003,
      "loss": 4.2559,
      "step": 1417
    },
    {
      "epoch": 0.01418,
      "grad_norm": 0.8241127076876378,
      "learning_rate": 0.003,
      "loss": 4.28,
      "step": 1418
    },
    {
      "epoch": 0.01419,
      "grad_norm": 0.8554302831681504,
      "learning_rate": 0.003,
      "loss": 4.2733,
      "step": 1419
    },
    {
      "epoch": 0.0142,
      "grad_norm": 0.8153359619668534,
      "learning_rate": 0.003,
      "loss": 4.28,
      "step": 1420
    },
    {
      "epoch": 0.01421,
      "grad_norm": 0.808004884443839,
      "learning_rate": 0.003,
      "loss": 4.2527,
      "step": 1421
    },
    {
      "epoch": 0.01422,
      "grad_norm": 0.6302573298023686,
      "learning_rate": 0.003,
      "loss": 4.2516,
      "step": 1422
    },
    {
      "epoch": 0.01423,
      "grad_norm": 0.6834582033971294,
      "learning_rate": 0.003,
      "loss": 4.2585,
      "step": 1423
    },
    {
      "epoch": 0.01424,
      "grad_norm": 0.7665806417295189,
      "learning_rate": 0.003,
      "loss": 4.2761,
      "step": 1424
    },
    {
      "epoch": 0.01425,
      "grad_norm": 0.8479056405573007,
      "learning_rate": 0.003,
      "loss": 4.2857,
      "step": 1425
    },
    {
      "epoch": 0.01426,
      "grad_norm": 0.8307259541179206,
      "learning_rate": 0.003,
      "loss": 4.2636,
      "step": 1426
    },
    {
      "epoch": 0.01427,
      "grad_norm": 0.7328418116871758,
      "learning_rate": 0.003,
      "loss": 4.257,
      "step": 1427
    },
    {
      "epoch": 0.01428,
      "grad_norm": 0.834478644245489,
      "learning_rate": 0.003,
      "loss": 4.2596,
      "step": 1428
    },
    {
      "epoch": 0.01429,
      "grad_norm": 0.8679311866430979,
      "learning_rate": 0.003,
      "loss": 4.2742,
      "step": 1429
    },
    {
      "epoch": 0.0143,
      "grad_norm": 0.9567780417694972,
      "learning_rate": 0.003,
      "loss": 4.2902,
      "step": 1430
    },
    {
      "epoch": 0.01431,
      "grad_norm": 0.99274156172833,
      "learning_rate": 0.003,
      "loss": 4.2901,
      "step": 1431
    },
    {
      "epoch": 0.01432,
      "grad_norm": 0.9575505507003149,
      "learning_rate": 0.003,
      "loss": 4.2782,
      "step": 1432
    },
    {
      "epoch": 0.01433,
      "grad_norm": 1.0120649680762432,
      "learning_rate": 0.003,
      "loss": 4.2802,
      "step": 1433
    },
    {
      "epoch": 0.01434,
      "grad_norm": 0.9698542777423387,
      "learning_rate": 0.003,
      "loss": 4.2694,
      "step": 1434
    },
    {
      "epoch": 0.01435,
      "grad_norm": 0.8592446572371273,
      "learning_rate": 0.003,
      "loss": 4.2745,
      "step": 1435
    },
    {
      "epoch": 0.01436,
      "grad_norm": 0.7299491282894184,
      "learning_rate": 0.003,
      "loss": 4.2863,
      "step": 1436
    },
    {
      "epoch": 0.01437,
      "grad_norm": 0.6523703034324407,
      "learning_rate": 0.003,
      "loss": 4.296,
      "step": 1437
    },
    {
      "epoch": 0.01438,
      "grad_norm": 0.6201165515106615,
      "learning_rate": 0.003,
      "loss": 4.2864,
      "step": 1438
    },
    {
      "epoch": 0.01439,
      "grad_norm": 0.6536973612494624,
      "learning_rate": 0.003,
      "loss": 4.25,
      "step": 1439
    },
    {
      "epoch": 0.0144,
      "grad_norm": 0.729776581664813,
      "learning_rate": 0.003,
      "loss": 4.2648,
      "step": 1440
    },
    {
      "epoch": 0.01441,
      "grad_norm": 0.7206829643898741,
      "learning_rate": 0.003,
      "loss": 4.2702,
      "step": 1441
    },
    {
      "epoch": 0.01442,
      "grad_norm": 0.7419558696417659,
      "learning_rate": 0.003,
      "loss": 4.2561,
      "step": 1442
    },
    {
      "epoch": 0.01443,
      "grad_norm": 0.8093095130841327,
      "learning_rate": 0.003,
      "loss": 4.3153,
      "step": 1443
    },
    {
      "epoch": 0.01444,
      "grad_norm": 0.7933277975079275,
      "learning_rate": 0.003,
      "loss": 4.2825,
      "step": 1444
    },
    {
      "epoch": 0.01445,
      "grad_norm": 0.7264821716906565,
      "learning_rate": 0.003,
      "loss": 4.2399,
      "step": 1445
    },
    {
      "epoch": 0.01446,
      "grad_norm": 0.6240641235999153,
      "learning_rate": 0.003,
      "loss": 4.2506,
      "step": 1446
    },
    {
      "epoch": 0.01447,
      "grad_norm": 0.5288921284639453,
      "learning_rate": 0.003,
      "loss": 4.256,
      "step": 1447
    },
    {
      "epoch": 0.01448,
      "grad_norm": 0.534171003657093,
      "learning_rate": 0.003,
      "loss": 4.2664,
      "step": 1448
    },
    {
      "epoch": 0.01449,
      "grad_norm": 0.504252276028137,
      "learning_rate": 0.003,
      "loss": 4.2565,
      "step": 1449
    },
    {
      "epoch": 0.0145,
      "grad_norm": 0.44556324681710546,
      "learning_rate": 0.003,
      "loss": 4.27,
      "step": 1450
    },
    {
      "epoch": 0.01451,
      "grad_norm": 0.4269835752600227,
      "learning_rate": 0.003,
      "loss": 4.2459,
      "step": 1451
    },
    {
      "epoch": 0.01452,
      "grad_norm": 0.43297277393213274,
      "learning_rate": 0.003,
      "loss": 4.2457,
      "step": 1452
    },
    {
      "epoch": 0.01453,
      "grad_norm": 0.48066055951097997,
      "learning_rate": 0.003,
      "loss": 4.2578,
      "step": 1453
    },
    {
      "epoch": 0.01454,
      "grad_norm": 0.7307010489462921,
      "learning_rate": 0.003,
      "loss": 4.2308,
      "step": 1454
    },
    {
      "epoch": 0.01455,
      "grad_norm": 1.061032754030789,
      "learning_rate": 0.003,
      "loss": 4.2921,
      "step": 1455
    },
    {
      "epoch": 0.01456,
      "grad_norm": 0.8307341818819957,
      "learning_rate": 0.003,
      "loss": 4.2586,
      "step": 1456
    },
    {
      "epoch": 0.01457,
      "grad_norm": 0.6035503218291812,
      "learning_rate": 0.003,
      "loss": 4.2382,
      "step": 1457
    },
    {
      "epoch": 0.01458,
      "grad_norm": 0.6933692267109066,
      "learning_rate": 0.003,
      "loss": 4.242,
      "step": 1458
    },
    {
      "epoch": 0.01459,
      "grad_norm": 0.6220443333798514,
      "learning_rate": 0.003,
      "loss": 4.2607,
      "step": 1459
    },
    {
      "epoch": 0.0146,
      "grad_norm": 0.8143223428105976,
      "learning_rate": 0.003,
      "loss": 4.2715,
      "step": 1460
    },
    {
      "epoch": 0.01461,
      "grad_norm": 0.7987144915177533,
      "learning_rate": 0.003,
      "loss": 4.2513,
      "step": 1461
    },
    {
      "epoch": 0.01462,
      "grad_norm": 0.6408744129720689,
      "learning_rate": 0.003,
      "loss": 4.2512,
      "step": 1462
    },
    {
      "epoch": 0.01463,
      "grad_norm": 0.7757864438293093,
      "learning_rate": 0.003,
      "loss": 4.2548,
      "step": 1463
    },
    {
      "epoch": 0.01464,
      "grad_norm": 0.7699033474111219,
      "learning_rate": 0.003,
      "loss": 4.2388,
      "step": 1464
    },
    {
      "epoch": 0.01465,
      "grad_norm": 0.5797229129062035,
      "learning_rate": 0.003,
      "loss": 4.2736,
      "step": 1465
    },
    {
      "epoch": 0.01466,
      "grad_norm": 0.611021916593894,
      "learning_rate": 0.003,
      "loss": 4.238,
      "step": 1466
    },
    {
      "epoch": 0.01467,
      "grad_norm": 0.7716393804810123,
      "learning_rate": 0.003,
      "loss": 4.2809,
      "step": 1467
    },
    {
      "epoch": 0.01468,
      "grad_norm": 0.684598948135191,
      "learning_rate": 0.003,
      "loss": 4.2576,
      "step": 1468
    },
    {
      "epoch": 0.01469,
      "grad_norm": 0.5983303592308452,
      "learning_rate": 0.003,
      "loss": 4.2537,
      "step": 1469
    },
    {
      "epoch": 0.0147,
      "grad_norm": 0.5951720244159906,
      "learning_rate": 0.003,
      "loss": 4.246,
      "step": 1470
    },
    {
      "epoch": 0.01471,
      "grad_norm": 0.595230494931454,
      "learning_rate": 0.003,
      "loss": 4.2579,
      "step": 1471
    },
    {
      "epoch": 0.01472,
      "grad_norm": 0.5179540783713997,
      "learning_rate": 0.003,
      "loss": 4.2385,
      "step": 1472
    },
    {
      "epoch": 0.01473,
      "grad_norm": 0.513255678190744,
      "learning_rate": 0.003,
      "loss": 4.2524,
      "step": 1473
    },
    {
      "epoch": 0.01474,
      "grad_norm": 0.5312567488001211,
      "learning_rate": 0.003,
      "loss": 4.2368,
      "step": 1474
    },
    {
      "epoch": 0.01475,
      "grad_norm": 0.5231277112786824,
      "learning_rate": 0.003,
      "loss": 4.2547,
      "step": 1475
    },
    {
      "epoch": 0.01476,
      "grad_norm": 0.524915592276574,
      "learning_rate": 0.003,
      "loss": 4.2412,
      "step": 1476
    },
    {
      "epoch": 0.01477,
      "grad_norm": 0.4868094804842935,
      "learning_rate": 0.003,
      "loss": 4.2453,
      "step": 1477
    },
    {
      "epoch": 0.01478,
      "grad_norm": 0.46072622581307954,
      "learning_rate": 0.003,
      "loss": 4.2277,
      "step": 1478
    },
    {
      "epoch": 0.01479,
      "grad_norm": 0.5672195126213446,
      "learning_rate": 0.003,
      "loss": 4.2625,
      "step": 1479
    },
    {
      "epoch": 0.0148,
      "grad_norm": 0.7550630041249586,
      "learning_rate": 0.003,
      "loss": 4.2383,
      "step": 1480
    },
    {
      "epoch": 0.01481,
      "grad_norm": 0.9561266542090932,
      "learning_rate": 0.003,
      "loss": 4.2523,
      "step": 1481
    },
    {
      "epoch": 0.01482,
      "grad_norm": 1.4635954400671285,
      "learning_rate": 0.003,
      "loss": 4.2602,
      "step": 1482
    },
    {
      "epoch": 0.01483,
      "grad_norm": 0.7350989176290839,
      "learning_rate": 0.003,
      "loss": 4.2401,
      "step": 1483
    },
    {
      "epoch": 0.01484,
      "grad_norm": 0.687260607047077,
      "learning_rate": 0.003,
      "loss": 4.2906,
      "step": 1484
    },
    {
      "epoch": 0.01485,
      "grad_norm": 0.6759306581993629,
      "learning_rate": 0.003,
      "loss": 4.2492,
      "step": 1485
    },
    {
      "epoch": 0.01486,
      "grad_norm": 0.5738231891215967,
      "learning_rate": 0.003,
      "loss": 4.2438,
      "step": 1486
    },
    {
      "epoch": 0.01487,
      "grad_norm": 0.67662740183017,
      "learning_rate": 0.003,
      "loss": 4.2486,
      "step": 1487
    },
    {
      "epoch": 0.01488,
      "grad_norm": 0.6465587043004464,
      "learning_rate": 0.003,
      "loss": 4.2584,
      "step": 1488
    },
    {
      "epoch": 0.01489,
      "grad_norm": 0.6278262825245318,
      "learning_rate": 0.003,
      "loss": 4.251,
      "step": 1489
    },
    {
      "epoch": 0.0149,
      "grad_norm": 0.7125204242602325,
      "learning_rate": 0.003,
      "loss": 4.2472,
      "step": 1490
    },
    {
      "epoch": 0.01491,
      "grad_norm": 0.739446677383364,
      "learning_rate": 0.003,
      "loss": 4.263,
      "step": 1491
    },
    {
      "epoch": 0.01492,
      "grad_norm": 0.7351492446611294,
      "learning_rate": 0.003,
      "loss": 4.2298,
      "step": 1492
    },
    {
      "epoch": 0.01493,
      "grad_norm": 0.7206112830630914,
      "learning_rate": 0.003,
      "loss": 4.2585,
      "step": 1493
    },
    {
      "epoch": 0.01494,
      "grad_norm": 0.7389347083990496,
      "learning_rate": 0.003,
      "loss": 4.239,
      "step": 1494
    },
    {
      "epoch": 0.01495,
      "grad_norm": 0.6924446817172788,
      "learning_rate": 0.003,
      "loss": 4.2476,
      "step": 1495
    },
    {
      "epoch": 0.01496,
      "grad_norm": 0.6954181917083473,
      "learning_rate": 0.003,
      "loss": 4.2266,
      "step": 1496
    },
    {
      "epoch": 0.01497,
      "grad_norm": 0.6994096236505176,
      "learning_rate": 0.003,
      "loss": 4.1995,
      "step": 1497
    },
    {
      "epoch": 0.01498,
      "grad_norm": 0.737027067879948,
      "learning_rate": 0.003,
      "loss": 4.2577,
      "step": 1498
    },
    {
      "epoch": 0.01499,
      "grad_norm": 0.8018965005148498,
      "learning_rate": 0.003,
      "loss": 4.2828,
      "step": 1499
    },
    {
      "epoch": 0.015,
      "grad_norm": 0.673322646768884,
      "learning_rate": 0.003,
      "loss": 4.2302,
      "step": 1500
    },
    {
      "epoch": 0.01501,
      "grad_norm": 0.6899086847228731,
      "learning_rate": 0.003,
      "loss": 4.23,
      "step": 1501
    },
    {
      "epoch": 0.01502,
      "grad_norm": 0.6591283488970741,
      "learning_rate": 0.003,
      "loss": 4.2708,
      "step": 1502
    },
    {
      "epoch": 0.01503,
      "grad_norm": 0.6852371530334846,
      "learning_rate": 0.003,
      "loss": 4.2501,
      "step": 1503
    },
    {
      "epoch": 0.01504,
      "grad_norm": 0.6823538206406574,
      "learning_rate": 0.003,
      "loss": 4.2505,
      "step": 1504
    },
    {
      "epoch": 0.01505,
      "grad_norm": 0.7899988696399359,
      "learning_rate": 0.003,
      "loss": 4.2395,
      "step": 1505
    },
    {
      "epoch": 0.01506,
      "grad_norm": 0.8794700649213404,
      "learning_rate": 0.003,
      "loss": 4.2637,
      "step": 1506
    },
    {
      "epoch": 0.01507,
      "grad_norm": 0.8573682870616134,
      "learning_rate": 0.003,
      "loss": 4.2788,
      "step": 1507
    },
    {
      "epoch": 0.01508,
      "grad_norm": 0.9379452075670608,
      "learning_rate": 0.003,
      "loss": 4.257,
      "step": 1508
    },
    {
      "epoch": 0.01509,
      "grad_norm": 1.155120366213554,
      "learning_rate": 0.003,
      "loss": 4.2868,
      "step": 1509
    },
    {
      "epoch": 0.0151,
      "grad_norm": 0.8564988533087747,
      "learning_rate": 0.003,
      "loss": 4.2523,
      "step": 1510
    },
    {
      "epoch": 0.01511,
      "grad_norm": 0.7943851233402885,
      "learning_rate": 0.003,
      "loss": 4.242,
      "step": 1511
    },
    {
      "epoch": 0.01512,
      "grad_norm": 0.7290925664752087,
      "learning_rate": 0.003,
      "loss": 4.2483,
      "step": 1512
    },
    {
      "epoch": 0.01513,
      "grad_norm": 0.8302189630029514,
      "learning_rate": 0.003,
      "loss": 4.2538,
      "step": 1513
    },
    {
      "epoch": 0.01514,
      "grad_norm": 0.9180439778149861,
      "learning_rate": 0.003,
      "loss": 4.2658,
      "step": 1514
    },
    {
      "epoch": 0.01515,
      "grad_norm": 0.8252164561072897,
      "learning_rate": 0.003,
      "loss": 4.2572,
      "step": 1515
    },
    {
      "epoch": 0.01516,
      "grad_norm": 0.7235677802672953,
      "learning_rate": 0.003,
      "loss": 4.2628,
      "step": 1516
    },
    {
      "epoch": 0.01517,
      "grad_norm": 0.669771960126658,
      "learning_rate": 0.003,
      "loss": 4.2114,
      "step": 1517
    },
    {
      "epoch": 0.01518,
      "grad_norm": 0.6555410571298043,
      "learning_rate": 0.003,
      "loss": 4.248,
      "step": 1518
    },
    {
      "epoch": 0.01519,
      "grad_norm": 0.7596573642242733,
      "learning_rate": 0.003,
      "loss": 4.2822,
      "step": 1519
    },
    {
      "epoch": 0.0152,
      "grad_norm": 0.8963421668718663,
      "learning_rate": 0.003,
      "loss": 4.2455,
      "step": 1520
    },
    {
      "epoch": 0.01521,
      "grad_norm": 1.008660081317184,
      "learning_rate": 0.003,
      "loss": 4.2626,
      "step": 1521
    },
    {
      "epoch": 0.01522,
      "grad_norm": 1.0941342292979577,
      "learning_rate": 0.003,
      "loss": 4.2769,
      "step": 1522
    },
    {
      "epoch": 0.01523,
      "grad_norm": 0.8501402046319685,
      "learning_rate": 0.003,
      "loss": 4.2571,
      "step": 1523
    },
    {
      "epoch": 0.01524,
      "grad_norm": 0.9113983493960105,
      "learning_rate": 0.003,
      "loss": 4.2967,
      "step": 1524
    },
    {
      "epoch": 0.01525,
      "grad_norm": 0.7627206625135139,
      "learning_rate": 0.003,
      "loss": 4.2846,
      "step": 1525
    },
    {
      "epoch": 0.01526,
      "grad_norm": 0.7118386576342678,
      "learning_rate": 0.003,
      "loss": 4.2733,
      "step": 1526
    },
    {
      "epoch": 0.01527,
      "grad_norm": 0.6832379742298202,
      "learning_rate": 0.003,
      "loss": 4.2396,
      "step": 1527
    },
    {
      "epoch": 0.01528,
      "grad_norm": 0.6558129881498832,
      "learning_rate": 0.003,
      "loss": 4.2558,
      "step": 1528
    },
    {
      "epoch": 0.01529,
      "grad_norm": 0.653598932900876,
      "learning_rate": 0.003,
      "loss": 4.2506,
      "step": 1529
    },
    {
      "epoch": 0.0153,
      "grad_norm": 0.6481863699832326,
      "learning_rate": 0.003,
      "loss": 4.2569,
      "step": 1530
    },
    {
      "epoch": 0.01531,
      "grad_norm": 0.7137971422876086,
      "learning_rate": 0.003,
      "loss": 4.2516,
      "step": 1531
    },
    {
      "epoch": 0.01532,
      "grad_norm": 0.7682370963055534,
      "learning_rate": 0.003,
      "loss": 4.232,
      "step": 1532
    },
    {
      "epoch": 0.01533,
      "grad_norm": 0.6869067765486859,
      "learning_rate": 0.003,
      "loss": 4.2552,
      "step": 1533
    },
    {
      "epoch": 0.01534,
      "grad_norm": 0.6763479705568691,
      "learning_rate": 0.003,
      "loss": 4.251,
      "step": 1534
    },
    {
      "epoch": 0.01535,
      "grad_norm": 0.6278011204652878,
      "learning_rate": 0.003,
      "loss": 4.2624,
      "step": 1535
    },
    {
      "epoch": 0.01536,
      "grad_norm": 0.5449726408675756,
      "learning_rate": 0.003,
      "loss": 4.2371,
      "step": 1536
    },
    {
      "epoch": 0.01537,
      "grad_norm": 0.5702070777355899,
      "learning_rate": 0.003,
      "loss": 4.2348,
      "step": 1537
    },
    {
      "epoch": 0.01538,
      "grad_norm": 0.5568148406111664,
      "learning_rate": 0.003,
      "loss": 4.2106,
      "step": 1538
    },
    {
      "epoch": 0.01539,
      "grad_norm": 0.5990162223338897,
      "learning_rate": 0.003,
      "loss": 4.2411,
      "step": 1539
    },
    {
      "epoch": 0.0154,
      "grad_norm": 0.6709932169117396,
      "learning_rate": 0.003,
      "loss": 4.2471,
      "step": 1540
    },
    {
      "epoch": 0.01541,
      "grad_norm": 0.839479307713138,
      "learning_rate": 0.003,
      "loss": 4.2333,
      "step": 1541
    },
    {
      "epoch": 0.01542,
      "grad_norm": 0.998609935659052,
      "learning_rate": 0.003,
      "loss": 4.2723,
      "step": 1542
    },
    {
      "epoch": 0.01543,
      "grad_norm": 0.9175226186557726,
      "learning_rate": 0.003,
      "loss": 4.2597,
      "step": 1543
    },
    {
      "epoch": 0.01544,
      "grad_norm": 0.9698336862541426,
      "learning_rate": 0.003,
      "loss": 4.2493,
      "step": 1544
    },
    {
      "epoch": 0.01545,
      "grad_norm": 0.8646283897782253,
      "learning_rate": 0.003,
      "loss": 4.3133,
      "step": 1545
    },
    {
      "epoch": 0.01546,
      "grad_norm": 0.7489694669979721,
      "learning_rate": 0.003,
      "loss": 4.2786,
      "step": 1546
    },
    {
      "epoch": 0.01547,
      "grad_norm": 0.7346787237600204,
      "learning_rate": 0.003,
      "loss": 4.2793,
      "step": 1547
    },
    {
      "epoch": 0.01548,
      "grad_norm": 0.7843295792698941,
      "learning_rate": 0.003,
      "loss": 4.2451,
      "step": 1548
    },
    {
      "epoch": 0.01549,
      "grad_norm": 0.7547938162635378,
      "learning_rate": 0.003,
      "loss": 4.2323,
      "step": 1549
    },
    {
      "epoch": 0.0155,
      "grad_norm": 0.8033101017591833,
      "learning_rate": 0.003,
      "loss": 4.2543,
      "step": 1550
    },
    {
      "epoch": 0.01551,
      "grad_norm": 0.784324716245673,
      "learning_rate": 0.003,
      "loss": 4.2319,
      "step": 1551
    },
    {
      "epoch": 0.01552,
      "grad_norm": 0.6675659235069369,
      "learning_rate": 0.003,
      "loss": 4.2485,
      "step": 1552
    },
    {
      "epoch": 0.01553,
      "grad_norm": 0.7200584927165973,
      "learning_rate": 0.003,
      "loss": 4.2735,
      "step": 1553
    },
    {
      "epoch": 0.01554,
      "grad_norm": 0.833889265994982,
      "learning_rate": 0.003,
      "loss": 4.259,
      "step": 1554
    },
    {
      "epoch": 0.01555,
      "grad_norm": 0.7292713050790204,
      "learning_rate": 0.003,
      "loss": 4.2302,
      "step": 1555
    },
    {
      "epoch": 0.01556,
      "grad_norm": 0.6334535318603711,
      "learning_rate": 0.003,
      "loss": 4.2647,
      "step": 1556
    },
    {
      "epoch": 0.01557,
      "grad_norm": 0.6665206815174863,
      "learning_rate": 0.003,
      "loss": 4.2396,
      "step": 1557
    },
    {
      "epoch": 0.01558,
      "grad_norm": 0.5467983416304218,
      "learning_rate": 0.003,
      "loss": 4.2264,
      "step": 1558
    },
    {
      "epoch": 0.01559,
      "grad_norm": 0.5303338136727427,
      "learning_rate": 0.003,
      "loss": 4.2625,
      "step": 1559
    },
    {
      "epoch": 0.0156,
      "grad_norm": 0.5964427896199266,
      "learning_rate": 0.003,
      "loss": 4.2526,
      "step": 1560
    },
    {
      "epoch": 0.01561,
      "grad_norm": 0.7487394215839274,
      "learning_rate": 0.003,
      "loss": 4.2358,
      "step": 1561
    },
    {
      "epoch": 0.01562,
      "grad_norm": 0.8937121416966828,
      "learning_rate": 0.003,
      "loss": 4.2394,
      "step": 1562
    },
    {
      "epoch": 0.01563,
      "grad_norm": 1.2085214855734254,
      "learning_rate": 0.003,
      "loss": 4.2602,
      "step": 1563
    },
    {
      "epoch": 0.01564,
      "grad_norm": 0.8696563758847599,
      "learning_rate": 0.003,
      "loss": 4.284,
      "step": 1564
    },
    {
      "epoch": 0.01565,
      "grad_norm": 0.739740990652482,
      "learning_rate": 0.003,
      "loss": 4.2566,
      "step": 1565
    },
    {
      "epoch": 0.01566,
      "grad_norm": 0.7600676271323985,
      "learning_rate": 0.003,
      "loss": 4.2738,
      "step": 1566
    },
    {
      "epoch": 0.01567,
      "grad_norm": 0.7222900608043384,
      "learning_rate": 0.003,
      "loss": 4.2646,
      "step": 1567
    },
    {
      "epoch": 0.01568,
      "grad_norm": 0.7569225436342659,
      "learning_rate": 0.003,
      "loss": 4.2466,
      "step": 1568
    },
    {
      "epoch": 0.01569,
      "grad_norm": 0.7441561581750152,
      "learning_rate": 0.003,
      "loss": 4.252,
      "step": 1569
    },
    {
      "epoch": 0.0157,
      "grad_norm": 0.7580218902280629,
      "learning_rate": 0.003,
      "loss": 4.2727,
      "step": 1570
    },
    {
      "epoch": 0.01571,
      "grad_norm": 0.771634247408187,
      "learning_rate": 0.003,
      "loss": 4.2581,
      "step": 1571
    },
    {
      "epoch": 0.01572,
      "grad_norm": 0.95958549182467,
      "learning_rate": 0.003,
      "loss": 4.2713,
      "step": 1572
    },
    {
      "epoch": 0.01573,
      "grad_norm": 0.9029057415025205,
      "learning_rate": 0.003,
      "loss": 4.261,
      "step": 1573
    },
    {
      "epoch": 0.01574,
      "grad_norm": 0.8044062509450788,
      "learning_rate": 0.003,
      "loss": 4.2692,
      "step": 1574
    },
    {
      "epoch": 0.01575,
      "grad_norm": 0.7286639057606195,
      "learning_rate": 0.003,
      "loss": 4.2305,
      "step": 1575
    },
    {
      "epoch": 0.01576,
      "grad_norm": 0.6821195872107877,
      "learning_rate": 0.003,
      "loss": 4.2492,
      "step": 1576
    },
    {
      "epoch": 0.01577,
      "grad_norm": 0.6033878670810847,
      "learning_rate": 0.003,
      "loss": 4.2068,
      "step": 1577
    },
    {
      "epoch": 0.01578,
      "grad_norm": 0.519693589168086,
      "learning_rate": 0.003,
      "loss": 4.2343,
      "step": 1578
    },
    {
      "epoch": 0.01579,
      "grad_norm": 0.46078437812161954,
      "learning_rate": 0.003,
      "loss": 4.2271,
      "step": 1579
    },
    {
      "epoch": 0.0158,
      "grad_norm": 0.3811995998581025,
      "learning_rate": 0.003,
      "loss": 4.2344,
      "step": 1580
    },
    {
      "epoch": 0.01581,
      "grad_norm": 0.40258363386782764,
      "learning_rate": 0.003,
      "loss": 4.2406,
      "step": 1581
    },
    {
      "epoch": 0.01582,
      "grad_norm": 0.4070479328001984,
      "learning_rate": 0.003,
      "loss": 4.2222,
      "step": 1582
    },
    {
      "epoch": 0.01583,
      "grad_norm": 0.39481584768858025,
      "learning_rate": 0.003,
      "loss": 4.2093,
      "step": 1583
    },
    {
      "epoch": 0.01584,
      "grad_norm": 0.4212949759278112,
      "learning_rate": 0.003,
      "loss": 4.24,
      "step": 1584
    },
    {
      "epoch": 0.01585,
      "grad_norm": 0.6375222350508811,
      "learning_rate": 0.003,
      "loss": 4.2351,
      "step": 1585
    },
    {
      "epoch": 0.01586,
      "grad_norm": 1.1087267261978884,
      "learning_rate": 0.003,
      "loss": 4.2389,
      "step": 1586
    },
    {
      "epoch": 0.01587,
      "grad_norm": 1.095427209738268,
      "learning_rate": 0.003,
      "loss": 4.2576,
      "step": 1587
    },
    {
      "epoch": 0.01588,
      "grad_norm": 0.5974585636629688,
      "learning_rate": 0.003,
      "loss": 4.2673,
      "step": 1588
    },
    {
      "epoch": 0.01589,
      "grad_norm": 0.6434065905263348,
      "learning_rate": 0.003,
      "loss": 4.2657,
      "step": 1589
    },
    {
      "epoch": 0.0159,
      "grad_norm": 0.5922765275307262,
      "learning_rate": 0.003,
      "loss": 4.2507,
      "step": 1590
    },
    {
      "epoch": 0.01591,
      "grad_norm": 0.6559622239667369,
      "learning_rate": 0.003,
      "loss": 4.2576,
      "step": 1591
    },
    {
      "epoch": 0.01592,
      "grad_norm": 0.5876956364208827,
      "learning_rate": 0.003,
      "loss": 4.2446,
      "step": 1592
    },
    {
      "epoch": 0.01593,
      "grad_norm": 0.4847713630300587,
      "learning_rate": 0.003,
      "loss": 4.2428,
      "step": 1593
    },
    {
      "epoch": 0.01594,
      "grad_norm": 0.5605434129471829,
      "learning_rate": 0.003,
      "loss": 4.2313,
      "step": 1594
    },
    {
      "epoch": 0.01595,
      "grad_norm": 0.5718187801259297,
      "learning_rate": 0.003,
      "loss": 4.2581,
      "step": 1595
    },
    {
      "epoch": 0.01596,
      "grad_norm": 0.620870673073496,
      "learning_rate": 0.003,
      "loss": 4.2509,
      "step": 1596
    },
    {
      "epoch": 0.01597,
      "grad_norm": 0.6969552121884817,
      "learning_rate": 0.003,
      "loss": 4.2154,
      "step": 1597
    },
    {
      "epoch": 0.01598,
      "grad_norm": 0.8094238052393485,
      "learning_rate": 0.003,
      "loss": 4.2172,
      "step": 1598
    },
    {
      "epoch": 0.01599,
      "grad_norm": 0.7713054312041273,
      "learning_rate": 0.003,
      "loss": 4.2333,
      "step": 1599
    },
    {
      "epoch": 0.016,
      "grad_norm": 0.7787996781195005,
      "learning_rate": 0.003,
      "loss": 4.2062,
      "step": 1600
    },
    {
      "epoch": 0.01601,
      "grad_norm": 0.620719741277366,
      "learning_rate": 0.003,
      "loss": 4.2478,
      "step": 1601
    },
    {
      "epoch": 0.01602,
      "grad_norm": 0.5965590372558249,
      "learning_rate": 0.003,
      "loss": 4.223,
      "step": 1602
    },
    {
      "epoch": 0.01603,
      "grad_norm": 0.6057948188618382,
      "learning_rate": 0.003,
      "loss": 4.2386,
      "step": 1603
    },
    {
      "epoch": 0.01604,
      "grad_norm": 0.5859806802733505,
      "learning_rate": 0.003,
      "loss": 4.2313,
      "step": 1604
    },
    {
      "epoch": 0.01605,
      "grad_norm": 0.5985785202970987,
      "learning_rate": 0.003,
      "loss": 4.2093,
      "step": 1605
    },
    {
      "epoch": 0.01606,
      "grad_norm": 0.6329923375985832,
      "learning_rate": 0.003,
      "loss": 4.2131,
      "step": 1606
    },
    {
      "epoch": 0.01607,
      "grad_norm": 0.6777814987430973,
      "learning_rate": 0.003,
      "loss": 4.2418,
      "step": 1607
    },
    {
      "epoch": 0.01608,
      "grad_norm": 0.6454825679602344,
      "learning_rate": 0.003,
      "loss": 4.2152,
      "step": 1608
    },
    {
      "epoch": 0.01609,
      "grad_norm": 0.6234331303306465,
      "learning_rate": 0.003,
      "loss": 4.222,
      "step": 1609
    },
    {
      "epoch": 0.0161,
      "grad_norm": 0.717661852780552,
      "learning_rate": 0.003,
      "loss": 4.1917,
      "step": 1610
    },
    {
      "epoch": 0.01611,
      "grad_norm": 0.814514408732785,
      "learning_rate": 0.003,
      "loss": 4.2133,
      "step": 1611
    },
    {
      "epoch": 0.01612,
      "grad_norm": 0.968378243767811,
      "learning_rate": 0.003,
      "loss": 4.2184,
      "step": 1612
    },
    {
      "epoch": 0.01613,
      "grad_norm": 1.1889382213143287,
      "learning_rate": 0.003,
      "loss": 4.2747,
      "step": 1613
    },
    {
      "epoch": 0.01614,
      "grad_norm": 0.902755449051569,
      "learning_rate": 0.003,
      "loss": 4.2894,
      "step": 1614
    },
    {
      "epoch": 0.01615,
      "grad_norm": 0.8743919537144361,
      "learning_rate": 0.003,
      "loss": 4.2622,
      "step": 1615
    },
    {
      "epoch": 0.01616,
      "grad_norm": 0.8601657492637806,
      "learning_rate": 0.003,
      "loss": 4.2396,
      "step": 1616
    },
    {
      "epoch": 0.01617,
      "grad_norm": 0.8301577875616364,
      "learning_rate": 0.003,
      "loss": 4.2333,
      "step": 1617
    },
    {
      "epoch": 0.01618,
      "grad_norm": 0.829332785699801,
      "learning_rate": 0.003,
      "loss": 4.2387,
      "step": 1618
    },
    {
      "epoch": 0.01619,
      "grad_norm": 0.7047548182139096,
      "learning_rate": 0.003,
      "loss": 4.2209,
      "step": 1619
    },
    {
      "epoch": 0.0162,
      "grad_norm": 0.7270289420377364,
      "learning_rate": 0.003,
      "loss": 4.2317,
      "step": 1620
    },
    {
      "epoch": 0.01621,
      "grad_norm": 0.6929887184456018,
      "learning_rate": 0.003,
      "loss": 4.2257,
      "step": 1621
    },
    {
      "epoch": 0.01622,
      "grad_norm": 0.7471865137769718,
      "learning_rate": 0.003,
      "loss": 4.2589,
      "step": 1622
    },
    {
      "epoch": 0.01623,
      "grad_norm": 0.8282860007733814,
      "learning_rate": 0.003,
      "loss": 4.2399,
      "step": 1623
    },
    {
      "epoch": 0.01624,
      "grad_norm": 0.8871911840665138,
      "learning_rate": 0.003,
      "loss": 4.2581,
      "step": 1624
    },
    {
      "epoch": 0.01625,
      "grad_norm": 1.0259944995678087,
      "learning_rate": 0.003,
      "loss": 4.2469,
      "step": 1625
    },
    {
      "epoch": 0.01626,
      "grad_norm": 0.937815191301713,
      "learning_rate": 0.003,
      "loss": 4.2602,
      "step": 1626
    },
    {
      "epoch": 0.01627,
      "grad_norm": 1.064022068457424,
      "learning_rate": 0.003,
      "loss": 4.2368,
      "step": 1627
    },
    {
      "epoch": 0.01628,
      "grad_norm": 1.0710330182336567,
      "learning_rate": 0.003,
      "loss": 4.2567,
      "step": 1628
    },
    {
      "epoch": 0.01629,
      "grad_norm": 0.7877325862421115,
      "learning_rate": 0.003,
      "loss": 4.2414,
      "step": 1629
    },
    {
      "epoch": 0.0163,
      "grad_norm": 0.6743454531988056,
      "learning_rate": 0.003,
      "loss": 4.2288,
      "step": 1630
    },
    {
      "epoch": 0.01631,
      "grad_norm": 0.701073161255015,
      "learning_rate": 0.003,
      "loss": 4.2499,
      "step": 1631
    },
    {
      "epoch": 0.01632,
      "grad_norm": 0.6004677978717051,
      "learning_rate": 0.003,
      "loss": 4.252,
      "step": 1632
    },
    {
      "epoch": 0.01633,
      "grad_norm": 0.5772521307266697,
      "learning_rate": 0.003,
      "loss": 4.2255,
      "step": 1633
    },
    {
      "epoch": 0.01634,
      "grad_norm": 0.5454153935480286,
      "learning_rate": 0.003,
      "loss": 4.2406,
      "step": 1634
    },
    {
      "epoch": 0.01635,
      "grad_norm": 0.5154455969479952,
      "learning_rate": 0.003,
      "loss": 4.218,
      "step": 1635
    },
    {
      "epoch": 0.01636,
      "grad_norm": 0.5267549110628605,
      "learning_rate": 0.003,
      "loss": 4.2348,
      "step": 1636
    },
    {
      "epoch": 0.01637,
      "grad_norm": 0.5971697216641335,
      "learning_rate": 0.003,
      "loss": 4.2205,
      "step": 1637
    },
    {
      "epoch": 0.01638,
      "grad_norm": 0.7211767188224439,
      "learning_rate": 0.003,
      "loss": 4.229,
      "step": 1638
    },
    {
      "epoch": 0.01639,
      "grad_norm": 0.7816125211534669,
      "learning_rate": 0.003,
      "loss": 4.2548,
      "step": 1639
    },
    {
      "epoch": 0.0164,
      "grad_norm": 0.7389027343982134,
      "learning_rate": 0.003,
      "loss": 4.2362,
      "step": 1640
    },
    {
      "epoch": 0.01641,
      "grad_norm": 0.5975265072148448,
      "learning_rate": 0.003,
      "loss": 4.2026,
      "step": 1641
    },
    {
      "epoch": 0.01642,
      "grad_norm": 0.7083476744478381,
      "learning_rate": 0.003,
      "loss": 4.2507,
      "step": 1642
    },
    {
      "epoch": 0.01643,
      "grad_norm": 0.7791000270261257,
      "learning_rate": 0.003,
      "loss": 4.2378,
      "step": 1643
    },
    {
      "epoch": 0.01644,
      "grad_norm": 0.9006394984546569,
      "learning_rate": 0.003,
      "loss": 4.2625,
      "step": 1644
    },
    {
      "epoch": 0.01645,
      "grad_norm": 1.2334686487745912,
      "learning_rate": 0.003,
      "loss": 4.2236,
      "step": 1645
    },
    {
      "epoch": 0.01646,
      "grad_norm": 0.9932721101220016,
      "learning_rate": 0.003,
      "loss": 4.263,
      "step": 1646
    },
    {
      "epoch": 0.01647,
      "grad_norm": 0.989511986212608,
      "learning_rate": 0.003,
      "loss": 4.2576,
      "step": 1647
    },
    {
      "epoch": 0.01648,
      "grad_norm": 0.8142750385583939,
      "learning_rate": 0.003,
      "loss": 4.23,
      "step": 1648
    },
    {
      "epoch": 0.01649,
      "grad_norm": 0.6967651791258922,
      "learning_rate": 0.003,
      "loss": 4.2583,
      "step": 1649
    },
    {
      "epoch": 0.0165,
      "grad_norm": 0.7348292229935104,
      "learning_rate": 0.003,
      "loss": 4.2361,
      "step": 1650
    },
    {
      "epoch": 0.01651,
      "grad_norm": 0.6565673319405235,
      "learning_rate": 0.003,
      "loss": 4.2402,
      "step": 1651
    },
    {
      "epoch": 0.01652,
      "grad_norm": 0.5674400404701213,
      "learning_rate": 0.003,
      "loss": 4.1936,
      "step": 1652
    },
    {
      "epoch": 0.01653,
      "grad_norm": 0.5571261023539824,
      "learning_rate": 0.003,
      "loss": 4.2378,
      "step": 1653
    },
    {
      "epoch": 0.01654,
      "grad_norm": 0.5083863828402352,
      "learning_rate": 0.003,
      "loss": 4.2229,
      "step": 1654
    },
    {
      "epoch": 0.01655,
      "grad_norm": 0.49820131551804164,
      "learning_rate": 0.003,
      "loss": 4.23,
      "step": 1655
    },
    {
      "epoch": 0.01656,
      "grad_norm": 0.5201001189396881,
      "learning_rate": 0.003,
      "loss": 4.2012,
      "step": 1656
    },
    {
      "epoch": 0.01657,
      "grad_norm": 0.6652455003065759,
      "learning_rate": 0.003,
      "loss": 4.2237,
      "step": 1657
    },
    {
      "epoch": 0.01658,
      "grad_norm": 0.8870121051704852,
      "learning_rate": 0.003,
      "loss": 4.219,
      "step": 1658
    },
    {
      "epoch": 0.01659,
      "grad_norm": 1.1634698909054708,
      "learning_rate": 0.003,
      "loss": 4.2361,
      "step": 1659
    },
    {
      "epoch": 0.0166,
      "grad_norm": 0.6579478278546654,
      "learning_rate": 0.003,
      "loss": 4.2096,
      "step": 1660
    },
    {
      "epoch": 0.01661,
      "grad_norm": 0.5917680182889076,
      "learning_rate": 0.003,
      "loss": 4.2092,
      "step": 1661
    },
    {
      "epoch": 0.01662,
      "grad_norm": 0.7821000738049482,
      "learning_rate": 0.003,
      "loss": 4.2171,
      "step": 1662
    },
    {
      "epoch": 0.01663,
      "grad_norm": 0.812312164693218,
      "learning_rate": 0.003,
      "loss": 4.2422,
      "step": 1663
    },
    {
      "epoch": 0.01664,
      "grad_norm": 0.7053733462479915,
      "learning_rate": 0.003,
      "loss": 4.2567,
      "step": 1664
    },
    {
      "epoch": 0.01665,
      "grad_norm": 0.713897124959619,
      "learning_rate": 0.003,
      "loss": 4.2198,
      "step": 1665
    },
    {
      "epoch": 0.01666,
      "grad_norm": 0.6600299709203347,
      "learning_rate": 0.003,
      "loss": 4.2205,
      "step": 1666
    },
    {
      "epoch": 0.01667,
      "grad_norm": 0.5832085680202569,
      "learning_rate": 0.003,
      "loss": 4.2321,
      "step": 1667
    },
    {
      "epoch": 0.01668,
      "grad_norm": 0.6589799435966834,
      "learning_rate": 0.003,
      "loss": 4.2397,
      "step": 1668
    },
    {
      "epoch": 0.01669,
      "grad_norm": 0.7719970182316414,
      "learning_rate": 0.003,
      "loss": 4.2273,
      "step": 1669
    },
    {
      "epoch": 0.0167,
      "grad_norm": 0.7846205843605817,
      "learning_rate": 0.003,
      "loss": 4.2184,
      "step": 1670
    },
    {
      "epoch": 0.01671,
      "grad_norm": 0.7635694428992538,
      "learning_rate": 0.003,
      "loss": 4.2087,
      "step": 1671
    },
    {
      "epoch": 0.01672,
      "grad_norm": 0.7758449881926132,
      "learning_rate": 0.003,
      "loss": 4.2199,
      "step": 1672
    },
    {
      "epoch": 0.01673,
      "grad_norm": 0.6618290355489082,
      "learning_rate": 0.003,
      "loss": 4.2134,
      "step": 1673
    },
    {
      "epoch": 0.01674,
      "grad_norm": 0.6727466502726199,
      "learning_rate": 0.003,
      "loss": 4.232,
      "step": 1674
    },
    {
      "epoch": 0.01675,
      "grad_norm": 0.6690360284678838,
      "learning_rate": 0.003,
      "loss": 4.2158,
      "step": 1675
    },
    {
      "epoch": 0.01676,
      "grad_norm": 0.6672382539021496,
      "learning_rate": 0.003,
      "loss": 4.2242,
      "step": 1676
    },
    {
      "epoch": 0.01677,
      "grad_norm": 0.6311361325143124,
      "learning_rate": 0.003,
      "loss": 4.2132,
      "step": 1677
    },
    {
      "epoch": 0.01678,
      "grad_norm": 0.727198962494881,
      "learning_rate": 0.003,
      "loss": 4.2299,
      "step": 1678
    },
    {
      "epoch": 0.01679,
      "grad_norm": 0.8433802333808034,
      "learning_rate": 0.003,
      "loss": 4.2177,
      "step": 1679
    },
    {
      "epoch": 0.0168,
      "grad_norm": 0.8402151525100402,
      "learning_rate": 0.003,
      "loss": 4.2352,
      "step": 1680
    },
    {
      "epoch": 0.01681,
      "grad_norm": 0.9608253642789906,
      "learning_rate": 0.003,
      "loss": 4.2433,
      "step": 1681
    },
    {
      "epoch": 0.01682,
      "grad_norm": 1.160024891342376,
      "learning_rate": 0.003,
      "loss": 4.246,
      "step": 1682
    },
    {
      "epoch": 0.01683,
      "grad_norm": 0.8201452744681862,
      "learning_rate": 0.003,
      "loss": 4.2539,
      "step": 1683
    },
    {
      "epoch": 0.01684,
      "grad_norm": 0.7434576192749681,
      "learning_rate": 0.003,
      "loss": 4.2242,
      "step": 1684
    },
    {
      "epoch": 0.01685,
      "grad_norm": 0.8733167238677911,
      "learning_rate": 0.003,
      "loss": 4.2448,
      "step": 1685
    },
    {
      "epoch": 0.01686,
      "grad_norm": 0.7251849705005492,
      "learning_rate": 0.003,
      "loss": 4.2342,
      "step": 1686
    },
    {
      "epoch": 0.01687,
      "grad_norm": 0.623355393019925,
      "learning_rate": 0.003,
      "loss": 4.2258,
      "step": 1687
    },
    {
      "epoch": 0.01688,
      "grad_norm": 0.6485004834811148,
      "learning_rate": 0.003,
      "loss": 4.2113,
      "step": 1688
    },
    {
      "epoch": 0.01689,
      "grad_norm": 0.6622097332232696,
      "learning_rate": 0.003,
      "loss": 4.2246,
      "step": 1689
    },
    {
      "epoch": 0.0169,
      "grad_norm": 0.5609920955895745,
      "learning_rate": 0.003,
      "loss": 4.2095,
      "step": 1690
    },
    {
      "epoch": 0.01691,
      "grad_norm": 0.5554950872814335,
      "learning_rate": 0.003,
      "loss": 4.2208,
      "step": 1691
    },
    {
      "epoch": 0.01692,
      "grad_norm": 0.578754263387868,
      "learning_rate": 0.003,
      "loss": 4.2223,
      "step": 1692
    },
    {
      "epoch": 0.01693,
      "grad_norm": 0.54763155586202,
      "learning_rate": 0.003,
      "loss": 4.2453,
      "step": 1693
    },
    {
      "epoch": 0.01694,
      "grad_norm": 0.5925337152955293,
      "learning_rate": 0.003,
      "loss": 4.1963,
      "step": 1694
    },
    {
      "epoch": 0.01695,
      "grad_norm": 0.5883008115307695,
      "learning_rate": 0.003,
      "loss": 4.2285,
      "step": 1695
    },
    {
      "epoch": 0.01696,
      "grad_norm": 0.5777962347479031,
      "learning_rate": 0.003,
      "loss": 4.2174,
      "step": 1696
    },
    {
      "epoch": 0.01697,
      "grad_norm": 0.6137264663560853,
      "learning_rate": 0.003,
      "loss": 4.1953,
      "step": 1697
    },
    {
      "epoch": 0.01698,
      "grad_norm": 0.5567779485618047,
      "learning_rate": 0.003,
      "loss": 4.1887,
      "step": 1698
    },
    {
      "epoch": 0.01699,
      "grad_norm": 0.563692648440794,
      "learning_rate": 0.003,
      "loss": 4.1995,
      "step": 1699
    },
    {
      "epoch": 0.017,
      "grad_norm": 0.5378862525995165,
      "learning_rate": 0.003,
      "loss": 4.2165,
      "step": 1700
    },
    {
      "epoch": 0.01701,
      "grad_norm": 0.5131924880718308,
      "learning_rate": 0.003,
      "loss": 4.1973,
      "step": 1701
    },
    {
      "epoch": 0.01702,
      "grad_norm": 0.5473378733884345,
      "learning_rate": 0.003,
      "loss": 4.2207,
      "step": 1702
    },
    {
      "epoch": 0.01703,
      "grad_norm": 0.659714976195776,
      "learning_rate": 0.003,
      "loss": 4.2099,
      "step": 1703
    },
    {
      "epoch": 0.01704,
      "grad_norm": 0.7407833108135735,
      "learning_rate": 0.003,
      "loss": 4.2001,
      "step": 1704
    },
    {
      "epoch": 0.01705,
      "grad_norm": 0.7567928276503215,
      "learning_rate": 0.003,
      "loss": 4.2073,
      "step": 1705
    },
    {
      "epoch": 0.01706,
      "grad_norm": 0.7694420146818104,
      "learning_rate": 0.003,
      "loss": 4.2055,
      "step": 1706
    },
    {
      "epoch": 0.01707,
      "grad_norm": 1.0045884849963176,
      "learning_rate": 0.003,
      "loss": 4.2374,
      "step": 1707
    },
    {
      "epoch": 0.01708,
      "grad_norm": 1.2575213437630939,
      "learning_rate": 0.003,
      "loss": 4.2465,
      "step": 1708
    },
    {
      "epoch": 0.01709,
      "grad_norm": 0.848642979076845,
      "learning_rate": 0.003,
      "loss": 4.2418,
      "step": 1709
    },
    {
      "epoch": 0.0171,
      "grad_norm": 0.7781603648787689,
      "learning_rate": 0.003,
      "loss": 4.1936,
      "step": 1710
    },
    {
      "epoch": 0.01711,
      "grad_norm": 0.7996923845242195,
      "learning_rate": 0.003,
      "loss": 4.2245,
      "step": 1711
    },
    {
      "epoch": 0.01712,
      "grad_norm": 0.7302785582736923,
      "learning_rate": 0.003,
      "loss": 4.2298,
      "step": 1712
    },
    {
      "epoch": 0.01713,
      "grad_norm": 0.7495189068505729,
      "learning_rate": 0.003,
      "loss": 4.204,
      "step": 1713
    },
    {
      "epoch": 0.01714,
      "grad_norm": 0.68320470368433,
      "learning_rate": 0.003,
      "loss": 4.2407,
      "step": 1714
    },
    {
      "epoch": 0.01715,
      "grad_norm": 0.744728870764733,
      "learning_rate": 0.003,
      "loss": 4.2481,
      "step": 1715
    },
    {
      "epoch": 0.01716,
      "grad_norm": 0.7728817520220844,
      "learning_rate": 0.003,
      "loss": 4.2317,
      "step": 1716
    },
    {
      "epoch": 0.01717,
      "grad_norm": 0.8671487727140617,
      "learning_rate": 0.003,
      "loss": 4.2664,
      "step": 1717
    },
    {
      "epoch": 0.01718,
      "grad_norm": 0.8286537250448458,
      "learning_rate": 0.003,
      "loss": 4.237,
      "step": 1718
    },
    {
      "epoch": 0.01719,
      "grad_norm": 0.7719901017400573,
      "learning_rate": 0.003,
      "loss": 4.2535,
      "step": 1719
    },
    {
      "epoch": 0.0172,
      "grad_norm": 0.7363513590865703,
      "learning_rate": 0.003,
      "loss": 4.2105,
      "step": 1720
    },
    {
      "epoch": 0.01721,
      "grad_norm": 0.6255496683743654,
      "learning_rate": 0.003,
      "loss": 4.2258,
      "step": 1721
    },
    {
      "epoch": 0.01722,
      "grad_norm": 0.5928702135655549,
      "learning_rate": 0.003,
      "loss": 4.2129,
      "step": 1722
    },
    {
      "epoch": 0.01723,
      "grad_norm": 0.6570038276937221,
      "learning_rate": 0.003,
      "loss": 4.2248,
      "step": 1723
    },
    {
      "epoch": 0.01724,
      "grad_norm": 0.6380856228410711,
      "learning_rate": 0.003,
      "loss": 4.2241,
      "step": 1724
    },
    {
      "epoch": 0.01725,
      "grad_norm": 0.7477214539729845,
      "learning_rate": 0.003,
      "loss": 4.2088,
      "step": 1725
    },
    {
      "epoch": 0.01726,
      "grad_norm": 0.7064643502935442,
      "learning_rate": 0.003,
      "loss": 4.2027,
      "step": 1726
    },
    {
      "epoch": 0.01727,
      "grad_norm": 0.7018895227983273,
      "learning_rate": 0.003,
      "loss": 4.2082,
      "step": 1727
    },
    {
      "epoch": 0.01728,
      "grad_norm": 0.7268534843228023,
      "learning_rate": 0.003,
      "loss": 4.2116,
      "step": 1728
    },
    {
      "epoch": 0.01729,
      "grad_norm": 0.8265185231394759,
      "learning_rate": 0.003,
      "loss": 4.2226,
      "step": 1729
    },
    {
      "epoch": 0.0173,
      "grad_norm": 0.9216458529069881,
      "learning_rate": 0.003,
      "loss": 4.2023,
      "step": 1730
    },
    {
      "epoch": 0.01731,
      "grad_norm": 1.0984877424905655,
      "learning_rate": 0.003,
      "loss": 4.2294,
      "step": 1731
    },
    {
      "epoch": 0.01732,
      "grad_norm": 0.9457932448227158,
      "learning_rate": 0.003,
      "loss": 4.2417,
      "step": 1732
    },
    {
      "epoch": 0.01733,
      "grad_norm": 0.8744510824253744,
      "learning_rate": 0.003,
      "loss": 4.2124,
      "step": 1733
    },
    {
      "epoch": 0.01734,
      "grad_norm": 0.8464357668562363,
      "learning_rate": 0.003,
      "loss": 4.2277,
      "step": 1734
    },
    {
      "epoch": 0.01735,
      "grad_norm": 0.7933851274075868,
      "learning_rate": 0.003,
      "loss": 4.1993,
      "step": 1735
    },
    {
      "epoch": 0.01736,
      "grad_norm": 0.7800431566899245,
      "learning_rate": 0.003,
      "loss": 4.2002,
      "step": 1736
    },
    {
      "epoch": 0.01737,
      "grad_norm": 0.7049958430065779,
      "learning_rate": 0.003,
      "loss": 4.2254,
      "step": 1737
    },
    {
      "epoch": 0.01738,
      "grad_norm": 0.7322441940210889,
      "learning_rate": 0.003,
      "loss": 4.2135,
      "step": 1738
    },
    {
      "epoch": 0.01739,
      "grad_norm": 0.7327690195119888,
      "learning_rate": 0.003,
      "loss": 4.2297,
      "step": 1739
    },
    {
      "epoch": 0.0174,
      "grad_norm": 0.7530152956649067,
      "learning_rate": 0.003,
      "loss": 4.231,
      "step": 1740
    },
    {
      "epoch": 0.01741,
      "grad_norm": 0.7681419574064243,
      "learning_rate": 0.003,
      "loss": 4.2369,
      "step": 1741
    },
    {
      "epoch": 0.01742,
      "grad_norm": 0.8179621872439043,
      "learning_rate": 0.003,
      "loss": 4.2278,
      "step": 1742
    },
    {
      "epoch": 0.01743,
      "grad_norm": 0.9578047731817773,
      "learning_rate": 0.003,
      "loss": 4.2432,
      "step": 1743
    },
    {
      "epoch": 0.01744,
      "grad_norm": 1.0853544694502002,
      "learning_rate": 0.003,
      "loss": 4.2354,
      "step": 1744
    },
    {
      "epoch": 0.01745,
      "grad_norm": 0.901300254011202,
      "learning_rate": 0.003,
      "loss": 4.1985,
      "step": 1745
    },
    {
      "epoch": 0.01746,
      "grad_norm": 0.7977925606337166,
      "learning_rate": 0.003,
      "loss": 4.2177,
      "step": 1746
    },
    {
      "epoch": 0.01747,
      "grad_norm": 0.7805003864858971,
      "learning_rate": 0.003,
      "loss": 4.2153,
      "step": 1747
    },
    {
      "epoch": 0.01748,
      "grad_norm": 0.5958068748685724,
      "learning_rate": 0.003,
      "loss": 4.2004,
      "step": 1748
    },
    {
      "epoch": 0.01749,
      "grad_norm": 0.5609955740586448,
      "learning_rate": 0.003,
      "loss": 4.1969,
      "step": 1749
    },
    {
      "epoch": 0.0175,
      "grad_norm": 0.5677742109277946,
      "learning_rate": 0.003,
      "loss": 4.2121,
      "step": 1750
    },
    {
      "epoch": 0.01751,
      "grad_norm": 0.5067010918467684,
      "learning_rate": 0.003,
      "loss": 4.2009,
      "step": 1751
    },
    {
      "epoch": 0.01752,
      "grad_norm": 0.4469462107071404,
      "learning_rate": 0.003,
      "loss": 4.239,
      "step": 1752
    },
    {
      "epoch": 0.01753,
      "grad_norm": 0.4299529922000094,
      "learning_rate": 0.003,
      "loss": 4.1859,
      "step": 1753
    },
    {
      "epoch": 0.01754,
      "grad_norm": 0.39327472210537173,
      "learning_rate": 0.003,
      "loss": 4.2141,
      "step": 1754
    },
    {
      "epoch": 0.01755,
      "grad_norm": 0.4833428525390586,
      "learning_rate": 0.003,
      "loss": 4.1582,
      "step": 1755
    },
    {
      "epoch": 0.01756,
      "grad_norm": 0.5692339112673285,
      "learning_rate": 0.003,
      "loss": 4.2351,
      "step": 1756
    },
    {
      "epoch": 0.01757,
      "grad_norm": 0.765825485079122,
      "learning_rate": 0.003,
      "loss": 4.2204,
      "step": 1757
    },
    {
      "epoch": 0.01758,
      "grad_norm": 0.9042836101782444,
      "learning_rate": 0.003,
      "loss": 4.2384,
      "step": 1758
    },
    {
      "epoch": 0.01759,
      "grad_norm": 0.8742540565152447,
      "learning_rate": 0.003,
      "loss": 4.2064,
      "step": 1759
    },
    {
      "epoch": 0.0176,
      "grad_norm": 0.7243758331092184,
      "learning_rate": 0.003,
      "loss": 4.2193,
      "step": 1760
    },
    {
      "epoch": 0.01761,
      "grad_norm": 0.6848453720193391,
      "learning_rate": 0.003,
      "loss": 4.2074,
      "step": 1761
    },
    {
      "epoch": 0.01762,
      "grad_norm": 0.8704455967545811,
      "learning_rate": 0.003,
      "loss": 4.2328,
      "step": 1762
    },
    {
      "epoch": 0.01763,
      "grad_norm": 0.8471557315960334,
      "learning_rate": 0.003,
      "loss": 4.2246,
      "step": 1763
    },
    {
      "epoch": 0.01764,
      "grad_norm": 0.7827279567320375,
      "learning_rate": 0.003,
      "loss": 4.1921,
      "step": 1764
    },
    {
      "epoch": 0.01765,
      "grad_norm": 0.8790545608575575,
      "learning_rate": 0.003,
      "loss": 4.2237,
      "step": 1765
    },
    {
      "epoch": 0.01766,
      "grad_norm": 0.763900084595892,
      "learning_rate": 0.003,
      "loss": 4.2322,
      "step": 1766
    },
    {
      "epoch": 0.01767,
      "grad_norm": 0.7449808930716393,
      "learning_rate": 0.003,
      "loss": 4.1962,
      "step": 1767
    },
    {
      "epoch": 0.01768,
      "grad_norm": 0.6931774004435453,
      "learning_rate": 0.003,
      "loss": 4.2017,
      "step": 1768
    },
    {
      "epoch": 0.01769,
      "grad_norm": 0.7426550774463054,
      "learning_rate": 0.003,
      "loss": 4.2066,
      "step": 1769
    },
    {
      "epoch": 0.0177,
      "grad_norm": 0.7834040480889046,
      "learning_rate": 0.003,
      "loss": 4.1901,
      "step": 1770
    },
    {
      "epoch": 0.01771,
      "grad_norm": 0.6750175361724853,
      "learning_rate": 0.003,
      "loss": 4.2195,
      "step": 1771
    },
    {
      "epoch": 0.01772,
      "grad_norm": 0.6376494933849937,
      "learning_rate": 0.003,
      "loss": 4.1888,
      "step": 1772
    },
    {
      "epoch": 0.01773,
      "grad_norm": 0.6192068250431033,
      "learning_rate": 0.003,
      "loss": 4.1807,
      "step": 1773
    },
    {
      "epoch": 0.01774,
      "grad_norm": 0.6106095483716042,
      "learning_rate": 0.003,
      "loss": 4.2034,
      "step": 1774
    },
    {
      "epoch": 0.01775,
      "grad_norm": 0.6196510426340663,
      "learning_rate": 0.003,
      "loss": 4.1895,
      "step": 1775
    },
    {
      "epoch": 0.01776,
      "grad_norm": 0.7097137120766858,
      "learning_rate": 0.003,
      "loss": 4.1828,
      "step": 1776
    },
    {
      "epoch": 0.01777,
      "grad_norm": 0.7063319061887237,
      "learning_rate": 0.003,
      "loss": 4.2044,
      "step": 1777
    },
    {
      "epoch": 0.01778,
      "grad_norm": 0.629621456935323,
      "learning_rate": 0.003,
      "loss": 4.194,
      "step": 1778
    },
    {
      "epoch": 0.01779,
      "grad_norm": 0.7088930396533265,
      "learning_rate": 0.003,
      "loss": 4.215,
      "step": 1779
    },
    {
      "epoch": 0.0178,
      "grad_norm": 0.7253919374521124,
      "learning_rate": 0.003,
      "loss": 4.191,
      "step": 1780
    },
    {
      "epoch": 0.01781,
      "grad_norm": 0.7852702442407208,
      "learning_rate": 0.003,
      "loss": 4.2044,
      "step": 1781
    },
    {
      "epoch": 0.01782,
      "grad_norm": 0.8394755648372281,
      "learning_rate": 0.003,
      "loss": 4.2328,
      "step": 1782
    },
    {
      "epoch": 0.01783,
      "grad_norm": 0.6910931858922343,
      "learning_rate": 0.003,
      "loss": 4.2068,
      "step": 1783
    },
    {
      "epoch": 0.01784,
      "grad_norm": 0.7674520462715927,
      "learning_rate": 0.003,
      "loss": 4.2107,
      "step": 1784
    },
    {
      "epoch": 0.01785,
      "grad_norm": 0.8199387977488617,
      "learning_rate": 0.003,
      "loss": 4.1741,
      "step": 1785
    },
    {
      "epoch": 0.01786,
      "grad_norm": 0.9173162484249777,
      "learning_rate": 0.003,
      "loss": 4.2074,
      "step": 1786
    },
    {
      "epoch": 0.01787,
      "grad_norm": 1.334253988039247,
      "learning_rate": 0.003,
      "loss": 4.2336,
      "step": 1787
    },
    {
      "epoch": 0.01788,
      "grad_norm": 1.0880162732024794,
      "learning_rate": 0.003,
      "loss": 4.2464,
      "step": 1788
    },
    {
      "epoch": 0.01789,
      "grad_norm": 0.8768448988019708,
      "learning_rate": 0.003,
      "loss": 4.2114,
      "step": 1789
    },
    {
      "epoch": 0.0179,
      "grad_norm": 0.9478883510444388,
      "learning_rate": 0.003,
      "loss": 4.2362,
      "step": 1790
    },
    {
      "epoch": 0.01791,
      "grad_norm": 0.9789788338963192,
      "learning_rate": 0.003,
      "loss": 4.2475,
      "step": 1791
    },
    {
      "epoch": 0.01792,
      "grad_norm": 0.9379061880413183,
      "learning_rate": 0.003,
      "loss": 4.2484,
      "step": 1792
    },
    {
      "epoch": 0.01793,
      "grad_norm": 0.9327271640213346,
      "learning_rate": 0.003,
      "loss": 4.215,
      "step": 1793
    },
    {
      "epoch": 0.01794,
      "grad_norm": 0.7690204695866035,
      "learning_rate": 0.003,
      "loss": 4.2222,
      "step": 1794
    },
    {
      "epoch": 0.01795,
      "grad_norm": 0.6971902449310058,
      "learning_rate": 0.003,
      "loss": 4.2172,
      "step": 1795
    },
    {
      "epoch": 0.01796,
      "grad_norm": 0.762901849453012,
      "learning_rate": 0.003,
      "loss": 4.2347,
      "step": 1796
    },
    {
      "epoch": 0.01797,
      "grad_norm": 0.8440627560282756,
      "learning_rate": 0.003,
      "loss": 4.2037,
      "step": 1797
    },
    {
      "epoch": 0.01798,
      "grad_norm": 0.8368125917294373,
      "learning_rate": 0.003,
      "loss": 4.2148,
      "step": 1798
    },
    {
      "epoch": 0.01799,
      "grad_norm": 0.7348763481882639,
      "learning_rate": 0.003,
      "loss": 4.2247,
      "step": 1799
    },
    {
      "epoch": 0.018,
      "grad_norm": 0.7680359832381647,
      "learning_rate": 0.003,
      "loss": 4.2184,
      "step": 1800
    },
    {
      "epoch": 0.01801,
      "grad_norm": 0.7404783794030995,
      "learning_rate": 0.003,
      "loss": 4.2101,
      "step": 1801
    },
    {
      "epoch": 0.01802,
      "grad_norm": 0.6580953290368566,
      "learning_rate": 0.003,
      "loss": 4.2238,
      "step": 1802
    },
    {
      "epoch": 0.01803,
      "grad_norm": 0.6630631878191348,
      "learning_rate": 0.003,
      "loss": 4.2284,
      "step": 1803
    },
    {
      "epoch": 0.01804,
      "grad_norm": 0.6396012711111247,
      "learning_rate": 0.003,
      "loss": 4.1992,
      "step": 1804
    },
    {
      "epoch": 0.01805,
      "grad_norm": 0.5559219636409923,
      "learning_rate": 0.003,
      "loss": 4.2154,
      "step": 1805
    },
    {
      "epoch": 0.01806,
      "grad_norm": 0.6144421410960899,
      "learning_rate": 0.003,
      "loss": 4.1763,
      "step": 1806
    },
    {
      "epoch": 0.01807,
      "grad_norm": 0.6146777760728932,
      "learning_rate": 0.003,
      "loss": 4.2121,
      "step": 1807
    },
    {
      "epoch": 0.01808,
      "grad_norm": 0.6116485891270191,
      "learning_rate": 0.003,
      "loss": 4.2001,
      "step": 1808
    },
    {
      "epoch": 0.01809,
      "grad_norm": 0.8096274979504547,
      "learning_rate": 0.003,
      "loss": 4.2275,
      "step": 1809
    },
    {
      "epoch": 0.0181,
      "grad_norm": 1.1102312559563035,
      "learning_rate": 0.003,
      "loss": 4.2346,
      "step": 1810
    },
    {
      "epoch": 0.01811,
      "grad_norm": 0.8754620418282368,
      "learning_rate": 0.003,
      "loss": 4.2216,
      "step": 1811
    },
    {
      "epoch": 0.01812,
      "grad_norm": 0.6567434234036816,
      "learning_rate": 0.003,
      "loss": 4.1947,
      "step": 1812
    },
    {
      "epoch": 0.01813,
      "grad_norm": 0.7375490051000831,
      "learning_rate": 0.003,
      "loss": 4.2409,
      "step": 1813
    },
    {
      "epoch": 0.01814,
      "grad_norm": 0.7842570681491089,
      "learning_rate": 0.003,
      "loss": 4.2075,
      "step": 1814
    },
    {
      "epoch": 0.01815,
      "grad_norm": 0.6332278380543743,
      "learning_rate": 0.003,
      "loss": 4.1891,
      "step": 1815
    },
    {
      "epoch": 0.01816,
      "grad_norm": 0.6578340036445237,
      "learning_rate": 0.003,
      "loss": 4.2166,
      "step": 1816
    },
    {
      "epoch": 0.01817,
      "grad_norm": 0.6624785828428507,
      "learning_rate": 0.003,
      "loss": 4.2068,
      "step": 1817
    },
    {
      "epoch": 0.01818,
      "grad_norm": 0.5897502755122268,
      "learning_rate": 0.003,
      "loss": 4.2034,
      "step": 1818
    },
    {
      "epoch": 0.01819,
      "grad_norm": 0.49357607734419184,
      "learning_rate": 0.003,
      "loss": 4.1821,
      "step": 1819
    },
    {
      "epoch": 0.0182,
      "grad_norm": 0.5025205629657381,
      "learning_rate": 0.003,
      "loss": 4.2022,
      "step": 1820
    },
    {
      "epoch": 0.01821,
      "grad_norm": 0.5231764315153676,
      "learning_rate": 0.003,
      "loss": 4.188,
      "step": 1821
    },
    {
      "epoch": 0.01822,
      "grad_norm": 0.589855961825142,
      "learning_rate": 0.003,
      "loss": 4.2017,
      "step": 1822
    },
    {
      "epoch": 0.01823,
      "grad_norm": 0.6454531707936206,
      "learning_rate": 0.003,
      "loss": 4.2159,
      "step": 1823
    },
    {
      "epoch": 0.01824,
      "grad_norm": 0.616450651672451,
      "learning_rate": 0.003,
      "loss": 4.1987,
      "step": 1824
    },
    {
      "epoch": 0.01825,
      "grad_norm": 0.5393299556773667,
      "learning_rate": 0.003,
      "loss": 4.199,
      "step": 1825
    },
    {
      "epoch": 0.01826,
      "grad_norm": 0.6162633558651508,
      "learning_rate": 0.003,
      "loss": 4.2135,
      "step": 1826
    },
    {
      "epoch": 0.01827,
      "grad_norm": 0.6366286833338192,
      "learning_rate": 0.003,
      "loss": 4.1847,
      "step": 1827
    },
    {
      "epoch": 0.01828,
      "grad_norm": 0.5519308619846083,
      "learning_rate": 0.003,
      "loss": 4.1935,
      "step": 1828
    },
    {
      "epoch": 0.01829,
      "grad_norm": 0.556700588713125,
      "learning_rate": 0.003,
      "loss": 4.2083,
      "step": 1829
    },
    {
      "epoch": 0.0183,
      "grad_norm": 0.6225243144725614,
      "learning_rate": 0.003,
      "loss": 4.2039,
      "step": 1830
    },
    {
      "epoch": 0.01831,
      "grad_norm": 0.8092938095153425,
      "learning_rate": 0.003,
      "loss": 4.2017,
      "step": 1831
    },
    {
      "epoch": 0.01832,
      "grad_norm": 1.156880999962459,
      "learning_rate": 0.003,
      "loss": 4.214,
      "step": 1832
    },
    {
      "epoch": 0.01833,
      "grad_norm": 0.8498167035958548,
      "learning_rate": 0.003,
      "loss": 4.2422,
      "step": 1833
    },
    {
      "epoch": 0.01834,
      "grad_norm": 0.6651811196480657,
      "learning_rate": 0.003,
      "loss": 4.2008,
      "step": 1834
    },
    {
      "epoch": 0.01835,
      "grad_norm": 0.7438128363368247,
      "learning_rate": 0.003,
      "loss": 4.205,
      "step": 1835
    },
    {
      "epoch": 0.01836,
      "grad_norm": 0.7712205326044778,
      "learning_rate": 0.003,
      "loss": 4.2014,
      "step": 1836
    },
    {
      "epoch": 0.01837,
      "grad_norm": 0.7094945295905764,
      "learning_rate": 0.003,
      "loss": 4.1968,
      "step": 1837
    },
    {
      "epoch": 0.01838,
      "grad_norm": 0.8255996162995448,
      "learning_rate": 0.003,
      "loss": 4.2049,
      "step": 1838
    },
    {
      "epoch": 0.01839,
      "grad_norm": 0.7001056258854736,
      "learning_rate": 0.003,
      "loss": 4.2234,
      "step": 1839
    },
    {
      "epoch": 0.0184,
      "grad_norm": 0.7156089343491636,
      "learning_rate": 0.003,
      "loss": 4.2248,
      "step": 1840
    },
    {
      "epoch": 0.01841,
      "grad_norm": 0.7514284093505675,
      "learning_rate": 0.003,
      "loss": 4.164,
      "step": 1841
    },
    {
      "epoch": 0.01842,
      "grad_norm": 0.7804042052375482,
      "learning_rate": 0.003,
      "loss": 4.1995,
      "step": 1842
    },
    {
      "epoch": 0.01843,
      "grad_norm": 0.7741073572881331,
      "learning_rate": 0.003,
      "loss": 4.1826,
      "step": 1843
    },
    {
      "epoch": 0.01844,
      "grad_norm": 0.8011416312871611,
      "learning_rate": 0.003,
      "loss": 4.1842,
      "step": 1844
    },
    {
      "epoch": 0.01845,
      "grad_norm": 0.8386720056960224,
      "learning_rate": 0.003,
      "loss": 4.187,
      "step": 1845
    },
    {
      "epoch": 0.01846,
      "grad_norm": 0.9781723483143008,
      "learning_rate": 0.003,
      "loss": 4.2242,
      "step": 1846
    },
    {
      "epoch": 0.01847,
      "grad_norm": 0.9173016227544075,
      "learning_rate": 0.003,
      "loss": 4.2189,
      "step": 1847
    },
    {
      "epoch": 0.01848,
      "grad_norm": 0.961352235410328,
      "learning_rate": 0.003,
      "loss": 4.2359,
      "step": 1848
    },
    {
      "epoch": 0.01849,
      "grad_norm": 0.9115360913743059,
      "learning_rate": 0.003,
      "loss": 4.241,
      "step": 1849
    },
    {
      "epoch": 0.0185,
      "grad_norm": 0.949227003474929,
      "learning_rate": 0.003,
      "loss": 4.2298,
      "step": 1850
    },
    {
      "epoch": 0.01851,
      "grad_norm": 0.756851342238193,
      "learning_rate": 0.003,
      "loss": 4.2063,
      "step": 1851
    },
    {
      "epoch": 0.01852,
      "grad_norm": 0.7062657388831308,
      "learning_rate": 0.003,
      "loss": 4.2058,
      "step": 1852
    },
    {
      "epoch": 0.01853,
      "grad_norm": 0.804960952863702,
      "learning_rate": 0.003,
      "loss": 4.206,
      "step": 1853
    },
    {
      "epoch": 0.01854,
      "grad_norm": 0.8048402157100555,
      "learning_rate": 0.003,
      "loss": 4.2379,
      "step": 1854
    },
    {
      "epoch": 0.01855,
      "grad_norm": 0.8514933979131342,
      "learning_rate": 0.003,
      "loss": 4.2236,
      "step": 1855
    },
    {
      "epoch": 0.01856,
      "grad_norm": 0.9293561457561126,
      "learning_rate": 0.003,
      "loss": 4.2144,
      "step": 1856
    },
    {
      "epoch": 0.01857,
      "grad_norm": 0.8452785281639297,
      "learning_rate": 0.003,
      "loss": 4.1937,
      "step": 1857
    },
    {
      "epoch": 0.01858,
      "grad_norm": 0.805524803230092,
      "learning_rate": 0.003,
      "loss": 4.2015,
      "step": 1858
    },
    {
      "epoch": 0.01859,
      "grad_norm": 0.826226040676062,
      "learning_rate": 0.003,
      "loss": 4.2023,
      "step": 1859
    },
    {
      "epoch": 0.0186,
      "grad_norm": 0.7646190876843869,
      "learning_rate": 0.003,
      "loss": 4.1808,
      "step": 1860
    },
    {
      "epoch": 0.01861,
      "grad_norm": 0.9398444707205434,
      "learning_rate": 0.003,
      "loss": 4.229,
      "step": 1861
    },
    {
      "epoch": 0.01862,
      "grad_norm": 1.0799822817783482,
      "learning_rate": 0.003,
      "loss": 4.208,
      "step": 1862
    },
    {
      "epoch": 0.01863,
      "grad_norm": 1.0663393244093409,
      "learning_rate": 0.003,
      "loss": 4.2176,
      "step": 1863
    },
    {
      "epoch": 0.01864,
      "grad_norm": 0.9063119158507146,
      "learning_rate": 0.003,
      "loss": 4.243,
      "step": 1864
    },
    {
      "epoch": 0.01865,
      "grad_norm": 0.8878085186485055,
      "learning_rate": 0.003,
      "loss": 4.222,
      "step": 1865
    },
    {
      "epoch": 0.01866,
      "grad_norm": 0.8278859311900081,
      "learning_rate": 0.003,
      "loss": 4.2161,
      "step": 1866
    },
    {
      "epoch": 0.01867,
      "grad_norm": 0.9175811468419192,
      "learning_rate": 0.003,
      "loss": 4.1983,
      "step": 1867
    },
    {
      "epoch": 0.01868,
      "grad_norm": 0.8195895794655929,
      "learning_rate": 0.003,
      "loss": 4.2097,
      "step": 1868
    },
    {
      "epoch": 0.01869,
      "grad_norm": 0.6958671652018995,
      "learning_rate": 0.003,
      "loss": 4.2233,
      "step": 1869
    },
    {
      "epoch": 0.0187,
      "grad_norm": 0.7276701584872959,
      "learning_rate": 0.003,
      "loss": 4.2132,
      "step": 1870
    },
    {
      "epoch": 0.01871,
      "grad_norm": 0.700252662578374,
      "learning_rate": 0.003,
      "loss": 4.1886,
      "step": 1871
    },
    {
      "epoch": 0.01872,
      "grad_norm": 0.7936164889803413,
      "learning_rate": 0.003,
      "loss": 4.2323,
      "step": 1872
    },
    {
      "epoch": 0.01873,
      "grad_norm": 0.8026794723116766,
      "learning_rate": 0.003,
      "loss": 4.1938,
      "step": 1873
    },
    {
      "epoch": 0.01874,
      "grad_norm": 0.6813841695856067,
      "learning_rate": 0.003,
      "loss": 4.1939,
      "step": 1874
    },
    {
      "epoch": 0.01875,
      "grad_norm": 0.7703759265763755,
      "learning_rate": 0.003,
      "loss": 4.2154,
      "step": 1875
    },
    {
      "epoch": 0.01876,
      "grad_norm": 0.7048464208203703,
      "learning_rate": 0.003,
      "loss": 4.2195,
      "step": 1876
    },
    {
      "epoch": 0.01877,
      "grad_norm": 0.6552231659596246,
      "learning_rate": 0.003,
      "loss": 4.2204,
      "step": 1877
    },
    {
      "epoch": 0.01878,
      "grad_norm": 0.6594564394551935,
      "learning_rate": 0.003,
      "loss": 4.2061,
      "step": 1878
    },
    {
      "epoch": 0.01879,
      "grad_norm": 0.6775220505077814,
      "learning_rate": 0.003,
      "loss": 4.1841,
      "step": 1879
    },
    {
      "epoch": 0.0188,
      "grad_norm": 0.7469198030113778,
      "learning_rate": 0.003,
      "loss": 4.1948,
      "step": 1880
    },
    {
      "epoch": 0.01881,
      "grad_norm": 0.8052292593043413,
      "learning_rate": 0.003,
      "loss": 4.1959,
      "step": 1881
    },
    {
      "epoch": 0.01882,
      "grad_norm": 0.9310051602523918,
      "learning_rate": 0.003,
      "loss": 4.2256,
      "step": 1882
    },
    {
      "epoch": 0.01883,
      "grad_norm": 0.8470067207894152,
      "learning_rate": 0.003,
      "loss": 4.2057,
      "step": 1883
    },
    {
      "epoch": 0.01884,
      "grad_norm": 0.7163623431526733,
      "learning_rate": 0.003,
      "loss": 4.1948,
      "step": 1884
    },
    {
      "epoch": 0.01885,
      "grad_norm": 0.7917195791610646,
      "learning_rate": 0.003,
      "loss": 4.1745,
      "step": 1885
    },
    {
      "epoch": 0.01886,
      "grad_norm": 0.7950048534183088,
      "learning_rate": 0.003,
      "loss": 4.2171,
      "step": 1886
    },
    {
      "epoch": 0.01887,
      "grad_norm": 0.7209518927340428,
      "learning_rate": 0.003,
      "loss": 4.2025,
      "step": 1887
    },
    {
      "epoch": 0.01888,
      "grad_norm": 0.633107358042472,
      "learning_rate": 0.003,
      "loss": 4.1883,
      "step": 1888
    },
    {
      "epoch": 0.01889,
      "grad_norm": 0.6143038001546575,
      "learning_rate": 0.003,
      "loss": 4.1996,
      "step": 1889
    },
    {
      "epoch": 0.0189,
      "grad_norm": 0.5158052453680656,
      "learning_rate": 0.003,
      "loss": 4.1757,
      "step": 1890
    },
    {
      "epoch": 0.01891,
      "grad_norm": 0.4633760186396196,
      "learning_rate": 0.003,
      "loss": 4.2224,
      "step": 1891
    },
    {
      "epoch": 0.01892,
      "grad_norm": 0.4927650862046063,
      "learning_rate": 0.003,
      "loss": 4.1956,
      "step": 1892
    },
    {
      "epoch": 0.01893,
      "grad_norm": 0.4373024700349087,
      "learning_rate": 0.003,
      "loss": 4.1773,
      "step": 1893
    },
    {
      "epoch": 0.01894,
      "grad_norm": 0.40335105556509215,
      "learning_rate": 0.003,
      "loss": 4.1784,
      "step": 1894
    },
    {
      "epoch": 0.01895,
      "grad_norm": 0.3931015972388579,
      "learning_rate": 0.003,
      "loss": 4.1649,
      "step": 1895
    },
    {
      "epoch": 0.01896,
      "grad_norm": 0.369330374322213,
      "learning_rate": 0.003,
      "loss": 4.1861,
      "step": 1896
    },
    {
      "epoch": 0.01897,
      "grad_norm": 0.38394762475942135,
      "learning_rate": 0.003,
      "loss": 4.1841,
      "step": 1897
    },
    {
      "epoch": 0.01898,
      "grad_norm": 0.4616420599151427,
      "learning_rate": 0.003,
      "loss": 4.1533,
      "step": 1898
    },
    {
      "epoch": 0.01899,
      "grad_norm": 0.6786086644128849,
      "learning_rate": 0.003,
      "loss": 4.1744,
      "step": 1899
    },
    {
      "epoch": 0.019,
      "grad_norm": 1.1028955308001205,
      "learning_rate": 0.003,
      "loss": 4.1821,
      "step": 1900
    },
    {
      "epoch": 0.01901,
      "grad_norm": 1.0381658886424407,
      "learning_rate": 0.003,
      "loss": 4.2416,
      "step": 1901
    },
    {
      "epoch": 0.01902,
      "grad_norm": 0.6228046274507568,
      "learning_rate": 0.003,
      "loss": 4.202,
      "step": 1902
    },
    {
      "epoch": 0.01903,
      "grad_norm": 0.552687373313921,
      "learning_rate": 0.003,
      "loss": 4.1547,
      "step": 1903
    },
    {
      "epoch": 0.01904,
      "grad_norm": 0.5642583939104693,
      "learning_rate": 0.003,
      "loss": 4.2113,
      "step": 1904
    },
    {
      "epoch": 0.01905,
      "grad_norm": 0.4866940659015072,
      "learning_rate": 0.003,
      "loss": 4.1815,
      "step": 1905
    },
    {
      "epoch": 0.01906,
      "grad_norm": 0.6770303387740269,
      "learning_rate": 0.003,
      "loss": 4.2038,
      "step": 1906
    },
    {
      "epoch": 0.01907,
      "grad_norm": 0.6799220004643979,
      "learning_rate": 0.003,
      "loss": 4.199,
      "step": 1907
    },
    {
      "epoch": 0.01908,
      "grad_norm": 0.5738027965979297,
      "learning_rate": 0.003,
      "loss": 4.2084,
      "step": 1908
    },
    {
      "epoch": 0.01909,
      "grad_norm": 0.5646602374989558,
      "learning_rate": 0.003,
      "loss": 4.1743,
      "step": 1909
    },
    {
      "epoch": 0.0191,
      "grad_norm": 0.6702075278822099,
      "learning_rate": 0.003,
      "loss": 4.1866,
      "step": 1910
    },
    {
      "epoch": 0.01911,
      "grad_norm": 0.7731308719042311,
      "learning_rate": 0.003,
      "loss": 4.1672,
      "step": 1911
    },
    {
      "epoch": 0.01912,
      "grad_norm": 0.6258475347121828,
      "learning_rate": 0.003,
      "loss": 4.1744,
      "step": 1912
    },
    {
      "epoch": 0.01913,
      "grad_norm": 0.6373334035841824,
      "learning_rate": 0.003,
      "loss": 4.1923,
      "step": 1913
    },
    {
      "epoch": 0.01914,
      "grad_norm": 0.7497697739080498,
      "learning_rate": 0.003,
      "loss": 4.212,
      "step": 1914
    },
    {
      "epoch": 0.01915,
      "grad_norm": 0.8443649873420552,
      "learning_rate": 0.003,
      "loss": 4.204,
      "step": 1915
    },
    {
      "epoch": 0.01916,
      "grad_norm": 0.8327185244516057,
      "learning_rate": 0.003,
      "loss": 4.1829,
      "step": 1916
    },
    {
      "epoch": 0.01917,
      "grad_norm": 0.8831134466171928,
      "learning_rate": 0.003,
      "loss": 4.2052,
      "step": 1917
    },
    {
      "epoch": 0.01918,
      "grad_norm": 0.9318795049142128,
      "learning_rate": 0.003,
      "loss": 4.1944,
      "step": 1918
    },
    {
      "epoch": 0.01919,
      "grad_norm": 0.944334593915859,
      "learning_rate": 0.003,
      "loss": 4.2049,
      "step": 1919
    },
    {
      "epoch": 0.0192,
      "grad_norm": 0.9365986219494583,
      "learning_rate": 0.003,
      "loss": 4.2076,
      "step": 1920
    },
    {
      "epoch": 0.01921,
      "grad_norm": 0.9617762005888578,
      "learning_rate": 0.003,
      "loss": 4.2016,
      "step": 1921
    },
    {
      "epoch": 0.01922,
      "grad_norm": 0.9805293516298391,
      "learning_rate": 0.003,
      "loss": 4.2181,
      "step": 1922
    },
    {
      "epoch": 0.01923,
      "grad_norm": 0.8498471851851511,
      "learning_rate": 0.003,
      "loss": 4.2204,
      "step": 1923
    },
    {
      "epoch": 0.01924,
      "grad_norm": 0.9257747078341632,
      "learning_rate": 0.003,
      "loss": 4.2284,
      "step": 1924
    },
    {
      "epoch": 0.01925,
      "grad_norm": 1.0324220402854314,
      "learning_rate": 0.003,
      "loss": 4.2182,
      "step": 1925
    },
    {
      "epoch": 0.01926,
      "grad_norm": 0.9745230962825202,
      "learning_rate": 0.003,
      "loss": 4.2282,
      "step": 1926
    },
    {
      "epoch": 0.01927,
      "grad_norm": 0.7705843813355484,
      "learning_rate": 0.003,
      "loss": 4.2129,
      "step": 1927
    },
    {
      "epoch": 0.01928,
      "grad_norm": 0.5911790602013839,
      "learning_rate": 0.003,
      "loss": 4.1789,
      "step": 1928
    },
    {
      "epoch": 0.01929,
      "grad_norm": 0.5782217294801848,
      "learning_rate": 0.003,
      "loss": 4.1986,
      "step": 1929
    },
    {
      "epoch": 0.0193,
      "grad_norm": 0.5556254348196169,
      "learning_rate": 0.003,
      "loss": 4.1871,
      "step": 1930
    },
    {
      "epoch": 0.01931,
      "grad_norm": 0.6337897693351197,
      "learning_rate": 0.003,
      "loss": 4.188,
      "step": 1931
    },
    {
      "epoch": 0.01932,
      "grad_norm": 0.6851141809276902,
      "learning_rate": 0.003,
      "loss": 4.1982,
      "step": 1932
    },
    {
      "epoch": 0.01933,
      "grad_norm": 0.6532471380653952,
      "learning_rate": 0.003,
      "loss": 4.1838,
      "step": 1933
    },
    {
      "epoch": 0.01934,
      "grad_norm": 0.660311918680962,
      "learning_rate": 0.003,
      "loss": 4.1881,
      "step": 1934
    },
    {
      "epoch": 0.01935,
      "grad_norm": 0.6202434802996027,
      "learning_rate": 0.003,
      "loss": 4.2019,
      "step": 1935
    },
    {
      "epoch": 0.01936,
      "grad_norm": 0.6456697693032586,
      "learning_rate": 0.003,
      "loss": 4.1829,
      "step": 1936
    },
    {
      "epoch": 0.01937,
      "grad_norm": 0.6945407817649866,
      "learning_rate": 0.003,
      "loss": 4.1968,
      "step": 1937
    },
    {
      "epoch": 0.01938,
      "grad_norm": 0.7393543695473613,
      "learning_rate": 0.003,
      "loss": 4.1957,
      "step": 1938
    },
    {
      "epoch": 0.01939,
      "grad_norm": 0.7713579649815137,
      "learning_rate": 0.003,
      "loss": 4.223,
      "step": 1939
    },
    {
      "epoch": 0.0194,
      "grad_norm": 0.6228971544711813,
      "learning_rate": 0.003,
      "loss": 4.1928,
      "step": 1940
    },
    {
      "epoch": 0.01941,
      "grad_norm": 0.5726639902946573,
      "learning_rate": 0.003,
      "loss": 4.1739,
      "step": 1941
    },
    {
      "epoch": 0.01942,
      "grad_norm": 0.6064973298236461,
      "learning_rate": 0.003,
      "loss": 4.1989,
      "step": 1942
    },
    {
      "epoch": 0.01943,
      "grad_norm": 0.6888737571597997,
      "learning_rate": 0.003,
      "loss": 4.1711,
      "step": 1943
    },
    {
      "epoch": 0.01944,
      "grad_norm": 0.6708431670479557,
      "learning_rate": 0.003,
      "loss": 4.2158,
      "step": 1944
    },
    {
      "epoch": 0.01945,
      "grad_norm": 0.5681415129494921,
      "learning_rate": 0.003,
      "loss": 4.1787,
      "step": 1945
    },
    {
      "epoch": 0.01946,
      "grad_norm": 0.47730421654011235,
      "learning_rate": 0.003,
      "loss": 4.1716,
      "step": 1946
    },
    {
      "epoch": 0.01947,
      "grad_norm": 0.47116667407771645,
      "learning_rate": 0.003,
      "loss": 4.1791,
      "step": 1947
    },
    {
      "epoch": 0.01948,
      "grad_norm": 0.6002169491076368,
      "learning_rate": 0.003,
      "loss": 4.2004,
      "step": 1948
    },
    {
      "epoch": 0.01949,
      "grad_norm": 0.6804506442238769,
      "learning_rate": 0.003,
      "loss": 4.1961,
      "step": 1949
    },
    {
      "epoch": 0.0195,
      "grad_norm": 0.9031150085350885,
      "learning_rate": 0.003,
      "loss": 4.1909,
      "step": 1950
    },
    {
      "epoch": 0.01951,
      "grad_norm": 1.055108763514552,
      "learning_rate": 0.003,
      "loss": 4.1972,
      "step": 1951
    },
    {
      "epoch": 0.01952,
      "grad_norm": 0.7942759742680753,
      "learning_rate": 0.003,
      "loss": 4.1947,
      "step": 1952
    },
    {
      "epoch": 0.01953,
      "grad_norm": 0.6995419703208098,
      "learning_rate": 0.003,
      "loss": 4.2007,
      "step": 1953
    },
    {
      "epoch": 0.01954,
      "grad_norm": 0.7323475440015588,
      "learning_rate": 0.003,
      "loss": 4.2017,
      "step": 1954
    },
    {
      "epoch": 0.01955,
      "grad_norm": 0.654602209622962,
      "learning_rate": 0.003,
      "loss": 4.1897,
      "step": 1955
    },
    {
      "epoch": 0.01956,
      "grad_norm": 0.6795706239523924,
      "learning_rate": 0.003,
      "loss": 4.188,
      "step": 1956
    },
    {
      "epoch": 0.01957,
      "grad_norm": 0.822104190978127,
      "learning_rate": 0.003,
      "loss": 4.1611,
      "step": 1957
    },
    {
      "epoch": 0.01958,
      "grad_norm": 0.7829306267633842,
      "learning_rate": 0.003,
      "loss": 4.1555,
      "step": 1958
    },
    {
      "epoch": 0.01959,
      "grad_norm": 0.7521187388682534,
      "learning_rate": 0.003,
      "loss": 4.1801,
      "step": 1959
    },
    {
      "epoch": 0.0196,
      "grad_norm": 0.8699203949276841,
      "learning_rate": 0.003,
      "loss": 4.1777,
      "step": 1960
    },
    {
      "epoch": 0.01961,
      "grad_norm": 0.9397547951451798,
      "learning_rate": 0.003,
      "loss": 4.1846,
      "step": 1961
    },
    {
      "epoch": 0.01962,
      "grad_norm": 0.8367517905224471,
      "learning_rate": 0.003,
      "loss": 4.1848,
      "step": 1962
    },
    {
      "epoch": 0.01963,
      "grad_norm": 0.8914011026895058,
      "learning_rate": 0.003,
      "loss": 4.1735,
      "step": 1963
    },
    {
      "epoch": 0.01964,
      "grad_norm": 1.069474116819015,
      "learning_rate": 0.003,
      "loss": 4.2192,
      "step": 1964
    },
    {
      "epoch": 0.01965,
      "grad_norm": 0.9846418582261246,
      "learning_rate": 0.003,
      "loss": 4.2097,
      "step": 1965
    },
    {
      "epoch": 0.01966,
      "grad_norm": 1.190189959584559,
      "learning_rate": 0.003,
      "loss": 4.2246,
      "step": 1966
    },
    {
      "epoch": 0.01967,
      "grad_norm": 0.9331071177303962,
      "learning_rate": 0.003,
      "loss": 4.1941,
      "step": 1967
    },
    {
      "epoch": 0.01968,
      "grad_norm": 0.8236928876485803,
      "learning_rate": 0.003,
      "loss": 4.2092,
      "step": 1968
    },
    {
      "epoch": 0.01969,
      "grad_norm": 0.9035337012628801,
      "learning_rate": 0.003,
      "loss": 4.2128,
      "step": 1969
    },
    {
      "epoch": 0.0197,
      "grad_norm": 0.9584839773684666,
      "learning_rate": 0.003,
      "loss": 4.2139,
      "step": 1970
    },
    {
      "epoch": 0.01971,
      "grad_norm": 1.1976612578146555,
      "learning_rate": 0.003,
      "loss": 4.2078,
      "step": 1971
    },
    {
      "epoch": 0.01972,
      "grad_norm": 1.03766882598544,
      "learning_rate": 0.003,
      "loss": 4.216,
      "step": 1972
    },
    {
      "epoch": 0.01973,
      "grad_norm": 1.157490820023187,
      "learning_rate": 0.003,
      "loss": 4.204,
      "step": 1973
    },
    {
      "epoch": 0.01974,
      "grad_norm": 1.0168705909824025,
      "learning_rate": 0.003,
      "loss": 4.2365,
      "step": 1974
    },
    {
      "epoch": 0.01975,
      "grad_norm": 0.9807823986551194,
      "learning_rate": 0.003,
      "loss": 4.2359,
      "step": 1975
    },
    {
      "epoch": 0.01976,
      "grad_norm": 0.7975927738530828,
      "learning_rate": 0.003,
      "loss": 4.2115,
      "step": 1976
    },
    {
      "epoch": 0.01977,
      "grad_norm": 0.9111601004721311,
      "learning_rate": 0.003,
      "loss": 4.2503,
      "step": 1977
    },
    {
      "epoch": 0.01978,
      "grad_norm": 0.95585271722792,
      "learning_rate": 0.003,
      "loss": 4.2456,
      "step": 1978
    },
    {
      "epoch": 0.01979,
      "grad_norm": 0.8967358768483624,
      "learning_rate": 0.003,
      "loss": 4.2367,
      "step": 1979
    },
    {
      "epoch": 0.0198,
      "grad_norm": 0.9098539062714608,
      "learning_rate": 0.003,
      "loss": 4.2135,
      "step": 1980
    },
    {
      "epoch": 0.01981,
      "grad_norm": 0.9619207371647327,
      "learning_rate": 0.003,
      "loss": 4.2036,
      "step": 1981
    },
    {
      "epoch": 0.01982,
      "grad_norm": 1.004618203421798,
      "learning_rate": 0.003,
      "loss": 4.2311,
      "step": 1982
    },
    {
      "epoch": 0.01983,
      "grad_norm": 0.8778331022367669,
      "learning_rate": 0.003,
      "loss": 4.2203,
      "step": 1983
    },
    {
      "epoch": 0.01984,
      "grad_norm": 0.7409261134848839,
      "learning_rate": 0.003,
      "loss": 4.2234,
      "step": 1984
    },
    {
      "epoch": 0.01985,
      "grad_norm": 0.7987127824711159,
      "learning_rate": 0.003,
      "loss": 4.2126,
      "step": 1985
    },
    {
      "epoch": 0.01986,
      "grad_norm": 0.8488590004969258,
      "learning_rate": 0.003,
      "loss": 4.2379,
      "step": 1986
    },
    {
      "epoch": 0.01987,
      "grad_norm": 0.856993962490694,
      "learning_rate": 0.003,
      "loss": 4.1987,
      "step": 1987
    },
    {
      "epoch": 0.01988,
      "grad_norm": 0.6814479531298734,
      "learning_rate": 0.003,
      "loss": 4.2021,
      "step": 1988
    },
    {
      "epoch": 0.01989,
      "grad_norm": 0.5447842315255877,
      "learning_rate": 0.003,
      "loss": 4.1871,
      "step": 1989
    },
    {
      "epoch": 0.0199,
      "grad_norm": 0.5760022915320738,
      "learning_rate": 0.003,
      "loss": 4.1964,
      "step": 1990
    },
    {
      "epoch": 0.01991,
      "grad_norm": 0.5132919071482156,
      "learning_rate": 0.003,
      "loss": 4.1738,
      "step": 1991
    },
    {
      "epoch": 0.01992,
      "grad_norm": 0.5851245859941666,
      "learning_rate": 0.003,
      "loss": 4.2128,
      "step": 1992
    },
    {
      "epoch": 0.01993,
      "grad_norm": 0.6267354593691943,
      "learning_rate": 0.003,
      "loss": 4.1956,
      "step": 1993
    },
    {
      "epoch": 0.01994,
      "grad_norm": 0.7099792076486661,
      "learning_rate": 0.003,
      "loss": 4.2188,
      "step": 1994
    },
    {
      "epoch": 0.01995,
      "grad_norm": 0.7678935327418717,
      "learning_rate": 0.003,
      "loss": 4.2111,
      "step": 1995
    },
    {
      "epoch": 0.01996,
      "grad_norm": 0.6394486685805509,
      "learning_rate": 0.003,
      "loss": 4.1755,
      "step": 1996
    },
    {
      "epoch": 0.01997,
      "grad_norm": 0.4226153497592854,
      "learning_rate": 0.003,
      "loss": 4.1972,
      "step": 1997
    },
    {
      "epoch": 0.01998,
      "grad_norm": 0.5031225205891672,
      "learning_rate": 0.003,
      "loss": 4.1844,
      "step": 1998
    },
    {
      "epoch": 0.01999,
      "grad_norm": 0.5395430302746123,
      "learning_rate": 0.003,
      "loss": 4.1747,
      "step": 1999
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.46234156015204186,
      "learning_rate": 0.003,
      "loss": 4.1586,
      "step": 2000
    },
    {
      "epoch": 0.02001,
      "grad_norm": 0.4395345274961725,
      "learning_rate": 0.003,
      "loss": 4.1934,
      "step": 2001
    },
    {
      "epoch": 0.02002,
      "grad_norm": 0.3957661324841875,
      "learning_rate": 0.003,
      "loss": 4.1863,
      "step": 2002
    },
    {
      "epoch": 0.02003,
      "grad_norm": 0.4711152480437331,
      "learning_rate": 0.003,
      "loss": 4.1873,
      "step": 2003
    },
    {
      "epoch": 0.02004,
      "grad_norm": 0.546136823152871,
      "learning_rate": 0.003,
      "loss": 4.1727,
      "step": 2004
    },
    {
      "epoch": 0.02005,
      "grad_norm": 0.654074266015456,
      "learning_rate": 0.003,
      "loss": 4.1803,
      "step": 2005
    },
    {
      "epoch": 0.02006,
      "grad_norm": 0.710370558494441,
      "learning_rate": 0.003,
      "loss": 4.1784,
      "step": 2006
    },
    {
      "epoch": 0.02007,
      "grad_norm": 0.7542070025547006,
      "learning_rate": 0.003,
      "loss": 4.1945,
      "step": 2007
    },
    {
      "epoch": 0.02008,
      "grad_norm": 0.7773827468349818,
      "learning_rate": 0.003,
      "loss": 4.191,
      "step": 2008
    },
    {
      "epoch": 0.02009,
      "grad_norm": 0.6899352222366664,
      "learning_rate": 0.003,
      "loss": 4.1857,
      "step": 2009
    },
    {
      "epoch": 0.0201,
      "grad_norm": 0.5614651061397582,
      "learning_rate": 0.003,
      "loss": 4.1935,
      "step": 2010
    },
    {
      "epoch": 0.02011,
      "grad_norm": 0.6625002839338998,
      "learning_rate": 0.003,
      "loss": 4.183,
      "step": 2011
    },
    {
      "epoch": 0.02012,
      "grad_norm": 0.6997403670690155,
      "learning_rate": 0.003,
      "loss": 4.1822,
      "step": 2012
    },
    {
      "epoch": 0.02013,
      "grad_norm": 0.6697515151480535,
      "learning_rate": 0.003,
      "loss": 4.1896,
      "step": 2013
    },
    {
      "epoch": 0.02014,
      "grad_norm": 0.6386903403073065,
      "learning_rate": 0.003,
      "loss": 4.1729,
      "step": 2014
    },
    {
      "epoch": 0.02015,
      "grad_norm": 0.5907166723123874,
      "learning_rate": 0.003,
      "loss": 4.1657,
      "step": 2015
    },
    {
      "epoch": 0.02016,
      "grad_norm": 0.605106268984101,
      "learning_rate": 0.003,
      "loss": 4.1769,
      "step": 2016
    },
    {
      "epoch": 0.02017,
      "grad_norm": 0.6631806062284518,
      "learning_rate": 0.003,
      "loss": 4.1711,
      "step": 2017
    },
    {
      "epoch": 0.02018,
      "grad_norm": 0.6347297750042842,
      "learning_rate": 0.003,
      "loss": 4.1746,
      "step": 2018
    },
    {
      "epoch": 0.02019,
      "grad_norm": 0.8248022848153107,
      "learning_rate": 0.003,
      "loss": 4.1834,
      "step": 2019
    },
    {
      "epoch": 0.0202,
      "grad_norm": 0.9904970732072179,
      "learning_rate": 0.003,
      "loss": 4.2031,
      "step": 2020
    },
    {
      "epoch": 0.02021,
      "grad_norm": 0.9086090012812373,
      "learning_rate": 0.003,
      "loss": 4.2148,
      "step": 2021
    },
    {
      "epoch": 0.02022,
      "grad_norm": 0.9232445860299345,
      "learning_rate": 0.003,
      "loss": 4.175,
      "step": 2022
    },
    {
      "epoch": 0.02023,
      "grad_norm": 0.8050131783240017,
      "learning_rate": 0.003,
      "loss": 4.1493,
      "step": 2023
    },
    {
      "epoch": 0.02024,
      "grad_norm": 0.7442373542055678,
      "learning_rate": 0.003,
      "loss": 4.1717,
      "step": 2024
    },
    {
      "epoch": 0.02025,
      "grad_norm": 0.9832310341451624,
      "learning_rate": 0.003,
      "loss": 4.2324,
      "step": 2025
    },
    {
      "epoch": 0.02026,
      "grad_norm": 0.9556185585548506,
      "learning_rate": 0.003,
      "loss": 4.2147,
      "step": 2026
    },
    {
      "epoch": 0.02027,
      "grad_norm": 1.0633653823279028,
      "learning_rate": 0.003,
      "loss": 4.218,
      "step": 2027
    },
    {
      "epoch": 0.02028,
      "grad_norm": 0.8361708667618777,
      "learning_rate": 0.003,
      "loss": 4.1941,
      "step": 2028
    },
    {
      "epoch": 0.02029,
      "grad_norm": 0.8621114948206428,
      "learning_rate": 0.003,
      "loss": 4.1995,
      "step": 2029
    },
    {
      "epoch": 0.0203,
      "grad_norm": 0.7505639644342775,
      "learning_rate": 0.003,
      "loss": 4.1873,
      "step": 2030
    },
    {
      "epoch": 0.02031,
      "grad_norm": 0.7688539298918926,
      "learning_rate": 0.003,
      "loss": 4.1919,
      "step": 2031
    },
    {
      "epoch": 0.02032,
      "grad_norm": 0.8008832349814213,
      "learning_rate": 0.003,
      "loss": 4.1682,
      "step": 2032
    },
    {
      "epoch": 0.02033,
      "grad_norm": 0.7744220160391694,
      "learning_rate": 0.003,
      "loss": 4.2069,
      "step": 2033
    },
    {
      "epoch": 0.02034,
      "grad_norm": 0.9334621419858093,
      "learning_rate": 0.003,
      "loss": 4.2063,
      "step": 2034
    },
    {
      "epoch": 0.02035,
      "grad_norm": 1.0003714475706196,
      "learning_rate": 0.003,
      "loss": 4.1981,
      "step": 2035
    },
    {
      "epoch": 0.02036,
      "grad_norm": 1.0165755074456069,
      "learning_rate": 0.003,
      "loss": 4.2039,
      "step": 2036
    },
    {
      "epoch": 0.02037,
      "grad_norm": 0.8752656941969562,
      "learning_rate": 0.003,
      "loss": 4.2133,
      "step": 2037
    },
    {
      "epoch": 0.02038,
      "grad_norm": 0.8030828089902575,
      "learning_rate": 0.003,
      "loss": 4.1951,
      "step": 2038
    },
    {
      "epoch": 0.02039,
      "grad_norm": 0.6849651287466818,
      "learning_rate": 0.003,
      "loss": 4.1984,
      "step": 2039
    },
    {
      "epoch": 0.0204,
      "grad_norm": 0.7048992687138306,
      "learning_rate": 0.003,
      "loss": 4.2015,
      "step": 2040
    },
    {
      "epoch": 0.02041,
      "grad_norm": 0.5846622816322785,
      "learning_rate": 0.003,
      "loss": 4.1601,
      "step": 2041
    },
    {
      "epoch": 0.02042,
      "grad_norm": 0.579059647971908,
      "learning_rate": 0.003,
      "loss": 4.159,
      "step": 2042
    },
    {
      "epoch": 0.02043,
      "grad_norm": 0.5320216814522557,
      "learning_rate": 0.003,
      "loss": 4.2018,
      "step": 2043
    },
    {
      "epoch": 0.02044,
      "grad_norm": 0.5749364817997652,
      "learning_rate": 0.003,
      "loss": 4.174,
      "step": 2044
    },
    {
      "epoch": 0.02045,
      "grad_norm": 0.5872195224989512,
      "learning_rate": 0.003,
      "loss": 4.1881,
      "step": 2045
    },
    {
      "epoch": 0.02046,
      "grad_norm": 0.6166345493647961,
      "learning_rate": 0.003,
      "loss": 4.1634,
      "step": 2046
    },
    {
      "epoch": 0.02047,
      "grad_norm": 0.7223919170905914,
      "learning_rate": 0.003,
      "loss": 4.2037,
      "step": 2047
    },
    {
      "epoch": 0.02048,
      "grad_norm": 0.7799560828695167,
      "learning_rate": 0.003,
      "loss": 4.1877,
      "step": 2048
    },
    {
      "epoch": 0.02049,
      "grad_norm": 0.8702209657581298,
      "learning_rate": 0.003,
      "loss": 4.2131,
      "step": 2049
    },
    {
      "epoch": 0.0205,
      "grad_norm": 0.9375771238974213,
      "learning_rate": 0.003,
      "loss": 4.1631,
      "step": 2050
    },
    {
      "epoch": 0.02051,
      "grad_norm": 1.1873073578231947,
      "learning_rate": 0.003,
      "loss": 4.178,
      "step": 2051
    },
    {
      "epoch": 0.02052,
      "grad_norm": 1.03209456773,
      "learning_rate": 0.003,
      "loss": 4.1777,
      "step": 2052
    },
    {
      "epoch": 0.02053,
      "grad_norm": 1.0555848829046368,
      "learning_rate": 0.003,
      "loss": 4.1832,
      "step": 2053
    },
    {
      "epoch": 0.02054,
      "grad_norm": 0.9050417954028227,
      "learning_rate": 0.003,
      "loss": 4.2168,
      "step": 2054
    },
    {
      "epoch": 0.02055,
      "grad_norm": 0.8496945287219897,
      "learning_rate": 0.003,
      "loss": 4.1765,
      "step": 2055
    },
    {
      "epoch": 0.02056,
      "grad_norm": 0.7540734931162184,
      "learning_rate": 0.003,
      "loss": 4.17,
      "step": 2056
    },
    {
      "epoch": 0.02057,
      "grad_norm": 0.79729190390271,
      "learning_rate": 0.003,
      "loss": 4.2076,
      "step": 2057
    },
    {
      "epoch": 0.02058,
      "grad_norm": 0.8092766990194165,
      "learning_rate": 0.003,
      "loss": 4.1879,
      "step": 2058
    },
    {
      "epoch": 0.02059,
      "grad_norm": 0.787520651985971,
      "learning_rate": 0.003,
      "loss": 4.1662,
      "step": 2059
    },
    {
      "epoch": 0.0206,
      "grad_norm": 0.8179849805821454,
      "learning_rate": 0.003,
      "loss": 4.2123,
      "step": 2060
    },
    {
      "epoch": 0.02061,
      "grad_norm": 0.765277725532741,
      "learning_rate": 0.003,
      "loss": 4.1867,
      "step": 2061
    },
    {
      "epoch": 0.02062,
      "grad_norm": 0.6833167046153735,
      "learning_rate": 0.003,
      "loss": 4.1822,
      "step": 2062
    },
    {
      "epoch": 0.02063,
      "grad_norm": 0.7852297427280791,
      "learning_rate": 0.003,
      "loss": 4.1788,
      "step": 2063
    },
    {
      "epoch": 0.02064,
      "grad_norm": 0.7558602646875056,
      "learning_rate": 0.003,
      "loss": 4.1894,
      "step": 2064
    },
    {
      "epoch": 0.02065,
      "grad_norm": 0.7461513201926369,
      "learning_rate": 0.003,
      "loss": 4.1686,
      "step": 2065
    },
    {
      "epoch": 0.02066,
      "grad_norm": 0.8279588238364882,
      "learning_rate": 0.003,
      "loss": 4.1898,
      "step": 2066
    },
    {
      "epoch": 0.02067,
      "grad_norm": 0.7670898564018478,
      "learning_rate": 0.003,
      "loss": 4.1869,
      "step": 2067
    },
    {
      "epoch": 0.02068,
      "grad_norm": 0.6723403011847539,
      "learning_rate": 0.003,
      "loss": 4.1662,
      "step": 2068
    },
    {
      "epoch": 0.02069,
      "grad_norm": 0.6336511314262159,
      "learning_rate": 0.003,
      "loss": 4.1849,
      "step": 2069
    },
    {
      "epoch": 0.0207,
      "grad_norm": 0.70657998006886,
      "learning_rate": 0.003,
      "loss": 4.1607,
      "step": 2070
    },
    {
      "epoch": 0.02071,
      "grad_norm": 0.7487170820279913,
      "learning_rate": 0.003,
      "loss": 4.1912,
      "step": 2071
    },
    {
      "epoch": 0.02072,
      "grad_norm": 0.7172563865038417,
      "learning_rate": 0.003,
      "loss": 4.1982,
      "step": 2072
    },
    {
      "epoch": 0.02073,
      "grad_norm": 0.7815042091167561,
      "learning_rate": 0.003,
      "loss": 4.1796,
      "step": 2073
    },
    {
      "epoch": 0.02074,
      "grad_norm": 0.7869404752550972,
      "learning_rate": 0.003,
      "loss": 4.1864,
      "step": 2074
    },
    {
      "epoch": 0.02075,
      "grad_norm": 0.7115380813621578,
      "learning_rate": 0.003,
      "loss": 4.2091,
      "step": 2075
    },
    {
      "epoch": 0.02076,
      "grad_norm": 0.6271616020332964,
      "learning_rate": 0.003,
      "loss": 4.199,
      "step": 2076
    },
    {
      "epoch": 0.02077,
      "grad_norm": 0.5945516898335454,
      "learning_rate": 0.003,
      "loss": 4.1669,
      "step": 2077
    },
    {
      "epoch": 0.02078,
      "grad_norm": 0.5807187113226505,
      "learning_rate": 0.003,
      "loss": 4.1615,
      "step": 2078
    },
    {
      "epoch": 0.02079,
      "grad_norm": 0.4917157616397027,
      "learning_rate": 0.003,
      "loss": 4.1449,
      "step": 2079
    },
    {
      "epoch": 0.0208,
      "grad_norm": 0.5388783899386764,
      "learning_rate": 0.003,
      "loss": 4.1563,
      "step": 2080
    },
    {
      "epoch": 0.02081,
      "grad_norm": 0.6250429891561764,
      "learning_rate": 0.003,
      "loss": 4.1899,
      "step": 2081
    },
    {
      "epoch": 0.02082,
      "grad_norm": 0.7539524872161132,
      "learning_rate": 0.003,
      "loss": 4.1879,
      "step": 2082
    },
    {
      "epoch": 0.02083,
      "grad_norm": 0.9021795929932535,
      "learning_rate": 0.003,
      "loss": 4.189,
      "step": 2083
    },
    {
      "epoch": 0.02084,
      "grad_norm": 0.9892236370325144,
      "learning_rate": 0.003,
      "loss": 4.1914,
      "step": 2084
    },
    {
      "epoch": 0.02085,
      "grad_norm": 0.9279291435694554,
      "learning_rate": 0.003,
      "loss": 4.1653,
      "step": 2085
    },
    {
      "epoch": 0.02086,
      "grad_norm": 0.7919688525399468,
      "learning_rate": 0.003,
      "loss": 4.1726,
      "step": 2086
    },
    {
      "epoch": 0.02087,
      "grad_norm": 0.7965117787009012,
      "learning_rate": 0.003,
      "loss": 4.1719,
      "step": 2087
    },
    {
      "epoch": 0.02088,
      "grad_norm": 0.882456029692519,
      "learning_rate": 0.003,
      "loss": 4.2097,
      "step": 2088
    },
    {
      "epoch": 0.02089,
      "grad_norm": 0.8843676371103549,
      "learning_rate": 0.003,
      "loss": 4.1781,
      "step": 2089
    },
    {
      "epoch": 0.0209,
      "grad_norm": 0.9095817089780983,
      "learning_rate": 0.003,
      "loss": 4.2024,
      "step": 2090
    },
    {
      "epoch": 0.02091,
      "grad_norm": 0.8360707980502976,
      "learning_rate": 0.003,
      "loss": 4.1884,
      "step": 2091
    },
    {
      "epoch": 0.02092,
      "grad_norm": 0.8954851906550106,
      "learning_rate": 0.003,
      "loss": 4.189,
      "step": 2092
    },
    {
      "epoch": 0.02093,
      "grad_norm": 0.8146056324857043,
      "learning_rate": 0.003,
      "loss": 4.1544,
      "step": 2093
    },
    {
      "epoch": 0.02094,
      "grad_norm": 0.7558490596404085,
      "learning_rate": 0.003,
      "loss": 4.1629,
      "step": 2094
    },
    {
      "epoch": 0.02095,
      "grad_norm": 0.7384447141579936,
      "learning_rate": 0.003,
      "loss": 4.2013,
      "step": 2095
    },
    {
      "epoch": 0.02096,
      "grad_norm": 0.7339651428587286,
      "learning_rate": 0.003,
      "loss": 4.1917,
      "step": 2096
    },
    {
      "epoch": 0.02097,
      "grad_norm": 0.6671356121778469,
      "learning_rate": 0.003,
      "loss": 4.1698,
      "step": 2097
    },
    {
      "epoch": 0.02098,
      "grad_norm": 0.653522008057512,
      "learning_rate": 0.003,
      "loss": 4.1857,
      "step": 2098
    },
    {
      "epoch": 0.02099,
      "grad_norm": 0.6717296847888601,
      "learning_rate": 0.003,
      "loss": 4.1945,
      "step": 2099
    },
    {
      "epoch": 0.021,
      "grad_norm": 0.6641683049026381,
      "learning_rate": 0.003,
      "loss": 4.206,
      "step": 2100
    },
    {
      "epoch": 0.02101,
      "grad_norm": 0.8263568144502693,
      "learning_rate": 0.003,
      "loss": 4.2014,
      "step": 2101
    },
    {
      "epoch": 0.02102,
      "grad_norm": 0.9182856357229072,
      "learning_rate": 0.003,
      "loss": 4.2116,
      "step": 2102
    },
    {
      "epoch": 0.02103,
      "grad_norm": 0.8775651409452159,
      "learning_rate": 0.003,
      "loss": 4.1702,
      "step": 2103
    },
    {
      "epoch": 0.02104,
      "grad_norm": 0.717248452996101,
      "learning_rate": 0.003,
      "loss": 4.1698,
      "step": 2104
    },
    {
      "epoch": 0.02105,
      "grad_norm": 0.6147629172749789,
      "learning_rate": 0.003,
      "loss": 4.1709,
      "step": 2105
    },
    {
      "epoch": 0.02106,
      "grad_norm": 0.5350819274969562,
      "learning_rate": 0.003,
      "loss": 4.1599,
      "step": 2106
    },
    {
      "epoch": 0.02107,
      "grad_norm": 0.6022860581700974,
      "learning_rate": 0.003,
      "loss": 4.1732,
      "step": 2107
    },
    {
      "epoch": 0.02108,
      "grad_norm": 0.6033513423948526,
      "learning_rate": 0.003,
      "loss": 4.1543,
      "step": 2108
    },
    {
      "epoch": 0.02109,
      "grad_norm": 0.6042225440605065,
      "learning_rate": 0.003,
      "loss": 4.144,
      "step": 2109
    },
    {
      "epoch": 0.0211,
      "grad_norm": 0.560183095906506,
      "learning_rate": 0.003,
      "loss": 4.1584,
      "step": 2110
    },
    {
      "epoch": 0.02111,
      "grad_norm": 0.5249161566388668,
      "learning_rate": 0.003,
      "loss": 4.1663,
      "step": 2111
    },
    {
      "epoch": 0.02112,
      "grad_norm": 0.5391312057228763,
      "learning_rate": 0.003,
      "loss": 4.1641,
      "step": 2112
    },
    {
      "epoch": 0.02113,
      "grad_norm": 0.5260626981895078,
      "learning_rate": 0.003,
      "loss": 4.1597,
      "step": 2113
    },
    {
      "epoch": 0.02114,
      "grad_norm": 0.5072590511188936,
      "learning_rate": 0.003,
      "loss": 4.1483,
      "step": 2114
    },
    {
      "epoch": 0.02115,
      "grad_norm": 0.5460141991570172,
      "learning_rate": 0.003,
      "loss": 4.1417,
      "step": 2115
    },
    {
      "epoch": 0.02116,
      "grad_norm": 0.5749891422597724,
      "learning_rate": 0.003,
      "loss": 4.1705,
      "step": 2116
    },
    {
      "epoch": 0.02117,
      "grad_norm": 0.6781459947006083,
      "learning_rate": 0.003,
      "loss": 4.1668,
      "step": 2117
    },
    {
      "epoch": 0.02118,
      "grad_norm": 0.797658364148174,
      "learning_rate": 0.003,
      "loss": 4.1692,
      "step": 2118
    },
    {
      "epoch": 0.02119,
      "grad_norm": 0.8688435880924688,
      "learning_rate": 0.003,
      "loss": 4.1833,
      "step": 2119
    },
    {
      "epoch": 0.0212,
      "grad_norm": 0.7833741674522176,
      "learning_rate": 0.003,
      "loss": 4.1545,
      "step": 2120
    },
    {
      "epoch": 0.02121,
      "grad_norm": 0.791532521338522,
      "learning_rate": 0.003,
      "loss": 4.1508,
      "step": 2121
    },
    {
      "epoch": 0.02122,
      "grad_norm": 0.7913660839492322,
      "learning_rate": 0.003,
      "loss": 4.1614,
      "step": 2122
    },
    {
      "epoch": 0.02123,
      "grad_norm": 0.7506478823984966,
      "learning_rate": 0.003,
      "loss": 4.1402,
      "step": 2123
    },
    {
      "epoch": 0.02124,
      "grad_norm": 0.7580020078198468,
      "learning_rate": 0.003,
      "loss": 4.1643,
      "step": 2124
    },
    {
      "epoch": 0.02125,
      "grad_norm": 0.7006901754929752,
      "learning_rate": 0.003,
      "loss": 4.1792,
      "step": 2125
    },
    {
      "epoch": 0.02126,
      "grad_norm": 0.7653616945644249,
      "learning_rate": 0.003,
      "loss": 4.1781,
      "step": 2126
    },
    {
      "epoch": 0.02127,
      "grad_norm": 0.7492928706532664,
      "learning_rate": 0.003,
      "loss": 4.1859,
      "step": 2127
    },
    {
      "epoch": 0.02128,
      "grad_norm": 0.7795854700268753,
      "learning_rate": 0.003,
      "loss": 4.1758,
      "step": 2128
    },
    {
      "epoch": 0.02129,
      "grad_norm": 0.9805419044523191,
      "learning_rate": 0.003,
      "loss": 4.186,
      "step": 2129
    },
    {
      "epoch": 0.0213,
      "grad_norm": 1.0343575534498912,
      "learning_rate": 0.003,
      "loss": 4.1641,
      "step": 2130
    },
    {
      "epoch": 0.02131,
      "grad_norm": 0.9468793830055084,
      "learning_rate": 0.003,
      "loss": 4.1869,
      "step": 2131
    },
    {
      "epoch": 0.02132,
      "grad_norm": 0.9021420581140779,
      "learning_rate": 0.003,
      "loss": 4.2032,
      "step": 2132
    },
    {
      "epoch": 0.02133,
      "grad_norm": 0.8122842455378513,
      "learning_rate": 0.003,
      "loss": 4.1908,
      "step": 2133
    },
    {
      "epoch": 0.02134,
      "grad_norm": 1.06138323809129,
      "learning_rate": 0.003,
      "loss": 4.2048,
      "step": 2134
    },
    {
      "epoch": 0.02135,
      "grad_norm": 1.2270031550495697,
      "learning_rate": 0.003,
      "loss": 4.212,
      "step": 2135
    },
    {
      "epoch": 0.02136,
      "grad_norm": 0.824636655199696,
      "learning_rate": 0.003,
      "loss": 4.1714,
      "step": 2136
    },
    {
      "epoch": 0.02137,
      "grad_norm": 0.7977636532107003,
      "learning_rate": 0.003,
      "loss": 4.2074,
      "step": 2137
    },
    {
      "epoch": 0.02138,
      "grad_norm": 0.8175212273276642,
      "learning_rate": 0.003,
      "loss": 4.2175,
      "step": 2138
    },
    {
      "epoch": 0.02139,
      "grad_norm": 0.8041667304441814,
      "learning_rate": 0.003,
      "loss": 4.2081,
      "step": 2139
    },
    {
      "epoch": 0.0214,
      "grad_norm": 0.9410719295719912,
      "learning_rate": 0.003,
      "loss": 4.2078,
      "step": 2140
    },
    {
      "epoch": 0.02141,
      "grad_norm": 0.9128066534114356,
      "learning_rate": 0.003,
      "loss": 4.1891,
      "step": 2141
    },
    {
      "epoch": 0.02142,
      "grad_norm": 0.7166876786866843,
      "learning_rate": 0.003,
      "loss": 4.1743,
      "step": 2142
    },
    {
      "epoch": 0.02143,
      "grad_norm": 0.7553648404993994,
      "learning_rate": 0.003,
      "loss": 4.1879,
      "step": 2143
    },
    {
      "epoch": 0.02144,
      "grad_norm": 0.9693569859499341,
      "learning_rate": 0.003,
      "loss": 4.1979,
      "step": 2144
    },
    {
      "epoch": 0.02145,
      "grad_norm": 1.005405043081365,
      "learning_rate": 0.003,
      "loss": 4.1853,
      "step": 2145
    },
    {
      "epoch": 0.02146,
      "grad_norm": 0.9174606002788802,
      "learning_rate": 0.003,
      "loss": 4.1967,
      "step": 2146
    },
    {
      "epoch": 0.02147,
      "grad_norm": 0.7660679608210765,
      "learning_rate": 0.003,
      "loss": 4.2109,
      "step": 2147
    },
    {
      "epoch": 0.02148,
      "grad_norm": 0.7067244008901828,
      "learning_rate": 0.003,
      "loss": 4.222,
      "step": 2148
    },
    {
      "epoch": 0.02149,
      "grad_norm": 0.5597922211015285,
      "learning_rate": 0.003,
      "loss": 4.1982,
      "step": 2149
    },
    {
      "epoch": 0.0215,
      "grad_norm": 0.5887191266363829,
      "learning_rate": 0.003,
      "loss": 4.1847,
      "step": 2150
    },
    {
      "epoch": 0.02151,
      "grad_norm": 0.5623719175027401,
      "learning_rate": 0.003,
      "loss": 4.1889,
      "step": 2151
    },
    {
      "epoch": 0.02152,
      "grad_norm": 0.5432643275971011,
      "learning_rate": 0.003,
      "loss": 4.1639,
      "step": 2152
    },
    {
      "epoch": 0.02153,
      "grad_norm": 0.5447875969469528,
      "learning_rate": 0.003,
      "loss": 4.1717,
      "step": 2153
    },
    {
      "epoch": 0.02154,
      "grad_norm": 0.64393428597859,
      "learning_rate": 0.003,
      "loss": 4.1635,
      "step": 2154
    },
    {
      "epoch": 0.02155,
      "grad_norm": 0.836540801508149,
      "learning_rate": 0.003,
      "loss": 4.1972,
      "step": 2155
    },
    {
      "epoch": 0.02156,
      "grad_norm": 0.9453076222627308,
      "learning_rate": 0.003,
      "loss": 4.1938,
      "step": 2156
    },
    {
      "epoch": 0.02157,
      "grad_norm": 0.7417036890833684,
      "learning_rate": 0.003,
      "loss": 4.1515,
      "step": 2157
    },
    {
      "epoch": 0.02158,
      "grad_norm": 0.5957713091881504,
      "learning_rate": 0.003,
      "loss": 4.1686,
      "step": 2158
    },
    {
      "epoch": 0.02159,
      "grad_norm": 0.6018851123999939,
      "learning_rate": 0.003,
      "loss": 4.1807,
      "step": 2159
    },
    {
      "epoch": 0.0216,
      "grad_norm": 0.6716376115123553,
      "learning_rate": 0.003,
      "loss": 4.1831,
      "step": 2160
    },
    {
      "epoch": 0.02161,
      "grad_norm": 0.7819649346338547,
      "learning_rate": 0.003,
      "loss": 4.1755,
      "step": 2161
    },
    {
      "epoch": 0.02162,
      "grad_norm": 0.8233828232110464,
      "learning_rate": 0.003,
      "loss": 4.1801,
      "step": 2162
    },
    {
      "epoch": 0.02163,
      "grad_norm": 0.7824568090505709,
      "learning_rate": 0.003,
      "loss": 4.1982,
      "step": 2163
    },
    {
      "epoch": 0.02164,
      "grad_norm": 0.8597315749146158,
      "learning_rate": 0.003,
      "loss": 4.1915,
      "step": 2164
    },
    {
      "epoch": 0.02165,
      "grad_norm": 0.7979744235864884,
      "learning_rate": 0.003,
      "loss": 4.1562,
      "step": 2165
    },
    {
      "epoch": 0.02166,
      "grad_norm": 0.7759936363363831,
      "learning_rate": 0.003,
      "loss": 4.1947,
      "step": 2166
    },
    {
      "epoch": 0.02167,
      "grad_norm": 0.7097277369414112,
      "learning_rate": 0.003,
      "loss": 4.1708,
      "step": 2167
    },
    {
      "epoch": 0.02168,
      "grad_norm": 0.6129275664005919,
      "learning_rate": 0.003,
      "loss": 4.1674,
      "step": 2168
    },
    {
      "epoch": 0.02169,
      "grad_norm": 0.5617202579327312,
      "learning_rate": 0.003,
      "loss": 4.1636,
      "step": 2169
    },
    {
      "epoch": 0.0217,
      "grad_norm": 0.4729981033232719,
      "learning_rate": 0.003,
      "loss": 4.1693,
      "step": 2170
    },
    {
      "epoch": 0.02171,
      "grad_norm": 0.5127457916731437,
      "learning_rate": 0.003,
      "loss": 4.1819,
      "step": 2171
    },
    {
      "epoch": 0.02172,
      "grad_norm": 0.6304303564787295,
      "learning_rate": 0.003,
      "loss": 4.1556,
      "step": 2172
    },
    {
      "epoch": 0.02173,
      "grad_norm": 0.7235970129567234,
      "learning_rate": 0.003,
      "loss": 4.1636,
      "step": 2173
    },
    {
      "epoch": 0.02174,
      "grad_norm": 0.9537416454234164,
      "learning_rate": 0.003,
      "loss": 4.1668,
      "step": 2174
    },
    {
      "epoch": 0.02175,
      "grad_norm": 1.0772501316127283,
      "learning_rate": 0.003,
      "loss": 4.2082,
      "step": 2175
    },
    {
      "epoch": 0.02176,
      "grad_norm": 0.9320344556958656,
      "learning_rate": 0.003,
      "loss": 4.1798,
      "step": 2176
    },
    {
      "epoch": 0.02177,
      "grad_norm": 1.2306562612139564,
      "learning_rate": 0.003,
      "loss": 4.2022,
      "step": 2177
    },
    {
      "epoch": 0.02178,
      "grad_norm": 0.9649462703992993,
      "learning_rate": 0.003,
      "loss": 4.1822,
      "step": 2178
    },
    {
      "epoch": 0.02179,
      "grad_norm": 0.8364440339114567,
      "learning_rate": 0.003,
      "loss": 4.1982,
      "step": 2179
    },
    {
      "epoch": 0.0218,
      "grad_norm": 0.8494241782855642,
      "learning_rate": 0.003,
      "loss": 4.2094,
      "step": 2180
    },
    {
      "epoch": 0.02181,
      "grad_norm": 0.8028341199053393,
      "learning_rate": 0.003,
      "loss": 4.2103,
      "step": 2181
    },
    {
      "epoch": 0.02182,
      "grad_norm": 0.8789575016731874,
      "learning_rate": 0.003,
      "loss": 4.1991,
      "step": 2182
    },
    {
      "epoch": 0.02183,
      "grad_norm": 0.7339272838818488,
      "learning_rate": 0.003,
      "loss": 4.2018,
      "step": 2183
    },
    {
      "epoch": 0.02184,
      "grad_norm": 0.6992146186501603,
      "learning_rate": 0.003,
      "loss": 4.1762,
      "step": 2184
    },
    {
      "epoch": 0.02185,
      "grad_norm": 0.6859058633396239,
      "learning_rate": 0.003,
      "loss": 4.1699,
      "step": 2185
    },
    {
      "epoch": 0.02186,
      "grad_norm": 0.671595445918346,
      "learning_rate": 0.003,
      "loss": 4.186,
      "step": 2186
    },
    {
      "epoch": 0.02187,
      "grad_norm": 0.6535925166183008,
      "learning_rate": 0.003,
      "loss": 4.1812,
      "step": 2187
    },
    {
      "epoch": 0.02188,
      "grad_norm": 0.6198661745230879,
      "learning_rate": 0.003,
      "loss": 4.1584,
      "step": 2188
    },
    {
      "epoch": 0.02189,
      "grad_norm": 0.5776255514929863,
      "learning_rate": 0.003,
      "loss": 4.1869,
      "step": 2189
    },
    {
      "epoch": 0.0219,
      "grad_norm": 0.7433015793364336,
      "learning_rate": 0.003,
      "loss": 4.1787,
      "step": 2190
    },
    {
      "epoch": 0.02191,
      "grad_norm": 0.8514656528184604,
      "learning_rate": 0.003,
      "loss": 4.1806,
      "step": 2191
    },
    {
      "epoch": 0.02192,
      "grad_norm": 0.9191452260055095,
      "learning_rate": 0.003,
      "loss": 4.2122,
      "step": 2192
    },
    {
      "epoch": 0.02193,
      "grad_norm": 0.7405436618584794,
      "learning_rate": 0.003,
      "loss": 4.1726,
      "step": 2193
    },
    {
      "epoch": 0.02194,
      "grad_norm": 0.7140573914846811,
      "learning_rate": 0.003,
      "loss": 4.2215,
      "step": 2194
    },
    {
      "epoch": 0.02195,
      "grad_norm": 0.7595143608079454,
      "learning_rate": 0.003,
      "loss": 4.1859,
      "step": 2195
    },
    {
      "epoch": 0.02196,
      "grad_norm": 0.7549194736868536,
      "learning_rate": 0.003,
      "loss": 4.1951,
      "step": 2196
    },
    {
      "epoch": 0.02197,
      "grad_norm": 0.6975555957838411,
      "learning_rate": 0.003,
      "loss": 4.1831,
      "step": 2197
    },
    {
      "epoch": 0.02198,
      "grad_norm": 0.7595090640426224,
      "learning_rate": 0.003,
      "loss": 4.1756,
      "step": 2198
    },
    {
      "epoch": 0.02199,
      "grad_norm": 0.7642539057941399,
      "learning_rate": 0.003,
      "loss": 4.145,
      "step": 2199
    },
    {
      "epoch": 0.022,
      "grad_norm": 0.7482736596797179,
      "learning_rate": 0.003,
      "loss": 4.1698,
      "step": 2200
    },
    {
      "epoch": 0.02201,
      "grad_norm": 0.6545018698949331,
      "learning_rate": 0.003,
      "loss": 4.1824,
      "step": 2201
    },
    {
      "epoch": 0.02202,
      "grad_norm": 0.6269777415335018,
      "learning_rate": 0.003,
      "loss": 4.1683,
      "step": 2202
    },
    {
      "epoch": 0.02203,
      "grad_norm": 0.7202806939294419,
      "learning_rate": 0.003,
      "loss": 4.1965,
      "step": 2203
    },
    {
      "epoch": 0.02204,
      "grad_norm": 0.7354393023905447,
      "learning_rate": 0.003,
      "loss": 4.1919,
      "step": 2204
    },
    {
      "epoch": 0.02205,
      "grad_norm": 0.6658405305239573,
      "learning_rate": 0.003,
      "loss": 4.1888,
      "step": 2205
    },
    {
      "epoch": 0.02206,
      "grad_norm": 0.5494433484311174,
      "learning_rate": 0.003,
      "loss": 4.1509,
      "step": 2206
    },
    {
      "epoch": 0.02207,
      "grad_norm": 0.48191881483377974,
      "learning_rate": 0.003,
      "loss": 4.1617,
      "step": 2207
    },
    {
      "epoch": 0.02208,
      "grad_norm": 0.5055031144775365,
      "learning_rate": 0.003,
      "loss": 4.1419,
      "step": 2208
    },
    {
      "epoch": 0.02209,
      "grad_norm": 0.5328357421096614,
      "learning_rate": 0.003,
      "loss": 4.1587,
      "step": 2209
    },
    {
      "epoch": 0.0221,
      "grad_norm": 0.580303567033314,
      "learning_rate": 0.003,
      "loss": 4.1426,
      "step": 2210
    },
    {
      "epoch": 0.02211,
      "grad_norm": 0.627373370176424,
      "learning_rate": 0.003,
      "loss": 4.1541,
      "step": 2211
    },
    {
      "epoch": 0.02212,
      "grad_norm": 0.8017723022310775,
      "learning_rate": 0.003,
      "loss": 4.1578,
      "step": 2212
    },
    {
      "epoch": 0.02213,
      "grad_norm": 0.9755586674162295,
      "learning_rate": 0.003,
      "loss": 4.1762,
      "step": 2213
    },
    {
      "epoch": 0.02214,
      "grad_norm": 0.8888070506530766,
      "learning_rate": 0.003,
      "loss": 4.1705,
      "step": 2214
    },
    {
      "epoch": 0.02215,
      "grad_norm": 0.8341073466106758,
      "learning_rate": 0.003,
      "loss": 4.173,
      "step": 2215
    },
    {
      "epoch": 0.02216,
      "grad_norm": 0.8721558185443227,
      "learning_rate": 0.003,
      "loss": 4.1552,
      "step": 2216
    },
    {
      "epoch": 0.02217,
      "grad_norm": 0.9058617906836595,
      "learning_rate": 0.003,
      "loss": 4.1535,
      "step": 2217
    },
    {
      "epoch": 0.02218,
      "grad_norm": 0.8239137730922835,
      "learning_rate": 0.003,
      "loss": 4.1865,
      "step": 2218
    },
    {
      "epoch": 0.02219,
      "grad_norm": 0.7662188590247174,
      "learning_rate": 0.003,
      "loss": 4.1744,
      "step": 2219
    },
    {
      "epoch": 0.0222,
      "grad_norm": 0.7276128620873287,
      "learning_rate": 0.003,
      "loss": 4.1721,
      "step": 2220
    },
    {
      "epoch": 0.02221,
      "grad_norm": 0.8153838699769882,
      "learning_rate": 0.003,
      "loss": 4.1618,
      "step": 2221
    },
    {
      "epoch": 0.02222,
      "grad_norm": 0.9239982401134911,
      "learning_rate": 0.003,
      "loss": 4.1621,
      "step": 2222
    },
    {
      "epoch": 0.02223,
      "grad_norm": 0.9248509160902595,
      "learning_rate": 0.003,
      "loss": 4.1826,
      "step": 2223
    },
    {
      "epoch": 0.02224,
      "grad_norm": 0.8778359853731296,
      "learning_rate": 0.003,
      "loss": 4.1633,
      "step": 2224
    },
    {
      "epoch": 0.02225,
      "grad_norm": 1.1133790365277945,
      "learning_rate": 0.003,
      "loss": 4.1791,
      "step": 2225
    },
    {
      "epoch": 0.02226,
      "grad_norm": 0.9357923626997176,
      "learning_rate": 0.003,
      "loss": 4.1895,
      "step": 2226
    },
    {
      "epoch": 0.02227,
      "grad_norm": 0.8646361690518631,
      "learning_rate": 0.003,
      "loss": 4.183,
      "step": 2227
    },
    {
      "epoch": 0.02228,
      "grad_norm": 0.938807129740724,
      "learning_rate": 0.003,
      "loss": 4.1718,
      "step": 2228
    },
    {
      "epoch": 0.02229,
      "grad_norm": 0.8004751977002076,
      "learning_rate": 0.003,
      "loss": 4.149,
      "step": 2229
    },
    {
      "epoch": 0.0223,
      "grad_norm": 0.7882101721343657,
      "learning_rate": 0.003,
      "loss": 4.1858,
      "step": 2230
    },
    {
      "epoch": 0.02231,
      "grad_norm": 0.7274707205896823,
      "learning_rate": 0.003,
      "loss": 4.1896,
      "step": 2231
    },
    {
      "epoch": 0.02232,
      "grad_norm": 0.8291412822325788,
      "learning_rate": 0.003,
      "loss": 4.2,
      "step": 2232
    },
    {
      "epoch": 0.02233,
      "grad_norm": 0.8063025430085007,
      "learning_rate": 0.003,
      "loss": 4.166,
      "step": 2233
    },
    {
      "epoch": 0.02234,
      "grad_norm": 0.6911537576664851,
      "learning_rate": 0.003,
      "loss": 4.1865,
      "step": 2234
    },
    {
      "epoch": 0.02235,
      "grad_norm": 0.7448703016841659,
      "learning_rate": 0.003,
      "loss": 4.2113,
      "step": 2235
    },
    {
      "epoch": 0.02236,
      "grad_norm": 0.8233234836151326,
      "learning_rate": 0.003,
      "loss": 4.1655,
      "step": 2236
    },
    {
      "epoch": 0.02237,
      "grad_norm": 1.0093335974280213,
      "learning_rate": 0.003,
      "loss": 4.2052,
      "step": 2237
    },
    {
      "epoch": 0.02238,
      "grad_norm": 1.1018651279861302,
      "learning_rate": 0.003,
      "loss": 4.196,
      "step": 2238
    },
    {
      "epoch": 0.02239,
      "grad_norm": 0.7048103914820842,
      "learning_rate": 0.003,
      "loss": 4.1668,
      "step": 2239
    },
    {
      "epoch": 0.0224,
      "grad_norm": 0.6705866907364436,
      "learning_rate": 0.003,
      "loss": 4.1477,
      "step": 2240
    },
    {
      "epoch": 0.02241,
      "grad_norm": 0.768034661346772,
      "learning_rate": 0.003,
      "loss": 4.1807,
      "step": 2241
    },
    {
      "epoch": 0.02242,
      "grad_norm": 0.7013024117659296,
      "learning_rate": 0.003,
      "loss": 4.1591,
      "step": 2242
    },
    {
      "epoch": 0.02243,
      "grad_norm": 0.8407412837981002,
      "learning_rate": 0.003,
      "loss": 4.1937,
      "step": 2243
    },
    {
      "epoch": 0.02244,
      "grad_norm": 0.8584973105281422,
      "learning_rate": 0.003,
      "loss": 4.1731,
      "step": 2244
    },
    {
      "epoch": 0.02245,
      "grad_norm": 0.810849020894934,
      "learning_rate": 0.003,
      "loss": 4.1578,
      "step": 2245
    },
    {
      "epoch": 0.02246,
      "grad_norm": 0.7441445696162579,
      "learning_rate": 0.003,
      "loss": 4.191,
      "step": 2246
    },
    {
      "epoch": 0.02247,
      "grad_norm": 0.719895099061681,
      "learning_rate": 0.003,
      "loss": 4.162,
      "step": 2247
    },
    {
      "epoch": 0.02248,
      "grad_norm": 0.6276605133960077,
      "learning_rate": 0.003,
      "loss": 4.1652,
      "step": 2248
    },
    {
      "epoch": 0.02249,
      "grad_norm": 0.5820105403717251,
      "learning_rate": 0.003,
      "loss": 4.1561,
      "step": 2249
    },
    {
      "epoch": 0.0225,
      "grad_norm": 0.494797181910464,
      "learning_rate": 0.003,
      "loss": 4.1646,
      "step": 2250
    },
    {
      "epoch": 0.02251,
      "grad_norm": 0.45799261155941245,
      "learning_rate": 0.003,
      "loss": 4.1881,
      "step": 2251
    },
    {
      "epoch": 0.02252,
      "grad_norm": 0.4312165334190654,
      "learning_rate": 0.003,
      "loss": 4.1745,
      "step": 2252
    },
    {
      "epoch": 0.02253,
      "grad_norm": 0.4089956454491447,
      "learning_rate": 0.003,
      "loss": 4.1547,
      "step": 2253
    },
    {
      "epoch": 0.02254,
      "grad_norm": 0.41711525641558334,
      "learning_rate": 0.003,
      "loss": 4.1591,
      "step": 2254
    },
    {
      "epoch": 0.02255,
      "grad_norm": 0.39887312531471364,
      "learning_rate": 0.003,
      "loss": 4.1419,
      "step": 2255
    },
    {
      "epoch": 0.02256,
      "grad_norm": 0.44922284802782086,
      "learning_rate": 0.003,
      "loss": 4.1321,
      "step": 2256
    },
    {
      "epoch": 0.02257,
      "grad_norm": 0.43881289908027915,
      "learning_rate": 0.003,
      "loss": 4.1476,
      "step": 2257
    },
    {
      "epoch": 0.02258,
      "grad_norm": 0.42711858762354266,
      "learning_rate": 0.003,
      "loss": 4.1569,
      "step": 2258
    },
    {
      "epoch": 0.02259,
      "grad_norm": 0.4761674249696505,
      "learning_rate": 0.003,
      "loss": 4.1182,
      "step": 2259
    },
    {
      "epoch": 0.0226,
      "grad_norm": 0.6172223177216527,
      "learning_rate": 0.003,
      "loss": 4.1745,
      "step": 2260
    },
    {
      "epoch": 0.02261,
      "grad_norm": 0.9969811836278653,
      "learning_rate": 0.003,
      "loss": 4.1626,
      "step": 2261
    },
    {
      "epoch": 0.02262,
      "grad_norm": 1.4207495716773118,
      "learning_rate": 0.003,
      "loss": 4.1955,
      "step": 2262
    },
    {
      "epoch": 0.02263,
      "grad_norm": 0.5107196284950156,
      "learning_rate": 0.003,
      "loss": 4.1749,
      "step": 2263
    },
    {
      "epoch": 0.02264,
      "grad_norm": 0.8124315452603879,
      "learning_rate": 0.003,
      "loss": 4.1794,
      "step": 2264
    },
    {
      "epoch": 0.02265,
      "grad_norm": 0.7127120804095198,
      "learning_rate": 0.003,
      "loss": 4.1342,
      "step": 2265
    },
    {
      "epoch": 0.02266,
      "grad_norm": 0.5120142831669139,
      "learning_rate": 0.003,
      "loss": 4.142,
      "step": 2266
    },
    {
      "epoch": 0.02267,
      "grad_norm": 0.842715736214197,
      "learning_rate": 0.003,
      "loss": 4.1775,
      "step": 2267
    },
    {
      "epoch": 0.02268,
      "grad_norm": 0.8370937405428989,
      "learning_rate": 0.003,
      "loss": 4.1593,
      "step": 2268
    },
    {
      "epoch": 0.02269,
      "grad_norm": 0.7173617092756458,
      "learning_rate": 0.003,
      "loss": 4.174,
      "step": 2269
    },
    {
      "epoch": 0.0227,
      "grad_norm": 0.8926880986379259,
      "learning_rate": 0.003,
      "loss": 4.1781,
      "step": 2270
    },
    {
      "epoch": 0.02271,
      "grad_norm": 0.9394293424718215,
      "learning_rate": 0.003,
      "loss": 4.1873,
      "step": 2271
    },
    {
      "epoch": 0.02272,
      "grad_norm": 0.9564097717489924,
      "learning_rate": 0.003,
      "loss": 4.1878,
      "step": 2272
    },
    {
      "epoch": 0.02273,
      "grad_norm": 1.2112473190186792,
      "learning_rate": 0.003,
      "loss": 4.1998,
      "step": 2273
    },
    {
      "epoch": 0.02274,
      "grad_norm": 0.9744415053378092,
      "learning_rate": 0.003,
      "loss": 4.1786,
      "step": 2274
    },
    {
      "epoch": 0.02275,
      "grad_norm": 1.0078841940550498,
      "learning_rate": 0.003,
      "loss": 4.1796,
      "step": 2275
    },
    {
      "epoch": 0.02276,
      "grad_norm": 0.8300538512961302,
      "learning_rate": 0.003,
      "loss": 4.1696,
      "step": 2276
    },
    {
      "epoch": 0.02277,
      "grad_norm": 0.6643966250880426,
      "learning_rate": 0.003,
      "loss": 4.1719,
      "step": 2277
    },
    {
      "epoch": 0.02278,
      "grad_norm": 0.6190627183036216,
      "learning_rate": 0.003,
      "loss": 4.1782,
      "step": 2278
    },
    {
      "epoch": 0.02279,
      "grad_norm": 0.551131169249495,
      "learning_rate": 0.003,
      "loss": 4.1827,
      "step": 2279
    },
    {
      "epoch": 0.0228,
      "grad_norm": 0.5572806608313633,
      "learning_rate": 0.003,
      "loss": 4.173,
      "step": 2280
    },
    {
      "epoch": 0.02281,
      "grad_norm": 0.630640256246517,
      "learning_rate": 0.003,
      "loss": 4.1688,
      "step": 2281
    },
    {
      "epoch": 0.02282,
      "grad_norm": 0.667161853354935,
      "learning_rate": 0.003,
      "loss": 4.181,
      "step": 2282
    },
    {
      "epoch": 0.02283,
      "grad_norm": 0.7804313255347314,
      "learning_rate": 0.003,
      "loss": 4.1982,
      "step": 2283
    },
    {
      "epoch": 0.02284,
      "grad_norm": 0.8174062173017085,
      "learning_rate": 0.003,
      "loss": 4.1998,
      "step": 2284
    },
    {
      "epoch": 0.02285,
      "grad_norm": 0.8933970302021632,
      "learning_rate": 0.003,
      "loss": 4.147,
      "step": 2285
    },
    {
      "epoch": 0.02286,
      "grad_norm": 0.8583328769415308,
      "learning_rate": 0.003,
      "loss": 4.2039,
      "step": 2286
    },
    {
      "epoch": 0.02287,
      "grad_norm": 0.8734810759184407,
      "learning_rate": 0.003,
      "loss": 4.1855,
      "step": 2287
    },
    {
      "epoch": 0.02288,
      "grad_norm": 0.8928624064077537,
      "learning_rate": 0.003,
      "loss": 4.2123,
      "step": 2288
    },
    {
      "epoch": 0.02289,
      "grad_norm": 0.783879288737337,
      "learning_rate": 0.003,
      "loss": 4.2056,
      "step": 2289
    },
    {
      "epoch": 0.0229,
      "grad_norm": 0.7701279568768081,
      "learning_rate": 0.003,
      "loss": 4.1809,
      "step": 2290
    },
    {
      "epoch": 0.02291,
      "grad_norm": 0.7353904143222354,
      "learning_rate": 0.003,
      "loss": 4.1585,
      "step": 2291
    },
    {
      "epoch": 0.02292,
      "grad_norm": 0.5673182764651727,
      "learning_rate": 0.003,
      "loss": 4.1467,
      "step": 2292
    },
    {
      "epoch": 0.02293,
      "grad_norm": 0.6211445071442583,
      "learning_rate": 0.003,
      "loss": 4.1508,
      "step": 2293
    },
    {
      "epoch": 0.02294,
      "grad_norm": 0.5897432424358218,
      "learning_rate": 0.003,
      "loss": 4.1497,
      "step": 2294
    },
    {
      "epoch": 0.02295,
      "grad_norm": 0.6701151196414159,
      "learning_rate": 0.003,
      "loss": 4.147,
      "step": 2295
    },
    {
      "epoch": 0.02296,
      "grad_norm": 1.0101216354387503,
      "learning_rate": 0.003,
      "loss": 4.1897,
      "step": 2296
    },
    {
      "epoch": 0.02297,
      "grad_norm": 1.2406081319623274,
      "learning_rate": 0.003,
      "loss": 4.1822,
      "step": 2297
    },
    {
      "epoch": 0.02298,
      "grad_norm": 0.5988120231143333,
      "learning_rate": 0.003,
      "loss": 4.1702,
      "step": 2298
    },
    {
      "epoch": 0.02299,
      "grad_norm": 0.7795860072160454,
      "learning_rate": 0.003,
      "loss": 4.1804,
      "step": 2299
    },
    {
      "epoch": 0.023,
      "grad_norm": 0.784959258488215,
      "learning_rate": 0.003,
      "loss": 4.1984,
      "step": 2300
    },
    {
      "epoch": 0.02301,
      "grad_norm": 0.5879362613709453,
      "learning_rate": 0.003,
      "loss": 4.157,
      "step": 2301
    },
    {
      "epoch": 0.02302,
      "grad_norm": 0.7106606864619079,
      "learning_rate": 0.003,
      "loss": 4.1649,
      "step": 2302
    },
    {
      "epoch": 0.02303,
      "grad_norm": 0.7402995000062723,
      "learning_rate": 0.003,
      "loss": 4.1545,
      "step": 2303
    },
    {
      "epoch": 0.02304,
      "grad_norm": 0.8291546359347778,
      "learning_rate": 0.003,
      "loss": 4.1898,
      "step": 2304
    },
    {
      "epoch": 0.02305,
      "grad_norm": 0.753148855618092,
      "learning_rate": 0.003,
      "loss": 4.1615,
      "step": 2305
    },
    {
      "epoch": 0.02306,
      "grad_norm": 0.8869246970763861,
      "learning_rate": 0.003,
      "loss": 4.1444,
      "step": 2306
    },
    {
      "epoch": 0.02307,
      "grad_norm": 0.8925578818662347,
      "learning_rate": 0.003,
      "loss": 4.1758,
      "step": 2307
    },
    {
      "epoch": 0.02308,
      "grad_norm": 0.9066759173397617,
      "learning_rate": 0.003,
      "loss": 4.1762,
      "step": 2308
    },
    {
      "epoch": 0.02309,
      "grad_norm": 0.8201889707383639,
      "learning_rate": 0.003,
      "loss": 4.1952,
      "step": 2309
    },
    {
      "epoch": 0.0231,
      "grad_norm": 0.7904644702923331,
      "learning_rate": 0.003,
      "loss": 4.1751,
      "step": 2310
    },
    {
      "epoch": 0.02311,
      "grad_norm": 0.7060518287169474,
      "learning_rate": 0.003,
      "loss": 4.1776,
      "step": 2311
    },
    {
      "epoch": 0.02312,
      "grad_norm": 0.7392874926033767,
      "learning_rate": 0.003,
      "loss": 4.1519,
      "step": 2312
    },
    {
      "epoch": 0.02313,
      "grad_norm": 0.8538999440765905,
      "learning_rate": 0.003,
      "loss": 4.1529,
      "step": 2313
    },
    {
      "epoch": 0.02314,
      "grad_norm": 0.9312222422709578,
      "learning_rate": 0.003,
      "loss": 4.1718,
      "step": 2314
    },
    {
      "epoch": 0.02315,
      "grad_norm": 0.9687214234200097,
      "learning_rate": 0.003,
      "loss": 4.1771,
      "step": 2315
    },
    {
      "epoch": 0.02316,
      "grad_norm": 0.891697328396861,
      "learning_rate": 0.003,
      "loss": 4.1909,
      "step": 2316
    },
    {
      "epoch": 0.02317,
      "grad_norm": 0.897934373295407,
      "learning_rate": 0.003,
      "loss": 4.1753,
      "step": 2317
    },
    {
      "epoch": 0.02318,
      "grad_norm": 0.8051331418091766,
      "learning_rate": 0.003,
      "loss": 4.1614,
      "step": 2318
    },
    {
      "epoch": 0.02319,
      "grad_norm": 0.703806089932542,
      "learning_rate": 0.003,
      "loss": 4.1804,
      "step": 2319
    },
    {
      "epoch": 0.0232,
      "grad_norm": 0.7388332818016227,
      "learning_rate": 0.003,
      "loss": 4.1898,
      "step": 2320
    },
    {
      "epoch": 0.02321,
      "grad_norm": 0.7450344205679207,
      "learning_rate": 0.003,
      "loss": 4.1692,
      "step": 2321
    },
    {
      "epoch": 0.02322,
      "grad_norm": 0.6660500555618327,
      "learning_rate": 0.003,
      "loss": 4.1746,
      "step": 2322
    },
    {
      "epoch": 0.02323,
      "grad_norm": 0.6571661630697367,
      "learning_rate": 0.003,
      "loss": 4.1481,
      "step": 2323
    },
    {
      "epoch": 0.02324,
      "grad_norm": 0.6161646162269099,
      "learning_rate": 0.003,
      "loss": 4.1462,
      "step": 2324
    },
    {
      "epoch": 0.02325,
      "grad_norm": 0.5919512249258755,
      "learning_rate": 0.003,
      "loss": 4.1679,
      "step": 2325
    },
    {
      "epoch": 0.02326,
      "grad_norm": 0.7224558633075708,
      "learning_rate": 0.003,
      "loss": 4.1831,
      "step": 2326
    },
    {
      "epoch": 0.02327,
      "grad_norm": 0.8816178665411202,
      "learning_rate": 0.003,
      "loss": 4.1759,
      "step": 2327
    },
    {
      "epoch": 0.02328,
      "grad_norm": 0.8699762426391761,
      "learning_rate": 0.003,
      "loss": 4.176,
      "step": 2328
    },
    {
      "epoch": 0.02329,
      "grad_norm": 0.723960245924297,
      "learning_rate": 0.003,
      "loss": 4.1606,
      "step": 2329
    },
    {
      "epoch": 0.0233,
      "grad_norm": 0.7628023637683098,
      "learning_rate": 0.003,
      "loss": 4.1563,
      "step": 2330
    },
    {
      "epoch": 0.02331,
      "grad_norm": 0.7695226471244487,
      "learning_rate": 0.003,
      "loss": 4.1933,
      "step": 2331
    },
    {
      "epoch": 0.02332,
      "grad_norm": 0.7712606828136085,
      "learning_rate": 0.003,
      "loss": 4.1567,
      "step": 2332
    },
    {
      "epoch": 0.02333,
      "grad_norm": 0.7181228868994012,
      "learning_rate": 0.003,
      "loss": 4.1639,
      "step": 2333
    },
    {
      "epoch": 0.02334,
      "grad_norm": 0.6385757918578437,
      "learning_rate": 0.003,
      "loss": 4.1304,
      "step": 2334
    },
    {
      "epoch": 0.02335,
      "grad_norm": 0.5804913608133354,
      "learning_rate": 0.003,
      "loss": 4.1671,
      "step": 2335
    },
    {
      "epoch": 0.02336,
      "grad_norm": 0.6406948035907543,
      "learning_rate": 0.003,
      "loss": 4.1601,
      "step": 2336
    },
    {
      "epoch": 0.02337,
      "grad_norm": 0.7550291811921531,
      "learning_rate": 0.003,
      "loss": 4.1668,
      "step": 2337
    },
    {
      "epoch": 0.02338,
      "grad_norm": 0.867264165349628,
      "learning_rate": 0.003,
      "loss": 4.1574,
      "step": 2338
    },
    {
      "epoch": 0.02339,
      "grad_norm": 0.9370869698358649,
      "learning_rate": 0.003,
      "loss": 4.1596,
      "step": 2339
    },
    {
      "epoch": 0.0234,
      "grad_norm": 1.04114552861465,
      "learning_rate": 0.003,
      "loss": 4.1762,
      "step": 2340
    },
    {
      "epoch": 0.02341,
      "grad_norm": 0.8419443125713769,
      "learning_rate": 0.003,
      "loss": 4.1461,
      "step": 2341
    },
    {
      "epoch": 0.02342,
      "grad_norm": 0.7516609856190137,
      "learning_rate": 0.003,
      "loss": 4.1608,
      "step": 2342
    },
    {
      "epoch": 0.02343,
      "grad_norm": 0.7840218113502367,
      "learning_rate": 0.003,
      "loss": 4.1597,
      "step": 2343
    },
    {
      "epoch": 0.02344,
      "grad_norm": 0.7106909207560747,
      "learning_rate": 0.003,
      "loss": 4.1579,
      "step": 2344
    },
    {
      "epoch": 0.02345,
      "grad_norm": 0.695743479378358,
      "learning_rate": 0.003,
      "loss": 4.1505,
      "step": 2345
    },
    {
      "epoch": 0.02346,
      "grad_norm": 0.7618556711999727,
      "learning_rate": 0.003,
      "loss": 4.1514,
      "step": 2346
    },
    {
      "epoch": 0.02347,
      "grad_norm": 0.8494779389825688,
      "learning_rate": 0.003,
      "loss": 4.1923,
      "step": 2347
    },
    {
      "epoch": 0.02348,
      "grad_norm": 0.8819304188600076,
      "learning_rate": 0.003,
      "loss": 4.1614,
      "step": 2348
    },
    {
      "epoch": 0.02349,
      "grad_norm": 0.801874189029763,
      "learning_rate": 0.003,
      "loss": 4.1729,
      "step": 2349
    },
    {
      "epoch": 0.0235,
      "grad_norm": 0.7502001194200574,
      "learning_rate": 0.003,
      "loss": 4.168,
      "step": 2350
    },
    {
      "epoch": 0.02351,
      "grad_norm": 0.7987799293083814,
      "learning_rate": 0.003,
      "loss": 4.1781,
      "step": 2351
    },
    {
      "epoch": 0.02352,
      "grad_norm": 0.9306705411998242,
      "learning_rate": 0.003,
      "loss": 4.142,
      "step": 2352
    },
    {
      "epoch": 0.02353,
      "grad_norm": 0.8564501775467426,
      "learning_rate": 0.003,
      "loss": 4.1759,
      "step": 2353
    },
    {
      "epoch": 0.02354,
      "grad_norm": 0.8345109850380096,
      "learning_rate": 0.003,
      "loss": 4.1813,
      "step": 2354
    },
    {
      "epoch": 0.02355,
      "grad_norm": 0.7887172745864481,
      "learning_rate": 0.003,
      "loss": 4.1723,
      "step": 2355
    },
    {
      "epoch": 0.02356,
      "grad_norm": 0.6582873563724112,
      "learning_rate": 0.003,
      "loss": 4.1515,
      "step": 2356
    },
    {
      "epoch": 0.02357,
      "grad_norm": 0.6737457349000578,
      "learning_rate": 0.003,
      "loss": 4.1517,
      "step": 2357
    },
    {
      "epoch": 0.02358,
      "grad_norm": 0.7705872627309706,
      "learning_rate": 0.003,
      "loss": 4.1742,
      "step": 2358
    },
    {
      "epoch": 0.02359,
      "grad_norm": 0.8542556066081018,
      "learning_rate": 0.003,
      "loss": 4.1536,
      "step": 2359
    },
    {
      "epoch": 0.0236,
      "grad_norm": 0.9842191585501165,
      "learning_rate": 0.003,
      "loss": 4.2032,
      "step": 2360
    },
    {
      "epoch": 0.02361,
      "grad_norm": 0.9112511096880844,
      "learning_rate": 0.003,
      "loss": 4.1793,
      "step": 2361
    },
    {
      "epoch": 0.02362,
      "grad_norm": 0.9278874197656558,
      "learning_rate": 0.003,
      "loss": 4.1712,
      "step": 2362
    },
    {
      "epoch": 0.02363,
      "grad_norm": 0.7482616982968588,
      "learning_rate": 0.003,
      "loss": 4.1444,
      "step": 2363
    },
    {
      "epoch": 0.02364,
      "grad_norm": 0.5981870727991454,
      "learning_rate": 0.003,
      "loss": 4.1597,
      "step": 2364
    },
    {
      "epoch": 0.02365,
      "grad_norm": 0.6120042832874062,
      "learning_rate": 0.003,
      "loss": 4.1788,
      "step": 2365
    },
    {
      "epoch": 0.02366,
      "grad_norm": 0.592965390221233,
      "learning_rate": 0.003,
      "loss": 4.1489,
      "step": 2366
    },
    {
      "epoch": 0.02367,
      "grad_norm": 0.5967931209483315,
      "learning_rate": 0.003,
      "loss": 4.1579,
      "step": 2367
    },
    {
      "epoch": 0.02368,
      "grad_norm": 0.6289890460021353,
      "learning_rate": 0.003,
      "loss": 4.1346,
      "step": 2368
    },
    {
      "epoch": 0.02369,
      "grad_norm": 0.6580991252391354,
      "learning_rate": 0.003,
      "loss": 4.1441,
      "step": 2369
    },
    {
      "epoch": 0.0237,
      "grad_norm": 0.6945790129958274,
      "learning_rate": 0.003,
      "loss": 4.1741,
      "step": 2370
    },
    {
      "epoch": 0.02371,
      "grad_norm": 0.7107030962570803,
      "learning_rate": 0.003,
      "loss": 4.1569,
      "step": 2371
    },
    {
      "epoch": 0.02372,
      "grad_norm": 0.8059296050081108,
      "learning_rate": 0.003,
      "loss": 4.149,
      "step": 2372
    },
    {
      "epoch": 0.02373,
      "grad_norm": 0.921257026362051,
      "learning_rate": 0.003,
      "loss": 4.1414,
      "step": 2373
    },
    {
      "epoch": 0.02374,
      "grad_norm": 0.955728704961619,
      "learning_rate": 0.003,
      "loss": 4.1544,
      "step": 2374
    },
    {
      "epoch": 0.02375,
      "grad_norm": 0.7504286137015724,
      "learning_rate": 0.003,
      "loss": 4.15,
      "step": 2375
    },
    {
      "epoch": 0.02376,
      "grad_norm": 0.7478781963883802,
      "learning_rate": 0.003,
      "loss": 4.1724,
      "step": 2376
    },
    {
      "epoch": 0.02377,
      "grad_norm": 0.917428121872858,
      "learning_rate": 0.003,
      "loss": 4.1486,
      "step": 2377
    },
    {
      "epoch": 0.02378,
      "grad_norm": 0.7822980127349399,
      "learning_rate": 0.003,
      "loss": 4.1546,
      "step": 2378
    },
    {
      "epoch": 0.02379,
      "grad_norm": 0.5863463150077827,
      "learning_rate": 0.003,
      "loss": 4.136,
      "step": 2379
    },
    {
      "epoch": 0.0238,
      "grad_norm": 0.5340016137538751,
      "learning_rate": 0.003,
      "loss": 4.109,
      "step": 2380
    },
    {
      "epoch": 0.02381,
      "grad_norm": 0.6128857312553131,
      "learning_rate": 0.003,
      "loss": 4.1229,
      "step": 2381
    },
    {
      "epoch": 0.02382,
      "grad_norm": 0.5658688824938058,
      "learning_rate": 0.003,
      "loss": 4.1779,
      "step": 2382
    },
    {
      "epoch": 0.02383,
      "grad_norm": 0.5676379045904221,
      "learning_rate": 0.003,
      "loss": 4.1407,
      "step": 2383
    },
    {
      "epoch": 0.02384,
      "grad_norm": 0.5238096521489247,
      "learning_rate": 0.003,
      "loss": 4.1258,
      "step": 2384
    },
    {
      "epoch": 0.02385,
      "grad_norm": 0.5152751390365298,
      "learning_rate": 0.003,
      "loss": 4.1501,
      "step": 2385
    },
    {
      "epoch": 0.02386,
      "grad_norm": 0.6091514932256747,
      "learning_rate": 0.003,
      "loss": 4.13,
      "step": 2386
    },
    {
      "epoch": 0.02387,
      "grad_norm": 0.7793599922586826,
      "learning_rate": 0.003,
      "loss": 4.1323,
      "step": 2387
    },
    {
      "epoch": 0.02388,
      "grad_norm": 0.9352617435026656,
      "learning_rate": 0.003,
      "loss": 4.1923,
      "step": 2388
    },
    {
      "epoch": 0.02389,
      "grad_norm": 0.9609729763435508,
      "learning_rate": 0.003,
      "loss": 4.1586,
      "step": 2389
    },
    {
      "epoch": 0.0239,
      "grad_norm": 0.8094018370976185,
      "learning_rate": 0.003,
      "loss": 4.1475,
      "step": 2390
    },
    {
      "epoch": 0.02391,
      "grad_norm": 0.8506283143692317,
      "learning_rate": 0.003,
      "loss": 4.1734,
      "step": 2391
    },
    {
      "epoch": 0.02392,
      "grad_norm": 0.9050332115497783,
      "learning_rate": 0.003,
      "loss": 4.1803,
      "step": 2392
    },
    {
      "epoch": 0.02393,
      "grad_norm": 0.9634890728744777,
      "learning_rate": 0.003,
      "loss": 4.1413,
      "step": 2393
    },
    {
      "epoch": 0.02394,
      "grad_norm": 0.8882373157340075,
      "learning_rate": 0.003,
      "loss": 4.1569,
      "step": 2394
    },
    {
      "epoch": 0.02395,
      "grad_norm": 0.7565332153535935,
      "learning_rate": 0.003,
      "loss": 4.1675,
      "step": 2395
    },
    {
      "epoch": 0.02396,
      "grad_norm": 0.8603458435447788,
      "learning_rate": 0.003,
      "loss": 4.1849,
      "step": 2396
    },
    {
      "epoch": 0.02397,
      "grad_norm": 0.8261263361309694,
      "learning_rate": 0.003,
      "loss": 4.169,
      "step": 2397
    },
    {
      "epoch": 0.02398,
      "grad_norm": 0.7794743587492478,
      "learning_rate": 0.003,
      "loss": 4.1744,
      "step": 2398
    },
    {
      "epoch": 0.02399,
      "grad_norm": 0.8140982417839558,
      "learning_rate": 0.003,
      "loss": 4.1533,
      "step": 2399
    },
    {
      "epoch": 0.024,
      "grad_norm": 0.9091203275980858,
      "learning_rate": 0.003,
      "loss": 4.1719,
      "step": 2400
    },
    {
      "epoch": 0.02401,
      "grad_norm": 0.9082248326953591,
      "learning_rate": 0.003,
      "loss": 4.1471,
      "step": 2401
    },
    {
      "epoch": 0.02402,
      "grad_norm": 0.8996640369589112,
      "learning_rate": 0.003,
      "loss": 4.1798,
      "step": 2402
    },
    {
      "epoch": 0.02403,
      "grad_norm": 0.9493494458526733,
      "learning_rate": 0.003,
      "loss": 4.1514,
      "step": 2403
    },
    {
      "epoch": 0.02404,
      "grad_norm": 0.9827368493949332,
      "learning_rate": 0.003,
      "loss": 4.1965,
      "step": 2404
    },
    {
      "epoch": 0.02405,
      "grad_norm": 0.9551905621174913,
      "learning_rate": 0.003,
      "loss": 4.2021,
      "step": 2405
    },
    {
      "epoch": 0.02406,
      "grad_norm": 0.9364752365255031,
      "learning_rate": 0.003,
      "loss": 4.1854,
      "step": 2406
    },
    {
      "epoch": 0.02407,
      "grad_norm": 0.9463612550147267,
      "learning_rate": 0.003,
      "loss": 4.1641,
      "step": 2407
    },
    {
      "epoch": 0.02408,
      "grad_norm": 0.8719994888631714,
      "learning_rate": 0.003,
      "loss": 4.1732,
      "step": 2408
    },
    {
      "epoch": 0.02409,
      "grad_norm": 0.864310092393717,
      "learning_rate": 0.003,
      "loss": 4.1757,
      "step": 2409
    },
    {
      "epoch": 0.0241,
      "grad_norm": 0.8973500607718927,
      "learning_rate": 0.003,
      "loss": 4.1883,
      "step": 2410
    },
    {
      "epoch": 0.02411,
      "grad_norm": 1.1693499034787715,
      "learning_rate": 0.003,
      "loss": 4.1693,
      "step": 2411
    },
    {
      "epoch": 0.02412,
      "grad_norm": 0.8431604098452435,
      "learning_rate": 0.003,
      "loss": 4.164,
      "step": 2412
    },
    {
      "epoch": 0.02413,
      "grad_norm": 0.7767055362680262,
      "learning_rate": 0.003,
      "loss": 4.1597,
      "step": 2413
    },
    {
      "epoch": 0.02414,
      "grad_norm": 0.8330306566206577,
      "learning_rate": 0.003,
      "loss": 4.1744,
      "step": 2414
    },
    {
      "epoch": 0.02415,
      "grad_norm": 0.8217708926095696,
      "learning_rate": 0.003,
      "loss": 4.1765,
      "step": 2415
    },
    {
      "epoch": 0.02416,
      "grad_norm": 0.8597994699370953,
      "learning_rate": 0.003,
      "loss": 4.1589,
      "step": 2416
    },
    {
      "epoch": 0.02417,
      "grad_norm": 0.8398506547738631,
      "learning_rate": 0.003,
      "loss": 4.1829,
      "step": 2417
    },
    {
      "epoch": 0.02418,
      "grad_norm": 0.7704254084685477,
      "learning_rate": 0.003,
      "loss": 4.1714,
      "step": 2418
    },
    {
      "epoch": 0.02419,
      "grad_norm": 0.752617109928811,
      "learning_rate": 0.003,
      "loss": 4.1691,
      "step": 2419
    },
    {
      "epoch": 0.0242,
      "grad_norm": 0.681614467806083,
      "learning_rate": 0.003,
      "loss": 4.1781,
      "step": 2420
    },
    {
      "epoch": 0.02421,
      "grad_norm": 0.57806703718961,
      "learning_rate": 0.003,
      "loss": 4.1374,
      "step": 2421
    },
    {
      "epoch": 0.02422,
      "grad_norm": 0.5368829917602504,
      "learning_rate": 0.003,
      "loss": 4.1904,
      "step": 2422
    },
    {
      "epoch": 0.02423,
      "grad_norm": 0.49926017966447833,
      "learning_rate": 0.003,
      "loss": 4.1767,
      "step": 2423
    },
    {
      "epoch": 0.02424,
      "grad_norm": 0.44538511553678933,
      "learning_rate": 0.003,
      "loss": 4.1709,
      "step": 2424
    },
    {
      "epoch": 0.02425,
      "grad_norm": 0.4455623217366007,
      "learning_rate": 0.003,
      "loss": 4.1546,
      "step": 2425
    },
    {
      "epoch": 0.02426,
      "grad_norm": 0.45034256565793024,
      "learning_rate": 0.003,
      "loss": 4.1608,
      "step": 2426
    },
    {
      "epoch": 0.02427,
      "grad_norm": 0.4277494354911392,
      "learning_rate": 0.003,
      "loss": 4.1831,
      "step": 2427
    },
    {
      "epoch": 0.02428,
      "grad_norm": 0.49536930851083055,
      "learning_rate": 0.003,
      "loss": 4.1561,
      "step": 2428
    },
    {
      "epoch": 0.02429,
      "grad_norm": 0.5349623201813287,
      "learning_rate": 0.003,
      "loss": 4.1417,
      "step": 2429
    },
    {
      "epoch": 0.0243,
      "grad_norm": 0.6385700511917229,
      "learning_rate": 0.003,
      "loss": 4.1681,
      "step": 2430
    },
    {
      "epoch": 0.02431,
      "grad_norm": 0.7962007873904552,
      "learning_rate": 0.003,
      "loss": 4.1506,
      "step": 2431
    },
    {
      "epoch": 0.02432,
      "grad_norm": 0.7552982964141086,
      "learning_rate": 0.003,
      "loss": 4.1735,
      "step": 2432
    },
    {
      "epoch": 0.02433,
      "grad_norm": 0.5576758209292167,
      "learning_rate": 0.003,
      "loss": 4.179,
      "step": 2433
    },
    {
      "epoch": 0.02434,
      "grad_norm": 0.5731742455027719,
      "learning_rate": 0.003,
      "loss": 4.1457,
      "step": 2434
    },
    {
      "epoch": 0.02435,
      "grad_norm": 0.6782357026569522,
      "learning_rate": 0.003,
      "loss": 4.1482,
      "step": 2435
    },
    {
      "epoch": 0.02436,
      "grad_norm": 0.8071855900106416,
      "learning_rate": 0.003,
      "loss": 4.1587,
      "step": 2436
    },
    {
      "epoch": 0.02437,
      "grad_norm": 0.9265059909484438,
      "learning_rate": 0.003,
      "loss": 4.1467,
      "step": 2437
    },
    {
      "epoch": 0.02438,
      "grad_norm": 0.8793214152171194,
      "learning_rate": 0.003,
      "loss": 4.1644,
      "step": 2438
    },
    {
      "epoch": 0.02439,
      "grad_norm": 0.8832749976596599,
      "learning_rate": 0.003,
      "loss": 4.1384,
      "step": 2439
    },
    {
      "epoch": 0.0244,
      "grad_norm": 0.9746984844718865,
      "learning_rate": 0.003,
      "loss": 4.1934,
      "step": 2440
    },
    {
      "epoch": 0.02441,
      "grad_norm": 1.0708150303834312,
      "learning_rate": 0.003,
      "loss": 4.1618,
      "step": 2441
    },
    {
      "epoch": 0.02442,
      "grad_norm": 1.040411975678819,
      "learning_rate": 0.003,
      "loss": 4.2014,
      "step": 2442
    },
    {
      "epoch": 0.02443,
      "grad_norm": 1.157601705274513,
      "learning_rate": 0.003,
      "loss": 4.1413,
      "step": 2443
    },
    {
      "epoch": 0.02444,
      "grad_norm": 0.8998517717034221,
      "learning_rate": 0.003,
      "loss": 4.1633,
      "step": 2444
    },
    {
      "epoch": 0.02445,
      "grad_norm": 0.7914422604882547,
      "learning_rate": 0.003,
      "loss": 4.1691,
      "step": 2445
    },
    {
      "epoch": 0.02446,
      "grad_norm": 0.765570533672039,
      "learning_rate": 0.003,
      "loss": 4.168,
      "step": 2446
    },
    {
      "epoch": 0.02447,
      "grad_norm": 0.7780408804262848,
      "learning_rate": 0.003,
      "loss": 4.1745,
      "step": 2447
    },
    {
      "epoch": 0.02448,
      "grad_norm": 0.8355663969723807,
      "learning_rate": 0.003,
      "loss": 4.1748,
      "step": 2448
    },
    {
      "epoch": 0.02449,
      "grad_norm": 0.8911004412381984,
      "learning_rate": 0.003,
      "loss": 4.173,
      "step": 2449
    },
    {
      "epoch": 0.0245,
      "grad_norm": 0.8721571131136453,
      "learning_rate": 0.003,
      "loss": 4.1764,
      "step": 2450
    },
    {
      "epoch": 0.02451,
      "grad_norm": 0.8424855685228627,
      "learning_rate": 0.003,
      "loss": 4.1549,
      "step": 2451
    },
    {
      "epoch": 0.02452,
      "grad_norm": 0.71343486564028,
      "learning_rate": 0.003,
      "loss": 4.1589,
      "step": 2452
    },
    {
      "epoch": 0.02453,
      "grad_norm": 0.830794139281049,
      "learning_rate": 0.003,
      "loss": 4.1386,
      "step": 2453
    },
    {
      "epoch": 0.02454,
      "grad_norm": 0.8371964018463887,
      "learning_rate": 0.003,
      "loss": 4.1444,
      "step": 2454
    },
    {
      "epoch": 0.02455,
      "grad_norm": 0.828173261775671,
      "learning_rate": 0.003,
      "loss": 4.1555,
      "step": 2455
    },
    {
      "epoch": 0.02456,
      "grad_norm": 0.8111209228027585,
      "learning_rate": 0.003,
      "loss": 4.1205,
      "step": 2456
    },
    {
      "epoch": 0.02457,
      "grad_norm": 0.7593406743031937,
      "learning_rate": 0.003,
      "loss": 4.175,
      "step": 2457
    },
    {
      "epoch": 0.02458,
      "grad_norm": 0.6152494042836864,
      "learning_rate": 0.003,
      "loss": 4.1392,
      "step": 2458
    },
    {
      "epoch": 0.02459,
      "grad_norm": 0.6726714704776762,
      "learning_rate": 0.003,
      "loss": 4.1704,
      "step": 2459
    },
    {
      "epoch": 0.0246,
      "grad_norm": 0.6849708403280691,
      "learning_rate": 0.003,
      "loss": 4.1742,
      "step": 2460
    },
    {
      "epoch": 0.02461,
      "grad_norm": 0.6926478544751591,
      "learning_rate": 0.003,
      "loss": 4.1489,
      "step": 2461
    },
    {
      "epoch": 0.02462,
      "grad_norm": 0.6546290518877314,
      "learning_rate": 0.003,
      "loss": 4.1745,
      "step": 2462
    },
    {
      "epoch": 0.02463,
      "grad_norm": 0.6160770029093054,
      "learning_rate": 0.003,
      "loss": 4.1691,
      "step": 2463
    },
    {
      "epoch": 0.02464,
      "grad_norm": 0.5990408684539491,
      "learning_rate": 0.003,
      "loss": 4.1487,
      "step": 2464
    },
    {
      "epoch": 0.02465,
      "grad_norm": 0.5034148559800348,
      "learning_rate": 0.003,
      "loss": 4.1619,
      "step": 2465
    },
    {
      "epoch": 0.02466,
      "grad_norm": 0.4860347114545619,
      "learning_rate": 0.003,
      "loss": 4.1408,
      "step": 2466
    },
    {
      "epoch": 0.02467,
      "grad_norm": 0.4781136004723512,
      "learning_rate": 0.003,
      "loss": 4.1183,
      "step": 2467
    },
    {
      "epoch": 0.02468,
      "grad_norm": 0.4265902063645912,
      "learning_rate": 0.003,
      "loss": 4.1584,
      "step": 2468
    },
    {
      "epoch": 0.02469,
      "grad_norm": 0.5508562887056804,
      "learning_rate": 0.003,
      "loss": 4.1409,
      "step": 2469
    },
    {
      "epoch": 0.0247,
      "grad_norm": 0.8249536754461567,
      "learning_rate": 0.003,
      "loss": 4.1452,
      "step": 2470
    },
    {
      "epoch": 0.02471,
      "grad_norm": 1.4359640347930054,
      "learning_rate": 0.003,
      "loss": 4.1561,
      "step": 2471
    },
    {
      "epoch": 0.02472,
      "grad_norm": 0.6769644869683711,
      "learning_rate": 0.003,
      "loss": 4.1433,
      "step": 2472
    },
    {
      "epoch": 0.02473,
      "grad_norm": 0.7658209871323785,
      "learning_rate": 0.003,
      "loss": 4.1289,
      "step": 2473
    },
    {
      "epoch": 0.02474,
      "grad_norm": 0.9511612267134097,
      "learning_rate": 0.003,
      "loss": 4.1669,
      "step": 2474
    },
    {
      "epoch": 0.02475,
      "grad_norm": 0.8892227903889872,
      "learning_rate": 0.003,
      "loss": 4.1391,
      "step": 2475
    },
    {
      "epoch": 0.02476,
      "grad_norm": 0.9920143400330694,
      "learning_rate": 0.003,
      "loss": 4.152,
      "step": 2476
    },
    {
      "epoch": 0.02477,
      "grad_norm": 0.9006606413119249,
      "learning_rate": 0.003,
      "loss": 4.1748,
      "step": 2477
    },
    {
      "epoch": 0.02478,
      "grad_norm": 0.8088690703733978,
      "learning_rate": 0.003,
      "loss": 4.1471,
      "step": 2478
    },
    {
      "epoch": 0.02479,
      "grad_norm": 0.8182856999501472,
      "learning_rate": 0.003,
      "loss": 4.1366,
      "step": 2479
    },
    {
      "epoch": 0.0248,
      "grad_norm": 0.7355042387960451,
      "learning_rate": 0.003,
      "loss": 4.178,
      "step": 2480
    },
    {
      "epoch": 0.02481,
      "grad_norm": 0.6962248057182712,
      "learning_rate": 0.003,
      "loss": 4.1578,
      "step": 2481
    },
    {
      "epoch": 0.02482,
      "grad_norm": 0.6352228705173556,
      "learning_rate": 0.003,
      "loss": 4.1666,
      "step": 2482
    },
    {
      "epoch": 0.02483,
      "grad_norm": 0.6906094612236757,
      "learning_rate": 0.003,
      "loss": 4.1183,
      "step": 2483
    },
    {
      "epoch": 0.02484,
      "grad_norm": 0.66658401463766,
      "learning_rate": 0.003,
      "loss": 4.1474,
      "step": 2484
    },
    {
      "epoch": 0.02485,
      "grad_norm": 0.6596750859147653,
      "learning_rate": 0.003,
      "loss": 4.1705,
      "step": 2485
    },
    {
      "epoch": 0.02486,
      "grad_norm": 0.7066148867713012,
      "learning_rate": 0.003,
      "loss": 4.1929,
      "step": 2486
    },
    {
      "epoch": 0.02487,
      "grad_norm": 0.754767630208959,
      "learning_rate": 0.003,
      "loss": 4.1498,
      "step": 2487
    },
    {
      "epoch": 0.02488,
      "grad_norm": 0.7178651369815516,
      "learning_rate": 0.003,
      "loss": 4.1276,
      "step": 2488
    },
    {
      "epoch": 0.02489,
      "grad_norm": 0.7388633013067267,
      "learning_rate": 0.003,
      "loss": 4.1159,
      "step": 2489
    },
    {
      "epoch": 0.0249,
      "grad_norm": 0.7802405314573639,
      "learning_rate": 0.003,
      "loss": 4.1239,
      "step": 2490
    },
    {
      "epoch": 0.02491,
      "grad_norm": 0.6722919878141813,
      "learning_rate": 0.003,
      "loss": 4.147,
      "step": 2491
    },
    {
      "epoch": 0.02492,
      "grad_norm": 0.6231446299505073,
      "learning_rate": 0.003,
      "loss": 4.1546,
      "step": 2492
    },
    {
      "epoch": 0.02493,
      "grad_norm": 0.6055678621905288,
      "learning_rate": 0.003,
      "loss": 4.1252,
      "step": 2493
    },
    {
      "epoch": 0.02494,
      "grad_norm": 0.6270135330055409,
      "learning_rate": 0.003,
      "loss": 4.1374,
      "step": 2494
    },
    {
      "epoch": 0.02495,
      "grad_norm": 0.6525517462302536,
      "learning_rate": 0.003,
      "loss": 4.1487,
      "step": 2495
    },
    {
      "epoch": 0.02496,
      "grad_norm": 0.7329319033542416,
      "learning_rate": 0.003,
      "loss": 4.14,
      "step": 2496
    },
    {
      "epoch": 0.02497,
      "grad_norm": 0.9619996867887689,
      "learning_rate": 0.003,
      "loss": 4.1515,
      "step": 2497
    },
    {
      "epoch": 0.02498,
      "grad_norm": 1.141814403141371,
      "learning_rate": 0.003,
      "loss": 4.1545,
      "step": 2498
    },
    {
      "epoch": 0.02499,
      "grad_norm": 0.9016540268347464,
      "learning_rate": 0.003,
      "loss": 4.1556,
      "step": 2499
    },
    {
      "epoch": 0.025,
      "grad_norm": 0.9065801171881926,
      "learning_rate": 0.003,
      "loss": 4.1446,
      "step": 2500
    },
    {
      "epoch": 0.02501,
      "grad_norm": 0.8844338776736929,
      "learning_rate": 0.003,
      "loss": 4.1765,
      "step": 2501
    },
    {
      "epoch": 0.02502,
      "grad_norm": 0.8046602370535848,
      "learning_rate": 0.003,
      "loss": 4.1579,
      "step": 2502
    },
    {
      "epoch": 0.02503,
      "grad_norm": 0.7039592913476108,
      "learning_rate": 0.003,
      "loss": 4.1643,
      "step": 2503
    },
    {
      "epoch": 0.02504,
      "grad_norm": 0.7419132370317111,
      "learning_rate": 0.003,
      "loss": 4.1429,
      "step": 2504
    },
    {
      "epoch": 0.02505,
      "grad_norm": 0.7606498464541366,
      "learning_rate": 0.003,
      "loss": 4.1455,
      "step": 2505
    },
    {
      "epoch": 0.02506,
      "grad_norm": 0.769560412914919,
      "learning_rate": 0.003,
      "loss": 4.1579,
      "step": 2506
    },
    {
      "epoch": 0.02507,
      "grad_norm": 0.8361991263151639,
      "learning_rate": 0.003,
      "loss": 4.1464,
      "step": 2507
    },
    {
      "epoch": 0.02508,
      "grad_norm": 1.0314436698214402,
      "learning_rate": 0.003,
      "loss": 4.1552,
      "step": 2508
    },
    {
      "epoch": 0.02509,
      "grad_norm": 1.0808307469983176,
      "learning_rate": 0.003,
      "loss": 4.1638,
      "step": 2509
    },
    {
      "epoch": 0.0251,
      "grad_norm": 0.9756326578065097,
      "learning_rate": 0.003,
      "loss": 4.1772,
      "step": 2510
    },
    {
      "epoch": 0.02511,
      "grad_norm": 0.9782444756269447,
      "learning_rate": 0.003,
      "loss": 4.1853,
      "step": 2511
    },
    {
      "epoch": 0.02512,
      "grad_norm": 0.9679666828238646,
      "learning_rate": 0.003,
      "loss": 4.1672,
      "step": 2512
    },
    {
      "epoch": 0.02513,
      "grad_norm": 0.9141144321737233,
      "learning_rate": 0.003,
      "loss": 4.1647,
      "step": 2513
    },
    {
      "epoch": 0.02514,
      "grad_norm": 0.870311596191751,
      "learning_rate": 0.003,
      "loss": 4.1521,
      "step": 2514
    },
    {
      "epoch": 0.02515,
      "grad_norm": 0.718099567068549,
      "learning_rate": 0.003,
      "loss": 4.134,
      "step": 2515
    },
    {
      "epoch": 0.02516,
      "grad_norm": 0.6485758792814954,
      "learning_rate": 0.003,
      "loss": 4.153,
      "step": 2516
    },
    {
      "epoch": 0.02517,
      "grad_norm": 0.6631125888262853,
      "learning_rate": 0.003,
      "loss": 4.1528,
      "step": 2517
    },
    {
      "epoch": 0.02518,
      "grad_norm": 0.62508603256109,
      "learning_rate": 0.003,
      "loss": 4.1629,
      "step": 2518
    },
    {
      "epoch": 0.02519,
      "grad_norm": 0.6533648513332591,
      "learning_rate": 0.003,
      "loss": 4.1392,
      "step": 2519
    },
    {
      "epoch": 0.0252,
      "grad_norm": 0.8730883164618287,
      "learning_rate": 0.003,
      "loss": 4.1603,
      "step": 2520
    },
    {
      "epoch": 0.02521,
      "grad_norm": 1.1508715959612579,
      "learning_rate": 0.003,
      "loss": 4.1617,
      "step": 2521
    },
    {
      "epoch": 0.02522,
      "grad_norm": 0.8851560060677419,
      "learning_rate": 0.003,
      "loss": 4.1564,
      "step": 2522
    },
    {
      "epoch": 0.02523,
      "grad_norm": 0.7609503224943213,
      "learning_rate": 0.003,
      "loss": 4.1544,
      "step": 2523
    },
    {
      "epoch": 0.02524,
      "grad_norm": 0.7874722612084045,
      "learning_rate": 0.003,
      "loss": 4.1632,
      "step": 2524
    },
    {
      "epoch": 0.02525,
      "grad_norm": 0.9497267581795765,
      "learning_rate": 0.003,
      "loss": 4.1834,
      "step": 2525
    },
    {
      "epoch": 0.02526,
      "grad_norm": 1.1487960136042998,
      "learning_rate": 0.003,
      "loss": 4.1381,
      "step": 2526
    },
    {
      "epoch": 0.02527,
      "grad_norm": 0.749631180786954,
      "learning_rate": 0.003,
      "loss": 4.1602,
      "step": 2527
    },
    {
      "epoch": 0.02528,
      "grad_norm": 0.6717432753530606,
      "learning_rate": 0.003,
      "loss": 4.1816,
      "step": 2528
    },
    {
      "epoch": 0.02529,
      "grad_norm": 0.6421573788772453,
      "learning_rate": 0.003,
      "loss": 4.1481,
      "step": 2529
    },
    {
      "epoch": 0.0253,
      "grad_norm": 0.7486612963376212,
      "learning_rate": 0.003,
      "loss": 4.1689,
      "step": 2530
    },
    {
      "epoch": 0.02531,
      "grad_norm": 0.7949983435688287,
      "learning_rate": 0.003,
      "loss": 4.171,
      "step": 2531
    },
    {
      "epoch": 0.02532,
      "grad_norm": 0.7618621411023159,
      "learning_rate": 0.003,
      "loss": 4.1364,
      "step": 2532
    },
    {
      "epoch": 0.02533,
      "grad_norm": 0.8123389715874717,
      "learning_rate": 0.003,
      "loss": 4.1773,
      "step": 2533
    },
    {
      "epoch": 0.02534,
      "grad_norm": 0.808321322590649,
      "learning_rate": 0.003,
      "loss": 4.1573,
      "step": 2534
    },
    {
      "epoch": 0.02535,
      "grad_norm": 0.7051467031412186,
      "learning_rate": 0.003,
      "loss": 4.1477,
      "step": 2535
    },
    {
      "epoch": 0.02536,
      "grad_norm": 0.6093069610094953,
      "learning_rate": 0.003,
      "loss": 4.142,
      "step": 2536
    },
    {
      "epoch": 0.02537,
      "grad_norm": 0.5843395517845005,
      "learning_rate": 0.003,
      "loss": 4.1344,
      "step": 2537
    },
    {
      "epoch": 0.02538,
      "grad_norm": 0.5674485238862751,
      "learning_rate": 0.003,
      "loss": 4.1098,
      "step": 2538
    },
    {
      "epoch": 0.02539,
      "grad_norm": 0.5652766007864589,
      "learning_rate": 0.003,
      "loss": 4.1606,
      "step": 2539
    },
    {
      "epoch": 0.0254,
      "grad_norm": 0.6570254628858063,
      "learning_rate": 0.003,
      "loss": 4.1514,
      "step": 2540
    },
    {
      "epoch": 0.02541,
      "grad_norm": 0.8206242142708078,
      "learning_rate": 0.003,
      "loss": 4.1443,
      "step": 2541
    },
    {
      "epoch": 0.02542,
      "grad_norm": 0.9677168967962686,
      "learning_rate": 0.003,
      "loss": 4.134,
      "step": 2542
    },
    {
      "epoch": 0.02543,
      "grad_norm": 1.024805521305044,
      "learning_rate": 0.003,
      "loss": 4.1646,
      "step": 2543
    },
    {
      "epoch": 0.02544,
      "grad_norm": 0.7591855178067561,
      "learning_rate": 0.003,
      "loss": 4.148,
      "step": 2544
    },
    {
      "epoch": 0.02545,
      "grad_norm": 0.6449474624868338,
      "learning_rate": 0.003,
      "loss": 4.1255,
      "step": 2545
    },
    {
      "epoch": 0.02546,
      "grad_norm": 0.6980586139707194,
      "learning_rate": 0.003,
      "loss": 4.136,
      "step": 2546
    },
    {
      "epoch": 0.02547,
      "grad_norm": 0.7283106303300475,
      "learning_rate": 0.003,
      "loss": 4.1754,
      "step": 2547
    },
    {
      "epoch": 0.02548,
      "grad_norm": 0.6850467395404413,
      "learning_rate": 0.003,
      "loss": 4.1691,
      "step": 2548
    },
    {
      "epoch": 0.02549,
      "grad_norm": 0.7158743790784491,
      "learning_rate": 0.003,
      "loss": 4.1632,
      "step": 2549
    },
    {
      "epoch": 0.0255,
      "grad_norm": 0.799200990441851,
      "learning_rate": 0.003,
      "loss": 4.1303,
      "step": 2550
    },
    {
      "epoch": 0.02551,
      "grad_norm": 0.8852033835267075,
      "learning_rate": 0.003,
      "loss": 4.165,
      "step": 2551
    },
    {
      "epoch": 0.02552,
      "grad_norm": 0.9381299460340671,
      "learning_rate": 0.003,
      "loss": 4.1394,
      "step": 2552
    },
    {
      "epoch": 0.02553,
      "grad_norm": 0.8787078285398282,
      "learning_rate": 0.003,
      "loss": 4.2019,
      "step": 2553
    },
    {
      "epoch": 0.02554,
      "grad_norm": 1.0064349811006572,
      "learning_rate": 0.003,
      "loss": 4.1601,
      "step": 2554
    },
    {
      "epoch": 0.02555,
      "grad_norm": 0.8623285229523936,
      "learning_rate": 0.003,
      "loss": 4.1454,
      "step": 2555
    },
    {
      "epoch": 0.02556,
      "grad_norm": 0.779791532210884,
      "learning_rate": 0.003,
      "loss": 4.154,
      "step": 2556
    },
    {
      "epoch": 0.02557,
      "grad_norm": 0.8041071030856266,
      "learning_rate": 0.003,
      "loss": 4.1561,
      "step": 2557
    },
    {
      "epoch": 0.02558,
      "grad_norm": 0.9046731898466632,
      "learning_rate": 0.003,
      "loss": 4.1797,
      "step": 2558
    },
    {
      "epoch": 0.02559,
      "grad_norm": 0.9231060963177342,
      "learning_rate": 0.003,
      "loss": 4.1753,
      "step": 2559
    },
    {
      "epoch": 0.0256,
      "grad_norm": 0.7942918059321874,
      "learning_rate": 0.003,
      "loss": 4.1509,
      "step": 2560
    },
    {
      "epoch": 0.02561,
      "grad_norm": 0.7182994269745618,
      "learning_rate": 0.003,
      "loss": 4.1458,
      "step": 2561
    },
    {
      "epoch": 0.02562,
      "grad_norm": 0.7377092426971169,
      "learning_rate": 0.003,
      "loss": 4.1635,
      "step": 2562
    },
    {
      "epoch": 0.02563,
      "grad_norm": 0.8008807069379792,
      "learning_rate": 0.003,
      "loss": 4.1444,
      "step": 2563
    },
    {
      "epoch": 0.02564,
      "grad_norm": 0.9654334894945076,
      "learning_rate": 0.003,
      "loss": 4.1721,
      "step": 2564
    },
    {
      "epoch": 0.02565,
      "grad_norm": 1.1270177489997797,
      "learning_rate": 0.003,
      "loss": 4.1607,
      "step": 2565
    },
    {
      "epoch": 0.02566,
      "grad_norm": 0.7790702826784099,
      "learning_rate": 0.003,
      "loss": 4.1704,
      "step": 2566
    },
    {
      "epoch": 0.02567,
      "grad_norm": 0.7253981064969616,
      "learning_rate": 0.003,
      "loss": 4.1684,
      "step": 2567
    },
    {
      "epoch": 0.02568,
      "grad_norm": 0.7766989633341348,
      "learning_rate": 0.003,
      "loss": 4.1634,
      "step": 2568
    },
    {
      "epoch": 0.02569,
      "grad_norm": 0.6753943323614341,
      "learning_rate": 0.003,
      "loss": 4.1406,
      "step": 2569
    },
    {
      "epoch": 0.0257,
      "grad_norm": 0.6053036326649146,
      "learning_rate": 0.003,
      "loss": 4.142,
      "step": 2570
    },
    {
      "epoch": 0.02571,
      "grad_norm": 0.5412545714840278,
      "learning_rate": 0.003,
      "loss": 4.1611,
      "step": 2571
    },
    {
      "epoch": 0.02572,
      "grad_norm": 0.618814983550634,
      "learning_rate": 0.003,
      "loss": 4.1399,
      "step": 2572
    },
    {
      "epoch": 0.02573,
      "grad_norm": 0.6647990779688017,
      "learning_rate": 0.003,
      "loss": 4.1458,
      "step": 2573
    },
    {
      "epoch": 0.02574,
      "grad_norm": 0.7052848934500006,
      "learning_rate": 0.003,
      "loss": 4.1737,
      "step": 2574
    },
    {
      "epoch": 0.02575,
      "grad_norm": 0.7610884337795838,
      "learning_rate": 0.003,
      "loss": 4.1101,
      "step": 2575
    },
    {
      "epoch": 0.02576,
      "grad_norm": 0.6692271813839591,
      "learning_rate": 0.003,
      "loss": 4.1159,
      "step": 2576
    },
    {
      "epoch": 0.02577,
      "grad_norm": 0.5756800187751938,
      "learning_rate": 0.003,
      "loss": 4.1153,
      "step": 2577
    },
    {
      "epoch": 0.02578,
      "grad_norm": 0.5445744087322336,
      "learning_rate": 0.003,
      "loss": 4.1401,
      "step": 2578
    },
    {
      "epoch": 0.02579,
      "grad_norm": 0.5526066768211919,
      "learning_rate": 0.003,
      "loss": 4.0838,
      "step": 2579
    },
    {
      "epoch": 0.0258,
      "grad_norm": 0.555186498872379,
      "learning_rate": 0.003,
      "loss": 4.1418,
      "step": 2580
    },
    {
      "epoch": 0.02581,
      "grad_norm": 0.590974511132741,
      "learning_rate": 0.003,
      "loss": 4.1622,
      "step": 2581
    },
    {
      "epoch": 0.02582,
      "grad_norm": 0.6134714053872987,
      "learning_rate": 0.003,
      "loss": 4.157,
      "step": 2582
    },
    {
      "epoch": 0.02583,
      "grad_norm": 0.6785456864342533,
      "learning_rate": 0.003,
      "loss": 4.1327,
      "step": 2583
    },
    {
      "epoch": 0.02584,
      "grad_norm": 0.7692608608699008,
      "learning_rate": 0.003,
      "loss": 4.1476,
      "step": 2584
    },
    {
      "epoch": 0.02585,
      "grad_norm": 0.8096555829199314,
      "learning_rate": 0.003,
      "loss": 4.1338,
      "step": 2585
    },
    {
      "epoch": 0.02586,
      "grad_norm": 0.8850658157806096,
      "learning_rate": 0.003,
      "loss": 4.1563,
      "step": 2586
    },
    {
      "epoch": 0.02587,
      "grad_norm": 1.0102418517134517,
      "learning_rate": 0.003,
      "loss": 4.1487,
      "step": 2587
    },
    {
      "epoch": 0.02588,
      "grad_norm": 0.9275963803575394,
      "learning_rate": 0.003,
      "loss": 4.1505,
      "step": 2588
    },
    {
      "epoch": 0.02589,
      "grad_norm": 0.9403619185225363,
      "learning_rate": 0.003,
      "loss": 4.1571,
      "step": 2589
    },
    {
      "epoch": 0.0259,
      "grad_norm": 1.0078495428012795,
      "learning_rate": 0.003,
      "loss": 4.1693,
      "step": 2590
    },
    {
      "epoch": 0.02591,
      "grad_norm": 1.2743604633788357,
      "learning_rate": 0.003,
      "loss": 4.1191,
      "step": 2591
    },
    {
      "epoch": 0.02592,
      "grad_norm": 0.8231355726319857,
      "learning_rate": 0.003,
      "loss": 4.155,
      "step": 2592
    },
    {
      "epoch": 0.02593,
      "grad_norm": 0.6890041984971179,
      "learning_rate": 0.003,
      "loss": 4.1511,
      "step": 2593
    },
    {
      "epoch": 0.02594,
      "grad_norm": 0.7454180549344623,
      "learning_rate": 0.003,
      "loss": 4.1728,
      "step": 2594
    },
    {
      "epoch": 0.02595,
      "grad_norm": 0.6902489836421277,
      "learning_rate": 0.003,
      "loss": 4.1203,
      "step": 2595
    },
    {
      "epoch": 0.02596,
      "grad_norm": 0.8183952841129141,
      "learning_rate": 0.003,
      "loss": 4.1633,
      "step": 2596
    },
    {
      "epoch": 0.02597,
      "grad_norm": 0.9211443305771235,
      "learning_rate": 0.003,
      "loss": 4.1359,
      "step": 2597
    },
    {
      "epoch": 0.02598,
      "grad_norm": 1.0292390798108857,
      "learning_rate": 0.003,
      "loss": 4.1827,
      "step": 2598
    },
    {
      "epoch": 0.02599,
      "grad_norm": 0.9373288426158787,
      "learning_rate": 0.003,
      "loss": 4.1706,
      "step": 2599
    },
    {
      "epoch": 0.026,
      "grad_norm": 0.920433494603301,
      "learning_rate": 0.003,
      "loss": 4.1747,
      "step": 2600
    },
    {
      "epoch": 0.02601,
      "grad_norm": 0.8451864765965912,
      "learning_rate": 0.003,
      "loss": 4.1776,
      "step": 2601
    },
    {
      "epoch": 0.02602,
      "grad_norm": 0.7170319795923626,
      "learning_rate": 0.003,
      "loss": 4.1455,
      "step": 2602
    },
    {
      "epoch": 0.02603,
      "grad_norm": 0.6966464803396121,
      "learning_rate": 0.003,
      "loss": 4.1401,
      "step": 2603
    },
    {
      "epoch": 0.02604,
      "grad_norm": 0.7612566166038212,
      "learning_rate": 0.003,
      "loss": 4.1308,
      "step": 2604
    },
    {
      "epoch": 0.02605,
      "grad_norm": 0.6464701620756803,
      "learning_rate": 0.003,
      "loss": 4.1437,
      "step": 2605
    },
    {
      "epoch": 0.02606,
      "grad_norm": 0.5737007841123123,
      "learning_rate": 0.003,
      "loss": 4.162,
      "step": 2606
    },
    {
      "epoch": 0.02607,
      "grad_norm": 0.6131766565865462,
      "learning_rate": 0.003,
      "loss": 4.1509,
      "step": 2607
    },
    {
      "epoch": 0.02608,
      "grad_norm": 0.8086035003985383,
      "learning_rate": 0.003,
      "loss": 4.136,
      "step": 2608
    },
    {
      "epoch": 0.02609,
      "grad_norm": 0.9817909383598199,
      "learning_rate": 0.003,
      "loss": 4.1914,
      "step": 2609
    },
    {
      "epoch": 0.0261,
      "grad_norm": 1.0720508985841146,
      "learning_rate": 0.003,
      "loss": 4.1421,
      "step": 2610
    },
    {
      "epoch": 0.02611,
      "grad_norm": 0.8659326470213682,
      "learning_rate": 0.003,
      "loss": 4.1249,
      "step": 2611
    },
    {
      "epoch": 0.02612,
      "grad_norm": 0.8732525902579722,
      "learning_rate": 0.003,
      "loss": 4.1648,
      "step": 2612
    },
    {
      "epoch": 0.02613,
      "grad_norm": 0.9874637099516153,
      "learning_rate": 0.003,
      "loss": 4.1605,
      "step": 2613
    },
    {
      "epoch": 0.02614,
      "grad_norm": 0.906401203843813,
      "learning_rate": 0.003,
      "loss": 4.1404,
      "step": 2614
    },
    {
      "epoch": 0.02615,
      "grad_norm": 0.6462260762478055,
      "learning_rate": 0.003,
      "loss": 4.1238,
      "step": 2615
    },
    {
      "epoch": 0.02616,
      "grad_norm": 0.6006231478500065,
      "learning_rate": 0.003,
      "loss": 4.1352,
      "step": 2616
    },
    {
      "epoch": 0.02617,
      "grad_norm": 0.6102511179138815,
      "learning_rate": 0.003,
      "loss": 4.1214,
      "step": 2617
    },
    {
      "epoch": 0.02618,
      "grad_norm": 0.6619768894826954,
      "learning_rate": 0.003,
      "loss": 4.15,
      "step": 2618
    },
    {
      "epoch": 0.02619,
      "grad_norm": 0.7040985692908992,
      "learning_rate": 0.003,
      "loss": 4.1627,
      "step": 2619
    },
    {
      "epoch": 0.0262,
      "grad_norm": 0.7390370888569476,
      "learning_rate": 0.003,
      "loss": 4.1255,
      "step": 2620
    },
    {
      "epoch": 0.02621,
      "grad_norm": 0.8595720375733896,
      "learning_rate": 0.003,
      "loss": 4.1547,
      "step": 2621
    },
    {
      "epoch": 0.02622,
      "grad_norm": 0.8747740097574618,
      "learning_rate": 0.003,
      "loss": 4.1571,
      "step": 2622
    },
    {
      "epoch": 0.02623,
      "grad_norm": 0.7865985620123697,
      "learning_rate": 0.003,
      "loss": 4.1577,
      "step": 2623
    },
    {
      "epoch": 0.02624,
      "grad_norm": 0.721745866206175,
      "learning_rate": 0.003,
      "loss": 4.1924,
      "step": 2624
    },
    {
      "epoch": 0.02625,
      "grad_norm": 0.6369082515266815,
      "learning_rate": 0.003,
      "loss": 4.1844,
      "step": 2625
    },
    {
      "epoch": 0.02626,
      "grad_norm": 0.6738431853903379,
      "learning_rate": 0.003,
      "loss": 4.1204,
      "step": 2626
    },
    {
      "epoch": 0.02627,
      "grad_norm": 0.8070717121127621,
      "learning_rate": 0.003,
      "loss": 4.1402,
      "step": 2627
    },
    {
      "epoch": 0.02628,
      "grad_norm": 0.9052615625467546,
      "learning_rate": 0.003,
      "loss": 4.1733,
      "step": 2628
    },
    {
      "epoch": 0.02629,
      "grad_norm": 0.9064578842099213,
      "learning_rate": 0.003,
      "loss": 4.112,
      "step": 2629
    },
    {
      "epoch": 0.0263,
      "grad_norm": 0.9572710825858507,
      "learning_rate": 0.003,
      "loss": 4.1442,
      "step": 2630
    },
    {
      "epoch": 0.02631,
      "grad_norm": 0.9098415499268271,
      "learning_rate": 0.003,
      "loss": 4.1702,
      "step": 2631
    },
    {
      "epoch": 0.02632,
      "grad_norm": 0.957961730332363,
      "learning_rate": 0.003,
      "loss": 4.1333,
      "step": 2632
    },
    {
      "epoch": 0.02633,
      "grad_norm": 0.8910675740871853,
      "learning_rate": 0.003,
      "loss": 4.1452,
      "step": 2633
    },
    {
      "epoch": 0.02634,
      "grad_norm": 0.8894027249202754,
      "learning_rate": 0.003,
      "loss": 4.1657,
      "step": 2634
    },
    {
      "epoch": 0.02635,
      "grad_norm": 1.0919965764145694,
      "learning_rate": 0.003,
      "loss": 4.171,
      "step": 2635
    },
    {
      "epoch": 0.02636,
      "grad_norm": 1.101775206670289,
      "learning_rate": 0.003,
      "loss": 4.1603,
      "step": 2636
    },
    {
      "epoch": 0.02637,
      "grad_norm": 0.9585912224073222,
      "learning_rate": 0.003,
      "loss": 4.1888,
      "step": 2637
    },
    {
      "epoch": 0.02638,
      "grad_norm": 0.7865200036359159,
      "learning_rate": 0.003,
      "loss": 4.1464,
      "step": 2638
    },
    {
      "epoch": 0.02639,
      "grad_norm": 0.7526947533075404,
      "learning_rate": 0.003,
      "loss": 4.1366,
      "step": 2639
    },
    {
      "epoch": 0.0264,
      "grad_norm": 0.8605449721173474,
      "learning_rate": 0.003,
      "loss": 4.134,
      "step": 2640
    },
    {
      "epoch": 0.02641,
      "grad_norm": 0.9267463548326204,
      "learning_rate": 0.003,
      "loss": 4.1502,
      "step": 2641
    },
    {
      "epoch": 0.02642,
      "grad_norm": 0.9486962646703484,
      "learning_rate": 0.003,
      "loss": 4.1611,
      "step": 2642
    },
    {
      "epoch": 0.02643,
      "grad_norm": 0.8844618097245053,
      "learning_rate": 0.003,
      "loss": 4.15,
      "step": 2643
    },
    {
      "epoch": 0.02644,
      "grad_norm": 0.7967251454981952,
      "learning_rate": 0.003,
      "loss": 4.1599,
      "step": 2644
    },
    {
      "epoch": 0.02645,
      "grad_norm": 0.8225181053423539,
      "learning_rate": 0.003,
      "loss": 4.1718,
      "step": 2645
    },
    {
      "epoch": 0.02646,
      "grad_norm": 0.7987479226871795,
      "learning_rate": 0.003,
      "loss": 4.1374,
      "step": 2646
    },
    {
      "epoch": 0.02647,
      "grad_norm": 0.7821642664442445,
      "learning_rate": 0.003,
      "loss": 4.1612,
      "step": 2647
    },
    {
      "epoch": 0.02648,
      "grad_norm": 0.9284024850106564,
      "learning_rate": 0.003,
      "loss": 4.1813,
      "step": 2648
    },
    {
      "epoch": 0.02649,
      "grad_norm": 1.0907008227907322,
      "learning_rate": 0.003,
      "loss": 4.1777,
      "step": 2649
    },
    {
      "epoch": 0.0265,
      "grad_norm": 1.125793480123405,
      "learning_rate": 0.003,
      "loss": 4.1386,
      "step": 2650
    },
    {
      "epoch": 0.02651,
      "grad_norm": 0.7578496969583901,
      "learning_rate": 0.003,
      "loss": 4.1454,
      "step": 2651
    },
    {
      "epoch": 0.02652,
      "grad_norm": 0.7052794942111215,
      "learning_rate": 0.003,
      "loss": 4.1738,
      "step": 2652
    },
    {
      "epoch": 0.02653,
      "grad_norm": 0.7103196739746442,
      "learning_rate": 0.003,
      "loss": 4.1293,
      "step": 2653
    },
    {
      "epoch": 0.02654,
      "grad_norm": 0.6925385503676881,
      "learning_rate": 0.003,
      "loss": 4.1383,
      "step": 2654
    },
    {
      "epoch": 0.02655,
      "grad_norm": 0.781411208540797,
      "learning_rate": 0.003,
      "loss": 4.147,
      "step": 2655
    },
    {
      "epoch": 0.02656,
      "grad_norm": 0.7579190054446218,
      "learning_rate": 0.003,
      "loss": 4.1226,
      "step": 2656
    },
    {
      "epoch": 0.02657,
      "grad_norm": 0.7277248228326177,
      "learning_rate": 0.003,
      "loss": 4.1484,
      "step": 2657
    },
    {
      "epoch": 0.02658,
      "grad_norm": 0.6753248968688912,
      "learning_rate": 0.003,
      "loss": 4.1533,
      "step": 2658
    },
    {
      "epoch": 0.02659,
      "grad_norm": 0.6796448265031177,
      "learning_rate": 0.003,
      "loss": 4.1465,
      "step": 2659
    },
    {
      "epoch": 0.0266,
      "grad_norm": 0.6424181529879253,
      "learning_rate": 0.003,
      "loss": 4.1271,
      "step": 2660
    },
    {
      "epoch": 0.02661,
      "grad_norm": 0.7766396799485783,
      "learning_rate": 0.003,
      "loss": 4.1524,
      "step": 2661
    },
    {
      "epoch": 0.02662,
      "grad_norm": 0.864583375609625,
      "learning_rate": 0.003,
      "loss": 4.1433,
      "step": 2662
    },
    {
      "epoch": 0.02663,
      "grad_norm": 0.8284021457779097,
      "learning_rate": 0.003,
      "loss": 4.1285,
      "step": 2663
    },
    {
      "epoch": 0.02664,
      "grad_norm": 0.7120779790048899,
      "learning_rate": 0.003,
      "loss": 4.1258,
      "step": 2664
    },
    {
      "epoch": 0.02665,
      "grad_norm": 0.6851733303412342,
      "learning_rate": 0.003,
      "loss": 4.1719,
      "step": 2665
    },
    {
      "epoch": 0.02666,
      "grad_norm": 0.6758809967977054,
      "learning_rate": 0.003,
      "loss": 4.1627,
      "step": 2666
    },
    {
      "epoch": 0.02667,
      "grad_norm": 0.6361286175404186,
      "learning_rate": 0.003,
      "loss": 4.1421,
      "step": 2667
    },
    {
      "epoch": 0.02668,
      "grad_norm": 0.5673144703469689,
      "learning_rate": 0.003,
      "loss": 4.1084,
      "step": 2668
    },
    {
      "epoch": 0.02669,
      "grad_norm": 0.5094068244293505,
      "learning_rate": 0.003,
      "loss": 4.1289,
      "step": 2669
    },
    {
      "epoch": 0.0267,
      "grad_norm": 0.5324820431990631,
      "learning_rate": 0.003,
      "loss": 4.1272,
      "step": 2670
    },
    {
      "epoch": 0.02671,
      "grad_norm": 0.6250352979316671,
      "learning_rate": 0.003,
      "loss": 4.1447,
      "step": 2671
    },
    {
      "epoch": 0.02672,
      "grad_norm": 0.6359363422291939,
      "learning_rate": 0.003,
      "loss": 4.1384,
      "step": 2672
    },
    {
      "epoch": 0.02673,
      "grad_norm": 0.6102304515358753,
      "learning_rate": 0.003,
      "loss": 4.1336,
      "step": 2673
    },
    {
      "epoch": 0.02674,
      "grad_norm": 0.7766569602853689,
      "learning_rate": 0.003,
      "loss": 4.1082,
      "step": 2674
    },
    {
      "epoch": 0.02675,
      "grad_norm": 0.9731715722532488,
      "learning_rate": 0.003,
      "loss": 4.1138,
      "step": 2675
    },
    {
      "epoch": 0.02676,
      "grad_norm": 1.0370179032200628,
      "learning_rate": 0.003,
      "loss": 4.1437,
      "step": 2676
    },
    {
      "epoch": 0.02677,
      "grad_norm": 0.7623073096555095,
      "learning_rate": 0.003,
      "loss": 4.1614,
      "step": 2677
    },
    {
      "epoch": 0.02678,
      "grad_norm": 0.7777692153557114,
      "learning_rate": 0.003,
      "loss": 4.1586,
      "step": 2678
    },
    {
      "epoch": 0.02679,
      "grad_norm": 0.8733324106472917,
      "learning_rate": 0.003,
      "loss": 4.1359,
      "step": 2679
    },
    {
      "epoch": 0.0268,
      "grad_norm": 0.8266517145073005,
      "learning_rate": 0.003,
      "loss": 4.1463,
      "step": 2680
    },
    {
      "epoch": 0.02681,
      "grad_norm": 0.758773812684903,
      "learning_rate": 0.003,
      "loss": 4.1643,
      "step": 2681
    },
    {
      "epoch": 0.02682,
      "grad_norm": 0.8283252875504241,
      "learning_rate": 0.003,
      "loss": 4.1476,
      "step": 2682
    },
    {
      "epoch": 0.02683,
      "grad_norm": 0.8404974594062254,
      "learning_rate": 0.003,
      "loss": 4.1692,
      "step": 2683
    },
    {
      "epoch": 0.02684,
      "grad_norm": 0.818507190423211,
      "learning_rate": 0.003,
      "loss": 4.1427,
      "step": 2684
    },
    {
      "epoch": 0.02685,
      "grad_norm": 0.7649702763962748,
      "learning_rate": 0.003,
      "loss": 4.1484,
      "step": 2685
    },
    {
      "epoch": 0.02686,
      "grad_norm": 0.7611427665837982,
      "learning_rate": 0.003,
      "loss": 4.1328,
      "step": 2686
    },
    {
      "epoch": 0.02687,
      "grad_norm": 0.9628527455690736,
      "learning_rate": 0.003,
      "loss": 4.1512,
      "step": 2687
    },
    {
      "epoch": 0.02688,
      "grad_norm": 0.8564202799506135,
      "learning_rate": 0.003,
      "loss": 4.1277,
      "step": 2688
    },
    {
      "epoch": 0.02689,
      "grad_norm": 0.8557838178212854,
      "learning_rate": 0.003,
      "loss": 4.1562,
      "step": 2689
    },
    {
      "epoch": 0.0269,
      "grad_norm": 0.9118623509339678,
      "learning_rate": 0.003,
      "loss": 4.164,
      "step": 2690
    },
    {
      "epoch": 0.02691,
      "grad_norm": 0.8301540595445771,
      "learning_rate": 0.003,
      "loss": 4.1545,
      "step": 2691
    },
    {
      "epoch": 0.02692,
      "grad_norm": 0.6228920440524642,
      "learning_rate": 0.003,
      "loss": 4.1516,
      "step": 2692
    },
    {
      "epoch": 0.02693,
      "grad_norm": 0.6859665731725182,
      "learning_rate": 0.003,
      "loss": 4.1655,
      "step": 2693
    },
    {
      "epoch": 0.02694,
      "grad_norm": 0.7138959075984226,
      "learning_rate": 0.003,
      "loss": 4.1259,
      "step": 2694
    },
    {
      "epoch": 0.02695,
      "grad_norm": 0.7527603952329793,
      "learning_rate": 0.003,
      "loss": 4.1438,
      "step": 2695
    },
    {
      "epoch": 0.02696,
      "grad_norm": 0.7052829196693109,
      "learning_rate": 0.003,
      "loss": 4.144,
      "step": 2696
    },
    {
      "epoch": 0.02697,
      "grad_norm": 0.6398105779035383,
      "learning_rate": 0.003,
      "loss": 4.1511,
      "step": 2697
    },
    {
      "epoch": 0.02698,
      "grad_norm": 0.7194666389048138,
      "learning_rate": 0.003,
      "loss": 4.1368,
      "step": 2698
    },
    {
      "epoch": 0.02699,
      "grad_norm": 0.7132487376498455,
      "learning_rate": 0.003,
      "loss": 4.1522,
      "step": 2699
    },
    {
      "epoch": 0.027,
      "grad_norm": 0.656063779780492,
      "learning_rate": 0.003,
      "loss": 4.124,
      "step": 2700
    },
    {
      "epoch": 0.02701,
      "grad_norm": 0.629455348788436,
      "learning_rate": 0.003,
      "loss": 4.1115,
      "step": 2701
    },
    {
      "epoch": 0.02702,
      "grad_norm": 0.7466120152268881,
      "learning_rate": 0.003,
      "loss": 4.1658,
      "step": 2702
    },
    {
      "epoch": 0.02703,
      "grad_norm": 0.777360526823959,
      "learning_rate": 0.003,
      "loss": 4.1623,
      "step": 2703
    },
    {
      "epoch": 0.02704,
      "grad_norm": 0.8782075840804512,
      "learning_rate": 0.003,
      "loss": 4.11,
      "step": 2704
    },
    {
      "epoch": 0.02705,
      "grad_norm": 0.8925534153148256,
      "learning_rate": 0.003,
      "loss": 4.1656,
      "step": 2705
    },
    {
      "epoch": 0.02706,
      "grad_norm": 0.9726075369304501,
      "learning_rate": 0.003,
      "loss": 4.1314,
      "step": 2706
    },
    {
      "epoch": 0.02707,
      "grad_norm": 0.9439770827114989,
      "learning_rate": 0.003,
      "loss": 4.1304,
      "step": 2707
    },
    {
      "epoch": 0.02708,
      "grad_norm": 0.8038542754161909,
      "learning_rate": 0.003,
      "loss": 4.1613,
      "step": 2708
    },
    {
      "epoch": 0.02709,
      "grad_norm": 0.8203620131302694,
      "learning_rate": 0.003,
      "loss": 4.1282,
      "step": 2709
    },
    {
      "epoch": 0.0271,
      "grad_norm": 0.8724888263473524,
      "learning_rate": 0.003,
      "loss": 4.1371,
      "step": 2710
    },
    {
      "epoch": 0.02711,
      "grad_norm": 0.8653954860429884,
      "learning_rate": 0.003,
      "loss": 4.1256,
      "step": 2711
    },
    {
      "epoch": 0.02712,
      "grad_norm": 0.8020656899710369,
      "learning_rate": 0.003,
      "loss": 4.1343,
      "step": 2712
    },
    {
      "epoch": 0.02713,
      "grad_norm": 0.8083253547021919,
      "learning_rate": 0.003,
      "loss": 4.1308,
      "step": 2713
    },
    {
      "epoch": 0.02714,
      "grad_norm": 0.8690387355838295,
      "learning_rate": 0.003,
      "loss": 4.1327,
      "step": 2714
    },
    {
      "epoch": 0.02715,
      "grad_norm": 1.0593958700668973,
      "learning_rate": 0.003,
      "loss": 4.1347,
      "step": 2715
    },
    {
      "epoch": 0.02716,
      "grad_norm": 1.0574588180443436,
      "learning_rate": 0.003,
      "loss": 4.1532,
      "step": 2716
    },
    {
      "epoch": 0.02717,
      "grad_norm": 0.9164750014025292,
      "learning_rate": 0.003,
      "loss": 4.147,
      "step": 2717
    },
    {
      "epoch": 0.02718,
      "grad_norm": 0.9137711318115498,
      "learning_rate": 0.003,
      "loss": 4.1696,
      "step": 2718
    },
    {
      "epoch": 0.02719,
      "grad_norm": 0.7747921431969758,
      "learning_rate": 0.003,
      "loss": 4.1371,
      "step": 2719
    },
    {
      "epoch": 0.0272,
      "grad_norm": 0.8252071711518221,
      "learning_rate": 0.003,
      "loss": 4.1305,
      "step": 2720
    },
    {
      "epoch": 0.02721,
      "grad_norm": 0.9010417786683242,
      "learning_rate": 0.003,
      "loss": 4.1529,
      "step": 2721
    },
    {
      "epoch": 0.02722,
      "grad_norm": 1.0430685055558186,
      "learning_rate": 0.003,
      "loss": 4.1578,
      "step": 2722
    },
    {
      "epoch": 0.02723,
      "grad_norm": 0.8338446086253062,
      "learning_rate": 0.003,
      "loss": 4.1591,
      "step": 2723
    },
    {
      "epoch": 0.02724,
      "grad_norm": 0.7926892406317515,
      "learning_rate": 0.003,
      "loss": 4.1604,
      "step": 2724
    },
    {
      "epoch": 0.02725,
      "grad_norm": 0.9476051066490954,
      "learning_rate": 0.003,
      "loss": 4.1559,
      "step": 2725
    },
    {
      "epoch": 0.02726,
      "grad_norm": 0.9718868135312356,
      "learning_rate": 0.003,
      "loss": 4.1298,
      "step": 2726
    },
    {
      "epoch": 0.02727,
      "grad_norm": 0.8794969874731017,
      "learning_rate": 0.003,
      "loss": 4.1606,
      "step": 2727
    },
    {
      "epoch": 0.02728,
      "grad_norm": 0.7500620026401651,
      "learning_rate": 0.003,
      "loss": 4.1435,
      "step": 2728
    },
    {
      "epoch": 0.02729,
      "grad_norm": 0.682111991166883,
      "learning_rate": 0.003,
      "loss": 4.172,
      "step": 2729
    },
    {
      "epoch": 0.0273,
      "grad_norm": 0.5938416056877477,
      "learning_rate": 0.003,
      "loss": 4.1397,
      "step": 2730
    },
    {
      "epoch": 0.02731,
      "grad_norm": 0.5014324118447245,
      "learning_rate": 0.003,
      "loss": 4.1354,
      "step": 2731
    },
    {
      "epoch": 0.02732,
      "grad_norm": 0.5566218730286413,
      "learning_rate": 0.003,
      "loss": 4.1285,
      "step": 2732
    },
    {
      "epoch": 0.02733,
      "grad_norm": 0.656514130835437,
      "learning_rate": 0.003,
      "loss": 4.16,
      "step": 2733
    },
    {
      "epoch": 0.02734,
      "grad_norm": 0.7092083749191737,
      "learning_rate": 0.003,
      "loss": 4.1115,
      "step": 2734
    },
    {
      "epoch": 0.02735,
      "grad_norm": 0.7490351435778966,
      "learning_rate": 0.003,
      "loss": 4.1581,
      "step": 2735
    },
    {
      "epoch": 0.02736,
      "grad_norm": 0.8057410040143573,
      "learning_rate": 0.003,
      "loss": 4.1338,
      "step": 2736
    },
    {
      "epoch": 0.02737,
      "grad_norm": 0.7272389058341057,
      "learning_rate": 0.003,
      "loss": 4.1253,
      "step": 2737
    },
    {
      "epoch": 0.02738,
      "grad_norm": 0.7181903456270696,
      "learning_rate": 0.003,
      "loss": 4.1447,
      "step": 2738
    },
    {
      "epoch": 0.02739,
      "grad_norm": 0.8790015592136898,
      "learning_rate": 0.003,
      "loss": 4.1427,
      "step": 2739
    },
    {
      "epoch": 0.0274,
      "grad_norm": 1.0911668104806873,
      "learning_rate": 0.003,
      "loss": 4.1395,
      "step": 2740
    },
    {
      "epoch": 0.02741,
      "grad_norm": 1.0149366352031834,
      "learning_rate": 0.003,
      "loss": 4.1544,
      "step": 2741
    },
    {
      "epoch": 0.02742,
      "grad_norm": 0.8648703789133807,
      "learning_rate": 0.003,
      "loss": 4.1381,
      "step": 2742
    },
    {
      "epoch": 0.02743,
      "grad_norm": 0.8509616129669461,
      "learning_rate": 0.003,
      "loss": 4.1613,
      "step": 2743
    },
    {
      "epoch": 0.02744,
      "grad_norm": 0.7514267415016833,
      "learning_rate": 0.003,
      "loss": 4.1399,
      "step": 2744
    },
    {
      "epoch": 0.02745,
      "grad_norm": 0.666188856053761,
      "learning_rate": 0.003,
      "loss": 4.1482,
      "step": 2745
    },
    {
      "epoch": 0.02746,
      "grad_norm": 0.6204707613465867,
      "learning_rate": 0.003,
      "loss": 4.1227,
      "step": 2746
    },
    {
      "epoch": 0.02747,
      "grad_norm": 0.5745978206046833,
      "learning_rate": 0.003,
      "loss": 4.1542,
      "step": 2747
    },
    {
      "epoch": 0.02748,
      "grad_norm": 0.5095037008788326,
      "learning_rate": 0.003,
      "loss": 4.106,
      "step": 2748
    },
    {
      "epoch": 0.02749,
      "grad_norm": 0.4918061860336399,
      "learning_rate": 0.003,
      "loss": 4.1075,
      "step": 2749
    },
    {
      "epoch": 0.0275,
      "grad_norm": 0.4689633122885494,
      "learning_rate": 0.003,
      "loss": 4.1398,
      "step": 2750
    },
    {
      "epoch": 0.02751,
      "grad_norm": 0.5240834818389507,
      "learning_rate": 0.003,
      "loss": 4.1544,
      "step": 2751
    },
    {
      "epoch": 0.02752,
      "grad_norm": 0.45737584283965704,
      "learning_rate": 0.003,
      "loss": 4.1373,
      "step": 2752
    },
    {
      "epoch": 0.02753,
      "grad_norm": 0.4854554116683414,
      "learning_rate": 0.003,
      "loss": 4.1197,
      "step": 2753
    },
    {
      "epoch": 0.02754,
      "grad_norm": 0.5451937888698382,
      "learning_rate": 0.003,
      "loss": 4.1039,
      "step": 2754
    },
    {
      "epoch": 0.02755,
      "grad_norm": 0.6902713768999703,
      "learning_rate": 0.003,
      "loss": 4.1055,
      "step": 2755
    },
    {
      "epoch": 0.02756,
      "grad_norm": 0.9172618359251131,
      "learning_rate": 0.003,
      "loss": 4.1411,
      "step": 2756
    },
    {
      "epoch": 0.02757,
      "grad_norm": 1.264093714132492,
      "learning_rate": 0.003,
      "loss": 4.1713,
      "step": 2757
    },
    {
      "epoch": 0.02758,
      "grad_norm": 0.6764873385501415,
      "learning_rate": 0.003,
      "loss": 4.1046,
      "step": 2758
    },
    {
      "epoch": 0.02759,
      "grad_norm": 0.6735199753323651,
      "learning_rate": 0.003,
      "loss": 4.1384,
      "step": 2759
    },
    {
      "epoch": 0.0276,
      "grad_norm": 0.7869359581922244,
      "learning_rate": 0.003,
      "loss": 4.143,
      "step": 2760
    },
    {
      "epoch": 0.02761,
      "grad_norm": 0.8232479826802365,
      "learning_rate": 0.003,
      "loss": 4.1614,
      "step": 2761
    },
    {
      "epoch": 0.02762,
      "grad_norm": 0.8094991367198255,
      "learning_rate": 0.003,
      "loss": 4.1272,
      "step": 2762
    },
    {
      "epoch": 0.02763,
      "grad_norm": 0.808838083900847,
      "learning_rate": 0.003,
      "loss": 4.1393,
      "step": 2763
    },
    {
      "epoch": 0.02764,
      "grad_norm": 0.9499840304387701,
      "learning_rate": 0.003,
      "loss": 4.1387,
      "step": 2764
    },
    {
      "epoch": 0.02765,
      "grad_norm": 1.1661515206508581,
      "learning_rate": 0.003,
      "loss": 4.1535,
      "step": 2765
    },
    {
      "epoch": 0.02766,
      "grad_norm": 0.8834816047449258,
      "learning_rate": 0.003,
      "loss": 4.1143,
      "step": 2766
    },
    {
      "epoch": 0.02767,
      "grad_norm": 0.9679955840143722,
      "learning_rate": 0.003,
      "loss": 4.1737,
      "step": 2767
    },
    {
      "epoch": 0.02768,
      "grad_norm": 1.0122550261242687,
      "learning_rate": 0.003,
      "loss": 4.1138,
      "step": 2768
    },
    {
      "epoch": 0.02769,
      "grad_norm": 0.936589272381051,
      "learning_rate": 0.003,
      "loss": 4.1722,
      "step": 2769
    },
    {
      "epoch": 0.0277,
      "grad_norm": 0.8812342595154814,
      "learning_rate": 0.003,
      "loss": 4.1383,
      "step": 2770
    },
    {
      "epoch": 0.02771,
      "grad_norm": 0.8385099378903244,
      "learning_rate": 0.003,
      "loss": 4.1266,
      "step": 2771
    },
    {
      "epoch": 0.02772,
      "grad_norm": 0.8615548972194946,
      "learning_rate": 0.003,
      "loss": 4.1184,
      "step": 2772
    },
    {
      "epoch": 0.02773,
      "grad_norm": 0.7470925818947043,
      "learning_rate": 0.003,
      "loss": 4.1495,
      "step": 2773
    },
    {
      "epoch": 0.02774,
      "grad_norm": 0.7432890220276933,
      "learning_rate": 0.003,
      "loss": 4.1713,
      "step": 2774
    },
    {
      "epoch": 0.02775,
      "grad_norm": 0.714251751808306,
      "learning_rate": 0.003,
      "loss": 4.1377,
      "step": 2775
    },
    {
      "epoch": 0.02776,
      "grad_norm": 0.6696685091947796,
      "learning_rate": 0.003,
      "loss": 4.1315,
      "step": 2776
    },
    {
      "epoch": 0.02777,
      "grad_norm": 0.6601977907009066,
      "learning_rate": 0.003,
      "loss": 4.1109,
      "step": 2777
    },
    {
      "epoch": 0.02778,
      "grad_norm": 0.6363902696248916,
      "learning_rate": 0.003,
      "loss": 4.1509,
      "step": 2778
    },
    {
      "epoch": 0.02779,
      "grad_norm": 0.7347310050156719,
      "learning_rate": 0.003,
      "loss": 4.126,
      "step": 2779
    },
    {
      "epoch": 0.0278,
      "grad_norm": 0.8193092944005641,
      "learning_rate": 0.003,
      "loss": 4.1233,
      "step": 2780
    },
    {
      "epoch": 0.02781,
      "grad_norm": 0.9505772704924963,
      "learning_rate": 0.003,
      "loss": 4.113,
      "step": 2781
    },
    {
      "epoch": 0.02782,
      "grad_norm": 1.09253758346844,
      "learning_rate": 0.003,
      "loss": 4.1421,
      "step": 2782
    },
    {
      "epoch": 0.02783,
      "grad_norm": 0.7922623009085619,
      "learning_rate": 0.003,
      "loss": 4.1407,
      "step": 2783
    },
    {
      "epoch": 0.02784,
      "grad_norm": 0.7730396671741616,
      "learning_rate": 0.003,
      "loss": 4.136,
      "step": 2784
    },
    {
      "epoch": 0.02785,
      "grad_norm": 0.8111585358679776,
      "learning_rate": 0.003,
      "loss": 4.1529,
      "step": 2785
    },
    {
      "epoch": 0.02786,
      "grad_norm": 0.7983751779273013,
      "learning_rate": 0.003,
      "loss": 4.1458,
      "step": 2786
    },
    {
      "epoch": 0.02787,
      "grad_norm": 0.8522172170925804,
      "learning_rate": 0.003,
      "loss": 4.1468,
      "step": 2787
    },
    {
      "epoch": 0.02788,
      "grad_norm": 0.8952452301797756,
      "learning_rate": 0.003,
      "loss": 4.1379,
      "step": 2788
    },
    {
      "epoch": 0.02789,
      "grad_norm": 0.9315897488998124,
      "learning_rate": 0.003,
      "loss": 4.1439,
      "step": 2789
    },
    {
      "epoch": 0.0279,
      "grad_norm": 0.7715461952206646,
      "learning_rate": 0.003,
      "loss": 4.1139,
      "step": 2790
    },
    {
      "epoch": 0.02791,
      "grad_norm": 0.7419347495262759,
      "learning_rate": 0.003,
      "loss": 4.1266,
      "step": 2791
    },
    {
      "epoch": 0.02792,
      "grad_norm": 0.804466637477434,
      "learning_rate": 0.003,
      "loss": 4.1446,
      "step": 2792
    },
    {
      "epoch": 0.02793,
      "grad_norm": 0.9499312139027115,
      "learning_rate": 0.003,
      "loss": 4.1481,
      "step": 2793
    },
    {
      "epoch": 0.02794,
      "grad_norm": 1.079651373139261,
      "learning_rate": 0.003,
      "loss": 4.1503,
      "step": 2794
    },
    {
      "epoch": 0.02795,
      "grad_norm": 0.7784790735501392,
      "learning_rate": 0.003,
      "loss": 4.1562,
      "step": 2795
    },
    {
      "epoch": 0.02796,
      "grad_norm": 0.7016883329409419,
      "learning_rate": 0.003,
      "loss": 4.1447,
      "step": 2796
    },
    {
      "epoch": 0.02797,
      "grad_norm": 0.8527425055953903,
      "learning_rate": 0.003,
      "loss": 4.1615,
      "step": 2797
    },
    {
      "epoch": 0.02798,
      "grad_norm": 0.8344419810312547,
      "learning_rate": 0.003,
      "loss": 4.1599,
      "step": 2798
    },
    {
      "epoch": 0.02799,
      "grad_norm": 0.8118333253983723,
      "learning_rate": 0.003,
      "loss": 4.1456,
      "step": 2799
    },
    {
      "epoch": 0.028,
      "grad_norm": 0.8276964023163034,
      "learning_rate": 0.003,
      "loss": 4.154,
      "step": 2800
    },
    {
      "epoch": 0.02801,
      "grad_norm": 0.8829595698308325,
      "learning_rate": 0.003,
      "loss": 4.1343,
      "step": 2801
    },
    {
      "epoch": 0.02802,
      "grad_norm": 0.9178695537376129,
      "learning_rate": 0.003,
      "loss": 4.1357,
      "step": 2802
    },
    {
      "epoch": 0.02803,
      "grad_norm": 0.773306657979122,
      "learning_rate": 0.003,
      "loss": 4.1566,
      "step": 2803
    },
    {
      "epoch": 0.02804,
      "grad_norm": 0.6894758923555496,
      "learning_rate": 0.003,
      "loss": 4.1238,
      "step": 2804
    },
    {
      "epoch": 0.02805,
      "grad_norm": 0.6917993567356406,
      "learning_rate": 0.003,
      "loss": 4.1566,
      "step": 2805
    },
    {
      "epoch": 0.02806,
      "grad_norm": 0.6387085680780493,
      "learning_rate": 0.003,
      "loss": 4.1312,
      "step": 2806
    },
    {
      "epoch": 0.02807,
      "grad_norm": 0.580517980379294,
      "learning_rate": 0.003,
      "loss": 4.1301,
      "step": 2807
    },
    {
      "epoch": 0.02808,
      "grad_norm": 0.4980072648857084,
      "learning_rate": 0.003,
      "loss": 4.1101,
      "step": 2808
    },
    {
      "epoch": 0.02809,
      "grad_norm": 0.5292621050422022,
      "learning_rate": 0.003,
      "loss": 4.1503,
      "step": 2809
    },
    {
      "epoch": 0.0281,
      "grad_norm": 0.5662997833368786,
      "learning_rate": 0.003,
      "loss": 4.1045,
      "step": 2810
    },
    {
      "epoch": 0.02811,
      "grad_norm": 0.6056304842841205,
      "learning_rate": 0.003,
      "loss": 4.1149,
      "step": 2811
    },
    {
      "epoch": 0.02812,
      "grad_norm": 0.6510291438212791,
      "learning_rate": 0.003,
      "loss": 4.131,
      "step": 2812
    },
    {
      "epoch": 0.02813,
      "grad_norm": 0.6856589618404986,
      "learning_rate": 0.003,
      "loss": 4.1212,
      "step": 2813
    },
    {
      "epoch": 0.02814,
      "grad_norm": 0.7345790506941087,
      "learning_rate": 0.003,
      "loss": 4.1221,
      "step": 2814
    },
    {
      "epoch": 0.02815,
      "grad_norm": 0.7816359381581716,
      "learning_rate": 0.003,
      "loss": 4.1614,
      "step": 2815
    },
    {
      "epoch": 0.02816,
      "grad_norm": 0.8925647710841813,
      "learning_rate": 0.003,
      "loss": 4.1426,
      "step": 2816
    },
    {
      "epoch": 0.02817,
      "grad_norm": 1.169228788902705,
      "learning_rate": 0.003,
      "loss": 4.1079,
      "step": 2817
    },
    {
      "epoch": 0.02818,
      "grad_norm": 0.9618528564502903,
      "learning_rate": 0.003,
      "loss": 4.1194,
      "step": 2818
    },
    {
      "epoch": 0.02819,
      "grad_norm": 1.0178604687945154,
      "learning_rate": 0.003,
      "loss": 4.15,
      "step": 2819
    },
    {
      "epoch": 0.0282,
      "grad_norm": 0.8729007051855019,
      "learning_rate": 0.003,
      "loss": 4.1374,
      "step": 2820
    },
    {
      "epoch": 0.02821,
      "grad_norm": 0.7594720998624757,
      "learning_rate": 0.003,
      "loss": 4.1557,
      "step": 2821
    },
    {
      "epoch": 0.02822,
      "grad_norm": 0.7764641169305468,
      "learning_rate": 0.003,
      "loss": 4.1245,
      "step": 2822
    },
    {
      "epoch": 0.02823,
      "grad_norm": 0.79863443820284,
      "learning_rate": 0.003,
      "loss": 4.154,
      "step": 2823
    },
    {
      "epoch": 0.02824,
      "grad_norm": 0.7182103089348308,
      "learning_rate": 0.003,
      "loss": 4.1387,
      "step": 2824
    },
    {
      "epoch": 0.02825,
      "grad_norm": 0.7529407236815215,
      "learning_rate": 0.003,
      "loss": 4.1425,
      "step": 2825
    },
    {
      "epoch": 0.02826,
      "grad_norm": 0.8056220195019022,
      "learning_rate": 0.003,
      "loss": 4.1212,
      "step": 2826
    },
    {
      "epoch": 0.02827,
      "grad_norm": 0.8893025905667793,
      "learning_rate": 0.003,
      "loss": 4.1113,
      "step": 2827
    },
    {
      "epoch": 0.02828,
      "grad_norm": 0.9458155387571202,
      "learning_rate": 0.003,
      "loss": 4.1547,
      "step": 2828
    },
    {
      "epoch": 0.02829,
      "grad_norm": 1.0816962232960836,
      "learning_rate": 0.003,
      "loss": 4.1852,
      "step": 2829
    },
    {
      "epoch": 0.0283,
      "grad_norm": 0.85593992308399,
      "learning_rate": 0.003,
      "loss": 4.1432,
      "step": 2830
    },
    {
      "epoch": 0.02831,
      "grad_norm": 0.8103547032302357,
      "learning_rate": 0.003,
      "loss": 4.1446,
      "step": 2831
    },
    {
      "epoch": 0.02832,
      "grad_norm": 1.0004568820848851,
      "learning_rate": 0.003,
      "loss": 4.1611,
      "step": 2832
    },
    {
      "epoch": 0.02833,
      "grad_norm": 1.2257623869393093,
      "learning_rate": 0.003,
      "loss": 4.1482,
      "step": 2833
    },
    {
      "epoch": 0.02834,
      "grad_norm": 0.8959695287844174,
      "learning_rate": 0.003,
      "loss": 4.1264,
      "step": 2834
    },
    {
      "epoch": 0.02835,
      "grad_norm": 0.9046726088007764,
      "learning_rate": 0.003,
      "loss": 4.1382,
      "step": 2835
    },
    {
      "epoch": 0.02836,
      "grad_norm": 0.9388179555413667,
      "learning_rate": 0.003,
      "loss": 4.1634,
      "step": 2836
    },
    {
      "epoch": 0.02837,
      "grad_norm": 0.8876864866399585,
      "learning_rate": 0.003,
      "loss": 4.1312,
      "step": 2837
    },
    {
      "epoch": 0.02838,
      "grad_norm": 0.93231141838983,
      "learning_rate": 0.003,
      "loss": 4.1029,
      "step": 2838
    },
    {
      "epoch": 0.02839,
      "grad_norm": 0.8358991135319658,
      "learning_rate": 0.003,
      "loss": 4.0981,
      "step": 2839
    },
    {
      "epoch": 0.0284,
      "grad_norm": 0.8834561842613636,
      "learning_rate": 0.003,
      "loss": 4.1133,
      "step": 2840
    },
    {
      "epoch": 0.02841,
      "grad_norm": 0.9006412651182929,
      "learning_rate": 0.003,
      "loss": 4.1522,
      "step": 2841
    },
    {
      "epoch": 0.02842,
      "grad_norm": 0.9446862919101724,
      "learning_rate": 0.003,
      "loss": 4.1417,
      "step": 2842
    },
    {
      "epoch": 0.02843,
      "grad_norm": 0.8098223520337203,
      "learning_rate": 0.003,
      "loss": 4.1599,
      "step": 2843
    },
    {
      "epoch": 0.02844,
      "grad_norm": 0.7322010149090498,
      "learning_rate": 0.003,
      "loss": 4.1529,
      "step": 2844
    },
    {
      "epoch": 0.02845,
      "grad_norm": 0.6471809615510085,
      "learning_rate": 0.003,
      "loss": 4.126,
      "step": 2845
    },
    {
      "epoch": 0.02846,
      "grad_norm": 0.6878791837952257,
      "learning_rate": 0.003,
      "loss": 4.1218,
      "step": 2846
    },
    {
      "epoch": 0.02847,
      "grad_norm": 0.7696793006326048,
      "learning_rate": 0.003,
      "loss": 4.1788,
      "step": 2847
    },
    {
      "epoch": 0.02848,
      "grad_norm": 0.8913190904442756,
      "learning_rate": 0.003,
      "loss": 4.1422,
      "step": 2848
    },
    {
      "epoch": 0.02849,
      "grad_norm": 1.018118471142102,
      "learning_rate": 0.003,
      "loss": 4.1327,
      "step": 2849
    },
    {
      "epoch": 0.0285,
      "grad_norm": 0.9082110603942541,
      "learning_rate": 0.003,
      "loss": 4.1356,
      "step": 2850
    },
    {
      "epoch": 0.02851,
      "grad_norm": 0.822250823175019,
      "learning_rate": 0.003,
      "loss": 4.1725,
      "step": 2851
    },
    {
      "epoch": 0.02852,
      "grad_norm": 0.6052901464954267,
      "learning_rate": 0.003,
      "loss": 4.1424,
      "step": 2852
    },
    {
      "epoch": 0.02853,
      "grad_norm": 0.5735422812437921,
      "learning_rate": 0.003,
      "loss": 4.1065,
      "step": 2853
    },
    {
      "epoch": 0.02854,
      "grad_norm": 0.5429930679913405,
      "learning_rate": 0.003,
      "loss": 4.1448,
      "step": 2854
    },
    {
      "epoch": 0.02855,
      "grad_norm": 0.5339097730360199,
      "learning_rate": 0.003,
      "loss": 4.1387,
      "step": 2855
    },
    {
      "epoch": 0.02856,
      "grad_norm": 0.5860774205400787,
      "learning_rate": 0.003,
      "loss": 4.1428,
      "step": 2856
    },
    {
      "epoch": 0.02857,
      "grad_norm": 0.6433890086945638,
      "learning_rate": 0.003,
      "loss": 4.1236,
      "step": 2857
    },
    {
      "epoch": 0.02858,
      "grad_norm": 0.7879326106492056,
      "learning_rate": 0.003,
      "loss": 4.1162,
      "step": 2858
    },
    {
      "epoch": 0.02859,
      "grad_norm": 0.8693860304588211,
      "learning_rate": 0.003,
      "loss": 4.1274,
      "step": 2859
    },
    {
      "epoch": 0.0286,
      "grad_norm": 0.8079095294087945,
      "learning_rate": 0.003,
      "loss": 4.1417,
      "step": 2860
    },
    {
      "epoch": 0.02861,
      "grad_norm": 0.6656764279644372,
      "learning_rate": 0.003,
      "loss": 4.1049,
      "step": 2861
    },
    {
      "epoch": 0.02862,
      "grad_norm": 0.6696791424270172,
      "learning_rate": 0.003,
      "loss": 4.1232,
      "step": 2862
    },
    {
      "epoch": 0.02863,
      "grad_norm": 0.7769070451321298,
      "learning_rate": 0.003,
      "loss": 4.1498,
      "step": 2863
    },
    {
      "epoch": 0.02864,
      "grad_norm": 0.8224098496945286,
      "learning_rate": 0.003,
      "loss": 4.1241,
      "step": 2864
    },
    {
      "epoch": 0.02865,
      "grad_norm": 0.8864904617013009,
      "learning_rate": 0.003,
      "loss": 4.1583,
      "step": 2865
    },
    {
      "epoch": 0.02866,
      "grad_norm": 0.9583148110901643,
      "learning_rate": 0.003,
      "loss": 4.1437,
      "step": 2866
    },
    {
      "epoch": 0.02867,
      "grad_norm": 0.8904807738138028,
      "learning_rate": 0.003,
      "loss": 4.1281,
      "step": 2867
    },
    {
      "epoch": 0.02868,
      "grad_norm": 0.9293551806467387,
      "learning_rate": 0.003,
      "loss": 4.1391,
      "step": 2868
    },
    {
      "epoch": 0.02869,
      "grad_norm": 1.0457657175233683,
      "learning_rate": 0.003,
      "loss": 4.1353,
      "step": 2869
    },
    {
      "epoch": 0.0287,
      "grad_norm": 1.0953444171616145,
      "learning_rate": 0.003,
      "loss": 4.1636,
      "step": 2870
    },
    {
      "epoch": 0.02871,
      "grad_norm": 0.7587885970460945,
      "learning_rate": 0.003,
      "loss": 4.1356,
      "step": 2871
    },
    {
      "epoch": 0.02872,
      "grad_norm": 0.6777165872502906,
      "learning_rate": 0.003,
      "loss": 4.1681,
      "step": 2872
    },
    {
      "epoch": 0.02873,
      "grad_norm": 0.8924775793037929,
      "learning_rate": 0.003,
      "loss": 4.1422,
      "step": 2873
    },
    {
      "epoch": 0.02874,
      "grad_norm": 0.9191095226023763,
      "learning_rate": 0.003,
      "loss": 4.1633,
      "step": 2874
    },
    {
      "epoch": 0.02875,
      "grad_norm": 1.0057201785085328,
      "learning_rate": 0.003,
      "loss": 4.1702,
      "step": 2875
    },
    {
      "epoch": 0.02876,
      "grad_norm": 0.9084894421524936,
      "learning_rate": 0.003,
      "loss": 4.1504,
      "step": 2876
    },
    {
      "epoch": 0.02877,
      "grad_norm": 0.9095891721863432,
      "learning_rate": 0.003,
      "loss": 4.1305,
      "step": 2877
    },
    {
      "epoch": 0.02878,
      "grad_norm": 0.9172527890033496,
      "learning_rate": 0.003,
      "loss": 4.1286,
      "step": 2878
    },
    {
      "epoch": 0.02879,
      "grad_norm": 0.8805567763002417,
      "learning_rate": 0.003,
      "loss": 4.1227,
      "step": 2879
    },
    {
      "epoch": 0.0288,
      "grad_norm": 0.9996123727255034,
      "learning_rate": 0.003,
      "loss": 4.1332,
      "step": 2880
    },
    {
      "epoch": 0.02881,
      "grad_norm": 1.046330847202451,
      "learning_rate": 0.003,
      "loss": 4.1514,
      "step": 2881
    },
    {
      "epoch": 0.02882,
      "grad_norm": 0.772117255675699,
      "learning_rate": 0.003,
      "loss": 4.1606,
      "step": 2882
    },
    {
      "epoch": 0.02883,
      "grad_norm": 0.7008407026763371,
      "learning_rate": 0.003,
      "loss": 4.1396,
      "step": 2883
    },
    {
      "epoch": 0.02884,
      "grad_norm": 0.778782483892839,
      "learning_rate": 0.003,
      "loss": 4.177,
      "step": 2884
    },
    {
      "epoch": 0.02885,
      "grad_norm": 0.7648524574296616,
      "learning_rate": 0.003,
      "loss": 4.1174,
      "step": 2885
    },
    {
      "epoch": 0.02886,
      "grad_norm": 0.6944709668733141,
      "learning_rate": 0.003,
      "loss": 4.1469,
      "step": 2886
    },
    {
      "epoch": 0.02887,
      "grad_norm": 0.6701833801380009,
      "learning_rate": 0.003,
      "loss": 4.1291,
      "step": 2887
    },
    {
      "epoch": 0.02888,
      "grad_norm": 0.6522446873326756,
      "learning_rate": 0.003,
      "loss": 4.1256,
      "step": 2888
    },
    {
      "epoch": 0.02889,
      "grad_norm": 0.5695684026681817,
      "learning_rate": 0.003,
      "loss": 4.1113,
      "step": 2889
    },
    {
      "epoch": 0.0289,
      "grad_norm": 0.5387645552557849,
      "learning_rate": 0.003,
      "loss": 4.1434,
      "step": 2890
    },
    {
      "epoch": 0.02891,
      "grad_norm": 0.6068073035716243,
      "learning_rate": 0.003,
      "loss": 4.1589,
      "step": 2891
    },
    {
      "epoch": 0.02892,
      "grad_norm": 0.5642060774681064,
      "learning_rate": 0.003,
      "loss": 4.109,
      "step": 2892
    },
    {
      "epoch": 0.02893,
      "grad_norm": 0.64295806093738,
      "learning_rate": 0.003,
      "loss": 4.1179,
      "step": 2893
    },
    {
      "epoch": 0.02894,
      "grad_norm": 0.7755044700890192,
      "learning_rate": 0.003,
      "loss": 4.1282,
      "step": 2894
    },
    {
      "epoch": 0.02895,
      "grad_norm": 0.7810569175434396,
      "learning_rate": 0.003,
      "loss": 4.1719,
      "step": 2895
    },
    {
      "epoch": 0.02896,
      "grad_norm": 0.5425405989527838,
      "learning_rate": 0.003,
      "loss": 4.1197,
      "step": 2896
    },
    {
      "epoch": 0.02897,
      "grad_norm": 0.469070597819537,
      "learning_rate": 0.003,
      "loss": 4.0995,
      "step": 2897
    },
    {
      "epoch": 0.02898,
      "grad_norm": 0.5139238004799116,
      "learning_rate": 0.003,
      "loss": 4.1442,
      "step": 2898
    },
    {
      "epoch": 0.02899,
      "grad_norm": 0.6544046694256337,
      "learning_rate": 0.003,
      "loss": 4.1296,
      "step": 2899
    },
    {
      "epoch": 0.029,
      "grad_norm": 0.7180257646745092,
      "learning_rate": 0.003,
      "loss": 4.1219,
      "step": 2900
    },
    {
      "epoch": 0.02901,
      "grad_norm": 0.6895214504299034,
      "learning_rate": 0.003,
      "loss": 4.1358,
      "step": 2901
    },
    {
      "epoch": 0.02902,
      "grad_norm": 0.6936366493807695,
      "learning_rate": 0.003,
      "loss": 4.1524,
      "step": 2902
    },
    {
      "epoch": 0.02903,
      "grad_norm": 0.7680000086672168,
      "learning_rate": 0.003,
      "loss": 4.1137,
      "step": 2903
    },
    {
      "epoch": 0.02904,
      "grad_norm": 1.0523944817643214,
      "learning_rate": 0.003,
      "loss": 4.1402,
      "step": 2904
    },
    {
      "epoch": 0.02905,
      "grad_norm": 1.051471635673892,
      "learning_rate": 0.003,
      "loss": 4.1582,
      "step": 2905
    },
    {
      "epoch": 0.02906,
      "grad_norm": 0.8511171595587916,
      "learning_rate": 0.003,
      "loss": 4.1318,
      "step": 2906
    },
    {
      "epoch": 0.02907,
      "grad_norm": 0.8224344496311664,
      "learning_rate": 0.003,
      "loss": 4.1093,
      "step": 2907
    },
    {
      "epoch": 0.02908,
      "grad_norm": 0.6999459176877906,
      "learning_rate": 0.003,
      "loss": 4.1259,
      "step": 2908
    },
    {
      "epoch": 0.02909,
      "grad_norm": 0.819486070194941,
      "learning_rate": 0.003,
      "loss": 4.1263,
      "step": 2909
    },
    {
      "epoch": 0.0291,
      "grad_norm": 1.066905831023775,
      "learning_rate": 0.003,
      "loss": 4.1444,
      "step": 2910
    },
    {
      "epoch": 0.02911,
      "grad_norm": 0.993942719913462,
      "learning_rate": 0.003,
      "loss": 4.1435,
      "step": 2911
    },
    {
      "epoch": 0.02912,
      "grad_norm": 0.9395324885369105,
      "learning_rate": 0.003,
      "loss": 4.114,
      "step": 2912
    },
    {
      "epoch": 0.02913,
      "grad_norm": 0.9563813373046008,
      "learning_rate": 0.003,
      "loss": 4.1513,
      "step": 2913
    },
    {
      "epoch": 0.02914,
      "grad_norm": 1.204534731933291,
      "learning_rate": 0.003,
      "loss": 4.1464,
      "step": 2914
    },
    {
      "epoch": 0.02915,
      "grad_norm": 0.9555386893347948,
      "learning_rate": 0.003,
      "loss": 4.1331,
      "step": 2915
    },
    {
      "epoch": 0.02916,
      "grad_norm": 1.0824011254780639,
      "learning_rate": 0.003,
      "loss": 4.1482,
      "step": 2916
    },
    {
      "epoch": 0.02917,
      "grad_norm": 1.0942512414631864,
      "learning_rate": 0.003,
      "loss": 4.1821,
      "step": 2917
    },
    {
      "epoch": 0.02918,
      "grad_norm": 0.9071341774299415,
      "learning_rate": 0.003,
      "loss": 4.1353,
      "step": 2918
    },
    {
      "epoch": 0.02919,
      "grad_norm": 0.9246562992704038,
      "learning_rate": 0.003,
      "loss": 4.1768,
      "step": 2919
    },
    {
      "epoch": 0.0292,
      "grad_norm": 0.8971677038672106,
      "learning_rate": 0.003,
      "loss": 4.1129,
      "step": 2920
    },
    {
      "epoch": 0.02921,
      "grad_norm": 0.926961421602634,
      "learning_rate": 0.003,
      "loss": 4.1553,
      "step": 2921
    },
    {
      "epoch": 0.02922,
      "grad_norm": 1.0848027072453201,
      "learning_rate": 0.003,
      "loss": 4.161,
      "step": 2922
    },
    {
      "epoch": 0.02923,
      "grad_norm": 0.912142779077174,
      "learning_rate": 0.003,
      "loss": 4.1303,
      "step": 2923
    },
    {
      "epoch": 0.02924,
      "grad_norm": 0.9865161740552381,
      "learning_rate": 0.003,
      "loss": 4.1648,
      "step": 2924
    },
    {
      "epoch": 0.02925,
      "grad_norm": 1.0463105360491507,
      "learning_rate": 0.003,
      "loss": 4.1748,
      "step": 2925
    },
    {
      "epoch": 0.02926,
      "grad_norm": 0.9582122548230526,
      "learning_rate": 0.003,
      "loss": 4.1501,
      "step": 2926
    },
    {
      "epoch": 0.02927,
      "grad_norm": 0.8403311811917105,
      "learning_rate": 0.003,
      "loss": 4.1524,
      "step": 2927
    },
    {
      "epoch": 0.02928,
      "grad_norm": 0.7854536393801775,
      "learning_rate": 0.003,
      "loss": 4.1695,
      "step": 2928
    },
    {
      "epoch": 0.02929,
      "grad_norm": 0.7778022366151639,
      "learning_rate": 0.003,
      "loss": 4.1542,
      "step": 2929
    },
    {
      "epoch": 0.0293,
      "grad_norm": 0.7271690214257167,
      "learning_rate": 0.003,
      "loss": 4.1453,
      "step": 2930
    },
    {
      "epoch": 0.02931,
      "grad_norm": 0.782379273286375,
      "learning_rate": 0.003,
      "loss": 4.1435,
      "step": 2931
    },
    {
      "epoch": 0.02932,
      "grad_norm": 0.7974568484352181,
      "learning_rate": 0.003,
      "loss": 4.1304,
      "step": 2932
    },
    {
      "epoch": 0.02933,
      "grad_norm": 0.7828005432560315,
      "learning_rate": 0.003,
      "loss": 4.1554,
      "step": 2933
    },
    {
      "epoch": 0.02934,
      "grad_norm": 0.6614858211514127,
      "learning_rate": 0.003,
      "loss": 4.14,
      "step": 2934
    },
    {
      "epoch": 0.02935,
      "grad_norm": 0.5873856004708764,
      "learning_rate": 0.003,
      "loss": 4.1239,
      "step": 2935
    },
    {
      "epoch": 0.02936,
      "grad_norm": 0.578823462032298,
      "learning_rate": 0.003,
      "loss": 4.1409,
      "step": 2936
    },
    {
      "epoch": 0.02937,
      "grad_norm": 0.6078930790219389,
      "learning_rate": 0.003,
      "loss": 4.1209,
      "step": 2937
    },
    {
      "epoch": 0.02938,
      "grad_norm": 0.7146509387847667,
      "learning_rate": 0.003,
      "loss": 4.139,
      "step": 2938
    },
    {
      "epoch": 0.02939,
      "grad_norm": 0.8250738871743942,
      "learning_rate": 0.003,
      "loss": 4.1295,
      "step": 2939
    },
    {
      "epoch": 0.0294,
      "grad_norm": 0.8696648935923786,
      "learning_rate": 0.003,
      "loss": 4.1547,
      "step": 2940
    },
    {
      "epoch": 0.02941,
      "grad_norm": 0.9090371360813282,
      "learning_rate": 0.003,
      "loss": 4.1568,
      "step": 2941
    },
    {
      "epoch": 0.02942,
      "grad_norm": 0.8939350957264195,
      "learning_rate": 0.003,
      "loss": 4.1447,
      "step": 2942
    },
    {
      "epoch": 0.02943,
      "grad_norm": 0.7023666348770743,
      "learning_rate": 0.003,
      "loss": 4.1276,
      "step": 2943
    },
    {
      "epoch": 0.02944,
      "grad_norm": 0.7067049448574164,
      "learning_rate": 0.003,
      "loss": 4.1465,
      "step": 2944
    },
    {
      "epoch": 0.02945,
      "grad_norm": 0.6812738232543556,
      "learning_rate": 0.003,
      "loss": 4.1255,
      "step": 2945
    },
    {
      "epoch": 0.02946,
      "grad_norm": 0.6818360632430761,
      "learning_rate": 0.003,
      "loss": 4.1214,
      "step": 2946
    },
    {
      "epoch": 0.02947,
      "grad_norm": 0.6754875767553201,
      "learning_rate": 0.003,
      "loss": 4.1523,
      "step": 2947
    },
    {
      "epoch": 0.02948,
      "grad_norm": 0.6113651211997226,
      "learning_rate": 0.003,
      "loss": 4.1317,
      "step": 2948
    },
    {
      "epoch": 0.02949,
      "grad_norm": 0.5812056588270224,
      "learning_rate": 0.003,
      "loss": 4.1034,
      "step": 2949
    },
    {
      "epoch": 0.0295,
      "grad_norm": 0.6282215017847426,
      "learning_rate": 0.003,
      "loss": 4.1274,
      "step": 2950
    },
    {
      "epoch": 0.02951,
      "grad_norm": 0.7817687295389203,
      "learning_rate": 0.003,
      "loss": 4.0846,
      "step": 2951
    },
    {
      "epoch": 0.02952,
      "grad_norm": 1.0587479164041036,
      "learning_rate": 0.003,
      "loss": 4.1248,
      "step": 2952
    },
    {
      "epoch": 0.02953,
      "grad_norm": 0.9887356887399129,
      "learning_rate": 0.003,
      "loss": 4.1373,
      "step": 2953
    },
    {
      "epoch": 0.02954,
      "grad_norm": 0.7805100959975674,
      "learning_rate": 0.003,
      "loss": 4.1688,
      "step": 2954
    },
    {
      "epoch": 0.02955,
      "grad_norm": 0.6651525603429118,
      "learning_rate": 0.003,
      "loss": 4.138,
      "step": 2955
    },
    {
      "epoch": 0.02956,
      "grad_norm": 0.7699019886627679,
      "learning_rate": 0.003,
      "loss": 4.1312,
      "step": 2956
    },
    {
      "epoch": 0.02957,
      "grad_norm": 0.816443587627682,
      "learning_rate": 0.003,
      "loss": 4.1404,
      "step": 2957
    },
    {
      "epoch": 0.02958,
      "grad_norm": 0.7961877462112285,
      "learning_rate": 0.003,
      "loss": 4.1553,
      "step": 2958
    },
    {
      "epoch": 0.02959,
      "grad_norm": 0.7550243768723249,
      "learning_rate": 0.003,
      "loss": 4.1431,
      "step": 2959
    },
    {
      "epoch": 0.0296,
      "grad_norm": 0.7723125453491877,
      "learning_rate": 0.003,
      "loss": 4.1525,
      "step": 2960
    },
    {
      "epoch": 0.02961,
      "grad_norm": 0.8583607117371619,
      "learning_rate": 0.003,
      "loss": 4.127,
      "step": 2961
    },
    {
      "epoch": 0.02962,
      "grad_norm": 0.8691287903706768,
      "learning_rate": 0.003,
      "loss": 4.1333,
      "step": 2962
    },
    {
      "epoch": 0.02963,
      "grad_norm": 0.8786112930172387,
      "learning_rate": 0.003,
      "loss": 4.1116,
      "step": 2963
    },
    {
      "epoch": 0.02964,
      "grad_norm": 0.9167891542829973,
      "learning_rate": 0.003,
      "loss": 4.1027,
      "step": 2964
    },
    {
      "epoch": 0.02965,
      "grad_norm": 0.9419888897278245,
      "learning_rate": 0.003,
      "loss": 4.1533,
      "step": 2965
    },
    {
      "epoch": 0.02966,
      "grad_norm": 1.1891655776183911,
      "learning_rate": 0.003,
      "loss": 4.1783,
      "step": 2966
    },
    {
      "epoch": 0.02967,
      "grad_norm": 1.0455487688038185,
      "learning_rate": 0.003,
      "loss": 4.1592,
      "step": 2967
    },
    {
      "epoch": 0.02968,
      "grad_norm": 1.0164630073257412,
      "learning_rate": 0.003,
      "loss": 4.155,
      "step": 2968
    },
    {
      "epoch": 0.02969,
      "grad_norm": 1.0185954831664046,
      "learning_rate": 0.003,
      "loss": 4.1548,
      "step": 2969
    },
    {
      "epoch": 0.0297,
      "grad_norm": 1.043969964180144,
      "learning_rate": 0.003,
      "loss": 4.1421,
      "step": 2970
    },
    {
      "epoch": 0.02971,
      "grad_norm": 0.8397020648022002,
      "learning_rate": 0.003,
      "loss": 4.1551,
      "step": 2971
    },
    {
      "epoch": 0.02972,
      "grad_norm": 0.6978014599627598,
      "learning_rate": 0.003,
      "loss": 4.1188,
      "step": 2972
    },
    {
      "epoch": 0.02973,
      "grad_norm": 0.7307935837042818,
      "learning_rate": 0.003,
      "loss": 4.1067,
      "step": 2973
    },
    {
      "epoch": 0.02974,
      "grad_norm": 0.8618322772785041,
      "learning_rate": 0.003,
      "loss": 4.1462,
      "step": 2974
    },
    {
      "epoch": 0.02975,
      "grad_norm": 0.9050029913314735,
      "learning_rate": 0.003,
      "loss": 4.1377,
      "step": 2975
    },
    {
      "epoch": 0.02976,
      "grad_norm": 0.8860984382067472,
      "learning_rate": 0.003,
      "loss": 4.143,
      "step": 2976
    },
    {
      "epoch": 0.02977,
      "grad_norm": 0.6971184630795387,
      "learning_rate": 0.003,
      "loss": 4.1269,
      "step": 2977
    },
    {
      "epoch": 0.02978,
      "grad_norm": 0.7418889082376986,
      "learning_rate": 0.003,
      "loss": 4.1484,
      "step": 2978
    },
    {
      "epoch": 0.02979,
      "grad_norm": 0.7888236204090991,
      "learning_rate": 0.003,
      "loss": 4.133,
      "step": 2979
    },
    {
      "epoch": 0.0298,
      "grad_norm": 0.7867568676249742,
      "learning_rate": 0.003,
      "loss": 4.1354,
      "step": 2980
    },
    {
      "epoch": 0.02981,
      "grad_norm": 0.7844697697074997,
      "learning_rate": 0.003,
      "loss": 4.1438,
      "step": 2981
    },
    {
      "epoch": 0.02982,
      "grad_norm": 0.6873427232122642,
      "learning_rate": 0.003,
      "loss": 4.1364,
      "step": 2982
    },
    {
      "epoch": 0.02983,
      "grad_norm": 0.7086227659108,
      "learning_rate": 0.003,
      "loss": 4.1508,
      "step": 2983
    },
    {
      "epoch": 0.02984,
      "grad_norm": 0.7859850298348623,
      "learning_rate": 0.003,
      "loss": 4.1284,
      "step": 2984
    },
    {
      "epoch": 0.02985,
      "grad_norm": 0.7729797989684547,
      "learning_rate": 0.003,
      "loss": 4.1572,
      "step": 2985
    },
    {
      "epoch": 0.02986,
      "grad_norm": 0.6844032687054661,
      "learning_rate": 0.003,
      "loss": 4.1183,
      "step": 2986
    },
    {
      "epoch": 0.02987,
      "grad_norm": 0.5762042827564682,
      "learning_rate": 0.003,
      "loss": 4.1051,
      "step": 2987
    },
    {
      "epoch": 0.02988,
      "grad_norm": 0.5620617182133862,
      "learning_rate": 0.003,
      "loss": 4.1187,
      "step": 2988
    },
    {
      "epoch": 0.02989,
      "grad_norm": 0.5894029162100797,
      "learning_rate": 0.003,
      "loss": 4.132,
      "step": 2989
    },
    {
      "epoch": 0.0299,
      "grad_norm": 0.5837394519047691,
      "learning_rate": 0.003,
      "loss": 4.0884,
      "step": 2990
    },
    {
      "epoch": 0.02991,
      "grad_norm": 0.5606386580984015,
      "learning_rate": 0.003,
      "loss": 4.1426,
      "step": 2991
    },
    {
      "epoch": 0.02992,
      "grad_norm": 0.6147897819301783,
      "learning_rate": 0.003,
      "loss": 4.1358,
      "step": 2992
    },
    {
      "epoch": 0.02993,
      "grad_norm": 0.6621349604754609,
      "learning_rate": 0.003,
      "loss": 4.1071,
      "step": 2993
    },
    {
      "epoch": 0.02994,
      "grad_norm": 0.8082428932736804,
      "learning_rate": 0.003,
      "loss": 4.1211,
      "step": 2994
    },
    {
      "epoch": 0.02995,
      "grad_norm": 0.9615508716688156,
      "learning_rate": 0.003,
      "loss": 4.1639,
      "step": 2995
    },
    {
      "epoch": 0.02996,
      "grad_norm": 0.9867828831321791,
      "learning_rate": 0.003,
      "loss": 4.1577,
      "step": 2996
    },
    {
      "epoch": 0.02997,
      "grad_norm": 0.6984846126670015,
      "learning_rate": 0.003,
      "loss": 4.112,
      "step": 2997
    },
    {
      "epoch": 0.02998,
      "grad_norm": 0.7004936715134856,
      "learning_rate": 0.003,
      "loss": 4.1088,
      "step": 2998
    },
    {
      "epoch": 0.02999,
      "grad_norm": 0.8087922821030329,
      "learning_rate": 0.003,
      "loss": 4.106,
      "step": 2999
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.8187211297372818,
      "learning_rate": 0.003,
      "loss": 4.1524,
      "step": 3000
    },
    {
      "epoch": 0.03001,
      "grad_norm": 0.7682209061752201,
      "learning_rate": 0.003,
      "loss": 4.1192,
      "step": 3001
    },
    {
      "epoch": 0.03002,
      "grad_norm": 0.7895156191994268,
      "learning_rate": 0.003,
      "loss": 4.1308,
      "step": 3002
    },
    {
      "epoch": 0.03003,
      "grad_norm": 0.9473169165490495,
      "learning_rate": 0.003,
      "loss": 4.1162,
      "step": 3003
    },
    {
      "epoch": 0.03004,
      "grad_norm": 0.9772459835659143,
      "learning_rate": 0.003,
      "loss": 4.0967,
      "step": 3004
    },
    {
      "epoch": 0.03005,
      "grad_norm": 0.9271870841526649,
      "learning_rate": 0.003,
      "loss": 4.1515,
      "step": 3005
    },
    {
      "epoch": 0.03006,
      "grad_norm": 0.81719364295006,
      "learning_rate": 0.003,
      "loss": 4.1395,
      "step": 3006
    },
    {
      "epoch": 0.03007,
      "grad_norm": 0.8963225985425357,
      "learning_rate": 0.003,
      "loss": 4.1152,
      "step": 3007
    },
    {
      "epoch": 0.03008,
      "grad_norm": 0.7818424581508167,
      "learning_rate": 0.003,
      "loss": 4.1409,
      "step": 3008
    },
    {
      "epoch": 0.03009,
      "grad_norm": 0.8175800868725163,
      "learning_rate": 0.003,
      "loss": 4.1174,
      "step": 3009
    },
    {
      "epoch": 0.0301,
      "grad_norm": 0.8915634122617913,
      "learning_rate": 0.003,
      "loss": 4.1397,
      "step": 3010
    },
    {
      "epoch": 0.03011,
      "grad_norm": 0.9916010748888695,
      "learning_rate": 0.003,
      "loss": 4.1279,
      "step": 3011
    },
    {
      "epoch": 0.03012,
      "grad_norm": 0.9009840506519443,
      "learning_rate": 0.003,
      "loss": 4.1303,
      "step": 3012
    },
    {
      "epoch": 0.03013,
      "grad_norm": 0.8990400357883044,
      "learning_rate": 0.003,
      "loss": 4.0956,
      "step": 3013
    },
    {
      "epoch": 0.03014,
      "grad_norm": 0.8701803132962008,
      "learning_rate": 0.003,
      "loss": 4.1241,
      "step": 3014
    },
    {
      "epoch": 0.03015,
      "grad_norm": 0.9059467708321113,
      "learning_rate": 0.003,
      "loss": 4.1246,
      "step": 3015
    },
    {
      "epoch": 0.03016,
      "grad_norm": 0.8139710481414894,
      "learning_rate": 0.003,
      "loss": 4.1273,
      "step": 3016
    },
    {
      "epoch": 0.03017,
      "grad_norm": 0.9003169401719128,
      "learning_rate": 0.003,
      "loss": 4.1309,
      "step": 3017
    },
    {
      "epoch": 0.03018,
      "grad_norm": 0.9148332662761703,
      "learning_rate": 0.003,
      "loss": 4.1282,
      "step": 3018
    },
    {
      "epoch": 0.03019,
      "grad_norm": 0.8097272342713991,
      "learning_rate": 0.003,
      "loss": 4.1396,
      "step": 3019
    },
    {
      "epoch": 0.0302,
      "grad_norm": 0.8482835840133595,
      "learning_rate": 0.003,
      "loss": 4.1312,
      "step": 3020
    },
    {
      "epoch": 0.03021,
      "grad_norm": 0.7875221358593745,
      "learning_rate": 0.003,
      "loss": 4.1522,
      "step": 3021
    },
    {
      "epoch": 0.03022,
      "grad_norm": 0.792143621479002,
      "learning_rate": 0.003,
      "loss": 4.1508,
      "step": 3022
    },
    {
      "epoch": 0.03023,
      "grad_norm": 0.8897124061645492,
      "learning_rate": 0.003,
      "loss": 4.1675,
      "step": 3023
    },
    {
      "epoch": 0.03024,
      "grad_norm": 1.016846130536088,
      "learning_rate": 0.003,
      "loss": 4.1643,
      "step": 3024
    },
    {
      "epoch": 0.03025,
      "grad_norm": 0.9813514493806819,
      "learning_rate": 0.003,
      "loss": 4.1447,
      "step": 3025
    },
    {
      "epoch": 0.03026,
      "grad_norm": 0.9856440178637326,
      "learning_rate": 0.003,
      "loss": 4.119,
      "step": 3026
    },
    {
      "epoch": 0.03027,
      "grad_norm": 1.0170462610037305,
      "learning_rate": 0.003,
      "loss": 4.1467,
      "step": 3027
    },
    {
      "epoch": 0.03028,
      "grad_norm": 0.9963929882829512,
      "learning_rate": 0.003,
      "loss": 4.1061,
      "step": 3028
    },
    {
      "epoch": 0.03029,
      "grad_norm": 1.1230470374601593,
      "learning_rate": 0.003,
      "loss": 4.1539,
      "step": 3029
    },
    {
      "epoch": 0.0303,
      "grad_norm": 0.9952469868078255,
      "learning_rate": 0.003,
      "loss": 4.1371,
      "step": 3030
    },
    {
      "epoch": 0.03031,
      "grad_norm": 0.7974194139158707,
      "learning_rate": 0.003,
      "loss": 4.1117,
      "step": 3031
    },
    {
      "epoch": 0.03032,
      "grad_norm": 0.813594268367442,
      "learning_rate": 0.003,
      "loss": 4.1409,
      "step": 3032
    },
    {
      "epoch": 0.03033,
      "grad_norm": 0.8000080591024091,
      "learning_rate": 0.003,
      "loss": 4.1323,
      "step": 3033
    },
    {
      "epoch": 0.03034,
      "grad_norm": 0.8702456358651927,
      "learning_rate": 0.003,
      "loss": 4.1388,
      "step": 3034
    },
    {
      "epoch": 0.03035,
      "grad_norm": 0.972910117224573,
      "learning_rate": 0.003,
      "loss": 4.1701,
      "step": 3035
    },
    {
      "epoch": 0.03036,
      "grad_norm": 0.946589194340261,
      "learning_rate": 0.003,
      "loss": 4.1449,
      "step": 3036
    },
    {
      "epoch": 0.03037,
      "grad_norm": 1.0013129497511943,
      "learning_rate": 0.003,
      "loss": 4.13,
      "step": 3037
    },
    {
      "epoch": 0.03038,
      "grad_norm": 1.0504414618218458,
      "learning_rate": 0.003,
      "loss": 4.1374,
      "step": 3038
    },
    {
      "epoch": 0.03039,
      "grad_norm": 0.9552159328342077,
      "learning_rate": 0.003,
      "loss": 4.1371,
      "step": 3039
    },
    {
      "epoch": 0.0304,
      "grad_norm": 0.8125546290590606,
      "learning_rate": 0.003,
      "loss": 4.1406,
      "step": 3040
    },
    {
      "epoch": 0.03041,
      "grad_norm": 0.7829978292483236,
      "learning_rate": 0.003,
      "loss": 4.142,
      "step": 3041
    },
    {
      "epoch": 0.03042,
      "grad_norm": 0.7611930258666472,
      "learning_rate": 0.003,
      "loss": 4.1671,
      "step": 3042
    },
    {
      "epoch": 0.03043,
      "grad_norm": 0.7149717720566953,
      "learning_rate": 0.003,
      "loss": 4.118,
      "step": 3043
    },
    {
      "epoch": 0.03044,
      "grad_norm": 0.7624230406515938,
      "learning_rate": 0.003,
      "loss": 4.12,
      "step": 3044
    },
    {
      "epoch": 0.03045,
      "grad_norm": 0.8408403467918055,
      "learning_rate": 0.003,
      "loss": 4.1235,
      "step": 3045
    },
    {
      "epoch": 0.03046,
      "grad_norm": 0.7851334779365589,
      "learning_rate": 0.003,
      "loss": 4.1328,
      "step": 3046
    },
    {
      "epoch": 0.03047,
      "grad_norm": 0.7122809857507961,
      "learning_rate": 0.003,
      "loss": 4.1374,
      "step": 3047
    },
    {
      "epoch": 0.03048,
      "grad_norm": 0.7964799855759337,
      "learning_rate": 0.003,
      "loss": 4.1208,
      "step": 3048
    },
    {
      "epoch": 0.03049,
      "grad_norm": 0.887239863638981,
      "learning_rate": 0.003,
      "loss": 4.1105,
      "step": 3049
    },
    {
      "epoch": 0.0305,
      "grad_norm": 0.9999215882595172,
      "learning_rate": 0.003,
      "loss": 4.1315,
      "step": 3050
    },
    {
      "epoch": 0.03051,
      "grad_norm": 1.0055098406963119,
      "learning_rate": 0.003,
      "loss": 4.1455,
      "step": 3051
    },
    {
      "epoch": 0.03052,
      "grad_norm": 0.9173091708203194,
      "learning_rate": 0.003,
      "loss": 4.146,
      "step": 3052
    },
    {
      "epoch": 0.03053,
      "grad_norm": 0.8629407073466706,
      "learning_rate": 0.003,
      "loss": 4.1506,
      "step": 3053
    },
    {
      "epoch": 0.03054,
      "grad_norm": 0.7744384974895352,
      "learning_rate": 0.003,
      "loss": 4.1267,
      "step": 3054
    },
    {
      "epoch": 0.03055,
      "grad_norm": 0.6670797287560947,
      "learning_rate": 0.003,
      "loss": 4.1245,
      "step": 3055
    },
    {
      "epoch": 0.03056,
      "grad_norm": 0.658467407296622,
      "learning_rate": 0.003,
      "loss": 4.134,
      "step": 3056
    },
    {
      "epoch": 0.03057,
      "grad_norm": 0.6556069512553365,
      "learning_rate": 0.003,
      "loss": 4.1326,
      "step": 3057
    },
    {
      "epoch": 0.03058,
      "grad_norm": 0.5717711332151091,
      "learning_rate": 0.003,
      "loss": 4.1029,
      "step": 3058
    },
    {
      "epoch": 0.03059,
      "grad_norm": 0.493409526384602,
      "learning_rate": 0.003,
      "loss": 4.1103,
      "step": 3059
    },
    {
      "epoch": 0.0306,
      "grad_norm": 0.5082424754021986,
      "learning_rate": 0.003,
      "loss": 4.0984,
      "step": 3060
    },
    {
      "epoch": 0.03061,
      "grad_norm": 0.4898096783004193,
      "learning_rate": 0.003,
      "loss": 4.1212,
      "step": 3061
    },
    {
      "epoch": 0.03062,
      "grad_norm": 0.5061719030039263,
      "learning_rate": 0.003,
      "loss": 4.144,
      "step": 3062
    },
    {
      "epoch": 0.03063,
      "grad_norm": 0.6337804215976139,
      "learning_rate": 0.003,
      "loss": 4.0908,
      "step": 3063
    },
    {
      "epoch": 0.03064,
      "grad_norm": 0.7363596095895648,
      "learning_rate": 0.003,
      "loss": 4.1252,
      "step": 3064
    },
    {
      "epoch": 0.03065,
      "grad_norm": 0.7804431388454032,
      "learning_rate": 0.003,
      "loss": 4.0953,
      "step": 3065
    },
    {
      "epoch": 0.03066,
      "grad_norm": 0.8048441855814717,
      "learning_rate": 0.003,
      "loss": 4.14,
      "step": 3066
    },
    {
      "epoch": 0.03067,
      "grad_norm": 0.936660465754291,
      "learning_rate": 0.003,
      "loss": 4.1504,
      "step": 3067
    },
    {
      "epoch": 0.03068,
      "grad_norm": 1.199111088765608,
      "learning_rate": 0.003,
      "loss": 4.1327,
      "step": 3068
    },
    {
      "epoch": 0.03069,
      "grad_norm": 1.024080179977825,
      "learning_rate": 0.003,
      "loss": 4.1138,
      "step": 3069
    },
    {
      "epoch": 0.0307,
      "grad_norm": 0.9087822982161698,
      "learning_rate": 0.003,
      "loss": 4.125,
      "step": 3070
    },
    {
      "epoch": 0.03071,
      "grad_norm": 0.773169782429931,
      "learning_rate": 0.003,
      "loss": 4.121,
      "step": 3071
    },
    {
      "epoch": 0.03072,
      "grad_norm": 0.7385627562002322,
      "learning_rate": 0.003,
      "loss": 4.1224,
      "step": 3072
    },
    {
      "epoch": 0.03073,
      "grad_norm": 0.8590738611383513,
      "learning_rate": 0.003,
      "loss": 4.1272,
      "step": 3073
    },
    {
      "epoch": 0.03074,
      "grad_norm": 0.7460812381201545,
      "learning_rate": 0.003,
      "loss": 4.1228,
      "step": 3074
    },
    {
      "epoch": 0.03075,
      "grad_norm": 0.9060557817564706,
      "learning_rate": 0.003,
      "loss": 4.1505,
      "step": 3075
    },
    {
      "epoch": 0.03076,
      "grad_norm": 0.9352871058483179,
      "learning_rate": 0.003,
      "loss": 4.1495,
      "step": 3076
    },
    {
      "epoch": 0.03077,
      "grad_norm": 0.8648002758853153,
      "learning_rate": 0.003,
      "loss": 4.1446,
      "step": 3077
    },
    {
      "epoch": 0.03078,
      "grad_norm": 0.7596804078061197,
      "learning_rate": 0.003,
      "loss": 4.1244,
      "step": 3078
    },
    {
      "epoch": 0.03079,
      "grad_norm": 0.7757175523238533,
      "learning_rate": 0.003,
      "loss": 4.1133,
      "step": 3079
    },
    {
      "epoch": 0.0308,
      "grad_norm": 0.8209364936522558,
      "learning_rate": 0.003,
      "loss": 4.1425,
      "step": 3080
    },
    {
      "epoch": 0.03081,
      "grad_norm": 0.8477812326348159,
      "learning_rate": 0.003,
      "loss": 4.1152,
      "step": 3081
    },
    {
      "epoch": 0.03082,
      "grad_norm": 0.8013577673715985,
      "learning_rate": 0.003,
      "loss": 4.1191,
      "step": 3082
    },
    {
      "epoch": 0.03083,
      "grad_norm": 0.7134216891930999,
      "learning_rate": 0.003,
      "loss": 4.1183,
      "step": 3083
    },
    {
      "epoch": 0.03084,
      "grad_norm": 0.7257190995117091,
      "learning_rate": 0.003,
      "loss": 4.1299,
      "step": 3084
    },
    {
      "epoch": 0.03085,
      "grad_norm": 0.8088783726503086,
      "learning_rate": 0.003,
      "loss": 4.1259,
      "step": 3085
    },
    {
      "epoch": 0.03086,
      "grad_norm": 0.8932889669749948,
      "learning_rate": 0.003,
      "loss": 4.1502,
      "step": 3086
    },
    {
      "epoch": 0.03087,
      "grad_norm": 0.9271283171010463,
      "learning_rate": 0.003,
      "loss": 4.1256,
      "step": 3087
    },
    {
      "epoch": 0.03088,
      "grad_norm": 1.0610338130344832,
      "learning_rate": 0.003,
      "loss": 4.1534,
      "step": 3088
    },
    {
      "epoch": 0.03089,
      "grad_norm": 0.9082960513513103,
      "learning_rate": 0.003,
      "loss": 4.1137,
      "step": 3089
    },
    {
      "epoch": 0.0309,
      "grad_norm": 0.8382419044332593,
      "learning_rate": 0.003,
      "loss": 4.1152,
      "step": 3090
    },
    {
      "epoch": 0.03091,
      "grad_norm": 0.7721347908154246,
      "learning_rate": 0.003,
      "loss": 4.0786,
      "step": 3091
    },
    {
      "epoch": 0.03092,
      "grad_norm": 0.5770289909808661,
      "learning_rate": 0.003,
      "loss": 4.0988,
      "step": 3092
    },
    {
      "epoch": 0.03093,
      "grad_norm": 0.6048583748826258,
      "learning_rate": 0.003,
      "loss": 4.1335,
      "step": 3093
    },
    {
      "epoch": 0.03094,
      "grad_norm": 0.5039228690347607,
      "learning_rate": 0.003,
      "loss": 4.1124,
      "step": 3094
    },
    {
      "epoch": 0.03095,
      "grad_norm": 0.4932619648402882,
      "learning_rate": 0.003,
      "loss": 4.1254,
      "step": 3095
    },
    {
      "epoch": 0.03096,
      "grad_norm": 0.44699630710548827,
      "learning_rate": 0.003,
      "loss": 4.1011,
      "step": 3096
    },
    {
      "epoch": 0.03097,
      "grad_norm": 0.42800774052172613,
      "learning_rate": 0.003,
      "loss": 4.1233,
      "step": 3097
    },
    {
      "epoch": 0.03098,
      "grad_norm": 0.5150974229730493,
      "learning_rate": 0.003,
      "loss": 4.1199,
      "step": 3098
    },
    {
      "epoch": 0.03099,
      "grad_norm": 0.6808104155412404,
      "learning_rate": 0.003,
      "loss": 4.1152,
      "step": 3099
    },
    {
      "epoch": 0.031,
      "grad_norm": 1.1124941338393186,
      "learning_rate": 0.003,
      "loss": 4.1238,
      "step": 3100
    },
    {
      "epoch": 0.03101,
      "grad_norm": 1.0220222334230853,
      "learning_rate": 0.003,
      "loss": 4.1232,
      "step": 3101
    },
    {
      "epoch": 0.03102,
      "grad_norm": 0.6635321325690858,
      "learning_rate": 0.003,
      "loss": 4.1121,
      "step": 3102
    },
    {
      "epoch": 0.03103,
      "grad_norm": 0.7879413374703326,
      "learning_rate": 0.003,
      "loss": 4.1018,
      "step": 3103
    },
    {
      "epoch": 0.03104,
      "grad_norm": 1.0336622472450878,
      "learning_rate": 0.003,
      "loss": 4.1177,
      "step": 3104
    },
    {
      "epoch": 0.03105,
      "grad_norm": 0.8225297439648239,
      "learning_rate": 0.003,
      "loss": 4.0889,
      "step": 3105
    },
    {
      "epoch": 0.03106,
      "grad_norm": 0.9200113560338348,
      "learning_rate": 0.003,
      "loss": 4.1106,
      "step": 3106
    },
    {
      "epoch": 0.03107,
      "grad_norm": 0.8693837130689374,
      "learning_rate": 0.003,
      "loss": 4.0948,
      "step": 3107
    },
    {
      "epoch": 0.03108,
      "grad_norm": 0.7481880861084276,
      "learning_rate": 0.003,
      "loss": 4.1016,
      "step": 3108
    },
    {
      "epoch": 0.03109,
      "grad_norm": 0.7161901524718123,
      "learning_rate": 0.003,
      "loss": 4.1063,
      "step": 3109
    },
    {
      "epoch": 0.0311,
      "grad_norm": 0.7717771970106354,
      "learning_rate": 0.003,
      "loss": 4.1301,
      "step": 3110
    },
    {
      "epoch": 0.03111,
      "grad_norm": 0.7918589418817704,
      "learning_rate": 0.003,
      "loss": 4.1105,
      "step": 3111
    },
    {
      "epoch": 0.03112,
      "grad_norm": 0.8112295219654945,
      "learning_rate": 0.003,
      "loss": 4.1307,
      "step": 3112
    },
    {
      "epoch": 0.03113,
      "grad_norm": 0.8724658411048615,
      "learning_rate": 0.003,
      "loss": 4.1204,
      "step": 3113
    },
    {
      "epoch": 0.03114,
      "grad_norm": 1.0040711142290948,
      "learning_rate": 0.003,
      "loss": 4.1346,
      "step": 3114
    },
    {
      "epoch": 0.03115,
      "grad_norm": 1.2108105696861353,
      "learning_rate": 0.003,
      "loss": 4.1509,
      "step": 3115
    },
    {
      "epoch": 0.03116,
      "grad_norm": 0.9386810822379548,
      "learning_rate": 0.003,
      "loss": 4.1225,
      "step": 3116
    },
    {
      "epoch": 0.03117,
      "grad_norm": 0.7805193907050799,
      "learning_rate": 0.003,
      "loss": 4.1247,
      "step": 3117
    },
    {
      "epoch": 0.03118,
      "grad_norm": 0.9100639956456111,
      "learning_rate": 0.003,
      "loss": 4.1304,
      "step": 3118
    },
    {
      "epoch": 0.03119,
      "grad_norm": 0.9031494351027202,
      "learning_rate": 0.003,
      "loss": 4.1434,
      "step": 3119
    },
    {
      "epoch": 0.0312,
      "grad_norm": 0.9920122086984918,
      "learning_rate": 0.003,
      "loss": 4.154,
      "step": 3120
    },
    {
      "epoch": 0.03121,
      "grad_norm": 1.24679349546566,
      "learning_rate": 0.003,
      "loss": 4.1376,
      "step": 3121
    },
    {
      "epoch": 0.03122,
      "grad_norm": 0.8914719736220875,
      "learning_rate": 0.003,
      "loss": 4.1494,
      "step": 3122
    },
    {
      "epoch": 0.03123,
      "grad_norm": 0.7579991423334604,
      "learning_rate": 0.003,
      "loss": 4.1403,
      "step": 3123
    },
    {
      "epoch": 0.03124,
      "grad_norm": 0.7621871567104618,
      "learning_rate": 0.003,
      "loss": 4.1182,
      "step": 3124
    },
    {
      "epoch": 0.03125,
      "grad_norm": 0.8062917259017726,
      "learning_rate": 0.003,
      "loss": 4.1181,
      "step": 3125
    },
    {
      "epoch": 0.03126,
      "grad_norm": 0.833712338155859,
      "learning_rate": 0.003,
      "loss": 4.1244,
      "step": 3126
    },
    {
      "epoch": 0.03127,
      "grad_norm": 0.8998971933566047,
      "learning_rate": 0.003,
      "loss": 4.1339,
      "step": 3127
    },
    {
      "epoch": 0.03128,
      "grad_norm": 0.9380372829323406,
      "learning_rate": 0.003,
      "loss": 4.126,
      "step": 3128
    },
    {
      "epoch": 0.03129,
      "grad_norm": 0.9521516498255859,
      "learning_rate": 0.003,
      "loss": 4.1452,
      "step": 3129
    },
    {
      "epoch": 0.0313,
      "grad_norm": 0.8951772330842159,
      "learning_rate": 0.003,
      "loss": 4.1496,
      "step": 3130
    },
    {
      "epoch": 0.03131,
      "grad_norm": 0.7755621391919874,
      "learning_rate": 0.003,
      "loss": 4.1279,
      "step": 3131
    },
    {
      "epoch": 0.03132,
      "grad_norm": 0.8129807814076891,
      "learning_rate": 0.003,
      "loss": 4.1478,
      "step": 3132
    },
    {
      "epoch": 0.03133,
      "grad_norm": 0.9303049881214197,
      "learning_rate": 0.003,
      "loss": 4.1402,
      "step": 3133
    },
    {
      "epoch": 0.03134,
      "grad_norm": 0.9415721268309895,
      "learning_rate": 0.003,
      "loss": 4.1412,
      "step": 3134
    },
    {
      "epoch": 0.03135,
      "grad_norm": 0.9104183542988432,
      "learning_rate": 0.003,
      "loss": 4.1062,
      "step": 3135
    },
    {
      "epoch": 0.03136,
      "grad_norm": 0.9100236222536682,
      "learning_rate": 0.003,
      "loss": 4.1242,
      "step": 3136
    },
    {
      "epoch": 0.03137,
      "grad_norm": 0.8798690651639479,
      "learning_rate": 0.003,
      "loss": 4.1248,
      "step": 3137
    },
    {
      "epoch": 0.03138,
      "grad_norm": 0.7903392795080382,
      "learning_rate": 0.003,
      "loss": 4.1249,
      "step": 3138
    },
    {
      "epoch": 0.03139,
      "grad_norm": 0.7667246215003086,
      "learning_rate": 0.003,
      "loss": 4.1279,
      "step": 3139
    },
    {
      "epoch": 0.0314,
      "grad_norm": 0.7544018295001089,
      "learning_rate": 0.003,
      "loss": 4.1255,
      "step": 3140
    },
    {
      "epoch": 0.03141,
      "grad_norm": 0.6716524130054017,
      "learning_rate": 0.003,
      "loss": 4.154,
      "step": 3141
    },
    {
      "epoch": 0.03142,
      "grad_norm": 0.6382378457514613,
      "learning_rate": 0.003,
      "loss": 4.1317,
      "step": 3142
    },
    {
      "epoch": 0.03143,
      "grad_norm": 0.7526757290066967,
      "learning_rate": 0.003,
      "loss": 4.1206,
      "step": 3143
    },
    {
      "epoch": 0.03144,
      "grad_norm": 0.938489146233584,
      "learning_rate": 0.003,
      "loss": 4.0931,
      "step": 3144
    },
    {
      "epoch": 0.03145,
      "grad_norm": 1.0493538668141305,
      "learning_rate": 0.003,
      "loss": 4.1262,
      "step": 3145
    },
    {
      "epoch": 0.03146,
      "grad_norm": 0.9048504750556206,
      "learning_rate": 0.003,
      "loss": 4.1308,
      "step": 3146
    },
    {
      "epoch": 0.03147,
      "grad_norm": 0.9152634263998236,
      "learning_rate": 0.003,
      "loss": 4.1297,
      "step": 3147
    },
    {
      "epoch": 0.03148,
      "grad_norm": 0.878607908387141,
      "learning_rate": 0.003,
      "loss": 4.1341,
      "step": 3148
    },
    {
      "epoch": 0.03149,
      "grad_norm": 0.6944713264838394,
      "learning_rate": 0.003,
      "loss": 4.1322,
      "step": 3149
    },
    {
      "epoch": 0.0315,
      "grad_norm": 0.6811030653109197,
      "learning_rate": 0.003,
      "loss": 4.1195,
      "step": 3150
    },
    {
      "epoch": 0.03151,
      "grad_norm": 0.7166494028947639,
      "learning_rate": 0.003,
      "loss": 4.1292,
      "step": 3151
    },
    {
      "epoch": 0.03152,
      "grad_norm": 0.7180332750616866,
      "learning_rate": 0.003,
      "loss": 4.1294,
      "step": 3152
    },
    {
      "epoch": 0.03153,
      "grad_norm": 0.8734661321822886,
      "learning_rate": 0.003,
      "loss": 4.1407,
      "step": 3153
    },
    {
      "epoch": 0.03154,
      "grad_norm": 0.9738879775883073,
      "learning_rate": 0.003,
      "loss": 4.1387,
      "step": 3154
    },
    {
      "epoch": 0.03155,
      "grad_norm": 0.8618576527781467,
      "learning_rate": 0.003,
      "loss": 4.127,
      "step": 3155
    },
    {
      "epoch": 0.03156,
      "grad_norm": 0.7300214083601039,
      "learning_rate": 0.003,
      "loss": 4.1063,
      "step": 3156
    },
    {
      "epoch": 0.03157,
      "grad_norm": 0.6260105425384149,
      "learning_rate": 0.003,
      "loss": 4.1296,
      "step": 3157
    },
    {
      "epoch": 0.03158,
      "grad_norm": 0.682384994546209,
      "learning_rate": 0.003,
      "loss": 4.1132,
      "step": 3158
    },
    {
      "epoch": 0.03159,
      "grad_norm": 0.7718937672456789,
      "learning_rate": 0.003,
      "loss": 4.1181,
      "step": 3159
    },
    {
      "epoch": 0.0316,
      "grad_norm": 0.6586793613223971,
      "learning_rate": 0.003,
      "loss": 4.1277,
      "step": 3160
    },
    {
      "epoch": 0.03161,
      "grad_norm": 0.6187219141678427,
      "learning_rate": 0.003,
      "loss": 4.133,
      "step": 3161
    },
    {
      "epoch": 0.03162,
      "grad_norm": 0.7732103677060469,
      "learning_rate": 0.003,
      "loss": 4.1389,
      "step": 3162
    },
    {
      "epoch": 0.03163,
      "grad_norm": 0.9681852590593839,
      "learning_rate": 0.003,
      "loss": 4.129,
      "step": 3163
    },
    {
      "epoch": 0.03164,
      "grad_norm": 0.983612124215567,
      "learning_rate": 0.003,
      "loss": 4.1291,
      "step": 3164
    },
    {
      "epoch": 0.03165,
      "grad_norm": 0.8828238997897765,
      "learning_rate": 0.003,
      "loss": 4.0995,
      "step": 3165
    },
    {
      "epoch": 0.03166,
      "grad_norm": 0.9082253980909,
      "learning_rate": 0.003,
      "loss": 4.1181,
      "step": 3166
    },
    {
      "epoch": 0.03167,
      "grad_norm": 0.8883995374606223,
      "learning_rate": 0.003,
      "loss": 4.11,
      "step": 3167
    },
    {
      "epoch": 0.03168,
      "grad_norm": 0.8814873185877611,
      "learning_rate": 0.003,
      "loss": 4.1367,
      "step": 3168
    },
    {
      "epoch": 0.03169,
      "grad_norm": 0.8358140037799937,
      "learning_rate": 0.003,
      "loss": 4.1428,
      "step": 3169
    },
    {
      "epoch": 0.0317,
      "grad_norm": 0.9091839519331014,
      "learning_rate": 0.003,
      "loss": 4.1008,
      "step": 3170
    },
    {
      "epoch": 0.03171,
      "grad_norm": 0.7528676805145117,
      "learning_rate": 0.003,
      "loss": 4.1362,
      "step": 3171
    },
    {
      "epoch": 0.03172,
      "grad_norm": 0.6184044517416146,
      "learning_rate": 0.003,
      "loss": 4.1235,
      "step": 3172
    },
    {
      "epoch": 0.03173,
      "grad_norm": 0.7308731124601682,
      "learning_rate": 0.003,
      "loss": 4.0974,
      "step": 3173
    },
    {
      "epoch": 0.03174,
      "grad_norm": 0.8588565114301099,
      "learning_rate": 0.003,
      "loss": 4.1133,
      "step": 3174
    },
    {
      "epoch": 0.03175,
      "grad_norm": 1.2162741354980293,
      "learning_rate": 0.003,
      "loss": 4.1527,
      "step": 3175
    },
    {
      "epoch": 0.03176,
      "grad_norm": 0.9397594133644137,
      "learning_rate": 0.003,
      "loss": 4.1148,
      "step": 3176
    },
    {
      "epoch": 0.03177,
      "grad_norm": 0.9189809109610272,
      "learning_rate": 0.003,
      "loss": 4.1275,
      "step": 3177
    },
    {
      "epoch": 0.03178,
      "grad_norm": 0.8047272875332512,
      "learning_rate": 0.003,
      "loss": 4.1657,
      "step": 3178
    },
    {
      "epoch": 0.03179,
      "grad_norm": 0.932150949680668,
      "learning_rate": 0.003,
      "loss": 4.1268,
      "step": 3179
    },
    {
      "epoch": 0.0318,
      "grad_norm": 0.9592877689147434,
      "learning_rate": 0.003,
      "loss": 4.1293,
      "step": 3180
    },
    {
      "epoch": 0.03181,
      "grad_norm": 0.9595136242555029,
      "learning_rate": 0.003,
      "loss": 4.1419,
      "step": 3181
    },
    {
      "epoch": 0.03182,
      "grad_norm": 0.9565837686401868,
      "learning_rate": 0.003,
      "loss": 4.142,
      "step": 3182
    },
    {
      "epoch": 0.03183,
      "grad_norm": 0.9741938675445059,
      "learning_rate": 0.003,
      "loss": 4.1478,
      "step": 3183
    },
    {
      "epoch": 0.03184,
      "grad_norm": 0.9857805137742832,
      "learning_rate": 0.003,
      "loss": 4.1174,
      "step": 3184
    },
    {
      "epoch": 0.03185,
      "grad_norm": 0.9176062885733086,
      "learning_rate": 0.003,
      "loss": 4.1379,
      "step": 3185
    },
    {
      "epoch": 0.03186,
      "grad_norm": 0.8884334076590751,
      "learning_rate": 0.003,
      "loss": 4.1367,
      "step": 3186
    },
    {
      "epoch": 0.03187,
      "grad_norm": 0.8658125563673901,
      "learning_rate": 0.003,
      "loss": 4.0961,
      "step": 3187
    },
    {
      "epoch": 0.03188,
      "grad_norm": 0.8999051178552305,
      "learning_rate": 0.003,
      "loss": 4.1334,
      "step": 3188
    },
    {
      "epoch": 0.03189,
      "grad_norm": 0.7859126574348619,
      "learning_rate": 0.003,
      "loss": 4.1212,
      "step": 3189
    },
    {
      "epoch": 0.0319,
      "grad_norm": 0.8606531029730619,
      "learning_rate": 0.003,
      "loss": 4.1147,
      "step": 3190
    },
    {
      "epoch": 0.03191,
      "grad_norm": 0.9918147629707751,
      "learning_rate": 0.003,
      "loss": 4.149,
      "step": 3191
    },
    {
      "epoch": 0.03192,
      "grad_norm": 1.0056390969187974,
      "learning_rate": 0.003,
      "loss": 4.132,
      "step": 3192
    },
    {
      "epoch": 0.03193,
      "grad_norm": 0.8860275310714892,
      "learning_rate": 0.003,
      "loss": 4.1258,
      "step": 3193
    },
    {
      "epoch": 0.03194,
      "grad_norm": 0.9173243755728898,
      "learning_rate": 0.003,
      "loss": 4.1534,
      "step": 3194
    },
    {
      "epoch": 0.03195,
      "grad_norm": 0.8392077257777418,
      "learning_rate": 0.003,
      "loss": 4.1203,
      "step": 3195
    },
    {
      "epoch": 0.03196,
      "grad_norm": 0.7352847590623333,
      "learning_rate": 0.003,
      "loss": 4.1274,
      "step": 3196
    },
    {
      "epoch": 0.03197,
      "grad_norm": 0.715497912918627,
      "learning_rate": 0.003,
      "loss": 4.0961,
      "step": 3197
    },
    {
      "epoch": 0.03198,
      "grad_norm": 0.6376443032868672,
      "learning_rate": 0.003,
      "loss": 4.1222,
      "step": 3198
    },
    {
      "epoch": 0.03199,
      "grad_norm": 0.5811704510301426,
      "learning_rate": 0.003,
      "loss": 4.0959,
      "step": 3199
    },
    {
      "epoch": 0.032,
      "grad_norm": 0.5381272704276707,
      "learning_rate": 0.003,
      "loss": 4.116,
      "step": 3200
    },
    {
      "epoch": 0.03201,
      "grad_norm": 0.5120575482476429,
      "learning_rate": 0.003,
      "loss": 4.0758,
      "step": 3201
    },
    {
      "epoch": 0.03202,
      "grad_norm": 0.5958579649120576,
      "learning_rate": 0.003,
      "loss": 4.1164,
      "step": 3202
    },
    {
      "epoch": 0.03203,
      "grad_norm": 0.7890366994930119,
      "learning_rate": 0.003,
      "loss": 4.1041,
      "step": 3203
    },
    {
      "epoch": 0.03204,
      "grad_norm": 0.9077676416657651,
      "learning_rate": 0.003,
      "loss": 4.1246,
      "step": 3204
    },
    {
      "epoch": 0.03205,
      "grad_norm": 0.754000654803963,
      "learning_rate": 0.003,
      "loss": 4.1157,
      "step": 3205
    },
    {
      "epoch": 0.03206,
      "grad_norm": 0.600502725716816,
      "learning_rate": 0.003,
      "loss": 4.1078,
      "step": 3206
    },
    {
      "epoch": 0.03207,
      "grad_norm": 0.6048683504208053,
      "learning_rate": 0.003,
      "loss": 4.0876,
      "step": 3207
    },
    {
      "epoch": 0.03208,
      "grad_norm": 0.7129517255787963,
      "learning_rate": 0.003,
      "loss": 4.1136,
      "step": 3208
    },
    {
      "epoch": 0.03209,
      "grad_norm": 0.8158420961472652,
      "learning_rate": 0.003,
      "loss": 4.1516,
      "step": 3209
    },
    {
      "epoch": 0.0321,
      "grad_norm": 0.7005791064338225,
      "learning_rate": 0.003,
      "loss": 4.1273,
      "step": 3210
    },
    {
      "epoch": 0.03211,
      "grad_norm": 0.6098399298288999,
      "learning_rate": 0.003,
      "loss": 4.0984,
      "step": 3211
    },
    {
      "epoch": 0.03212,
      "grad_norm": 0.6538561751290226,
      "learning_rate": 0.003,
      "loss": 4.1089,
      "step": 3212
    },
    {
      "epoch": 0.03213,
      "grad_norm": 0.5790618052777453,
      "learning_rate": 0.003,
      "loss": 4.1087,
      "step": 3213
    },
    {
      "epoch": 0.03214,
      "grad_norm": 0.6875003076630353,
      "learning_rate": 0.003,
      "loss": 4.0786,
      "step": 3214
    },
    {
      "epoch": 0.03215,
      "grad_norm": 0.799314724709517,
      "learning_rate": 0.003,
      "loss": 4.0949,
      "step": 3215
    },
    {
      "epoch": 0.03216,
      "grad_norm": 0.8890107745271224,
      "learning_rate": 0.003,
      "loss": 4.1075,
      "step": 3216
    },
    {
      "epoch": 0.03217,
      "grad_norm": 0.7725727639400009,
      "learning_rate": 0.003,
      "loss": 4.1044,
      "step": 3217
    },
    {
      "epoch": 0.03218,
      "grad_norm": 0.7089931384963014,
      "learning_rate": 0.003,
      "loss": 4.096,
      "step": 3218
    },
    {
      "epoch": 0.03219,
      "grad_norm": 0.755051345678096,
      "learning_rate": 0.003,
      "loss": 4.1025,
      "step": 3219
    },
    {
      "epoch": 0.0322,
      "grad_norm": 0.9521310432872768,
      "learning_rate": 0.003,
      "loss": 4.1255,
      "step": 3220
    },
    {
      "epoch": 0.03221,
      "grad_norm": 0.8616808278278768,
      "learning_rate": 0.003,
      "loss": 4.1285,
      "step": 3221
    },
    {
      "epoch": 0.03222,
      "grad_norm": 0.873993324764959,
      "learning_rate": 0.003,
      "loss": 4.1188,
      "step": 3222
    },
    {
      "epoch": 0.03223,
      "grad_norm": 0.8264507288043698,
      "learning_rate": 0.003,
      "loss": 4.1232,
      "step": 3223
    },
    {
      "epoch": 0.03224,
      "grad_norm": 0.8289275480200801,
      "learning_rate": 0.003,
      "loss": 4.0847,
      "step": 3224
    },
    {
      "epoch": 0.03225,
      "grad_norm": 0.7514528620660997,
      "learning_rate": 0.003,
      "loss": 4.1167,
      "step": 3225
    },
    {
      "epoch": 0.03226,
      "grad_norm": 0.7197038673889892,
      "learning_rate": 0.003,
      "loss": 4.0974,
      "step": 3226
    },
    {
      "epoch": 0.03227,
      "grad_norm": 0.8892719165128083,
      "learning_rate": 0.003,
      "loss": 4.1198,
      "step": 3227
    },
    {
      "epoch": 0.03228,
      "grad_norm": 0.9570294472379692,
      "learning_rate": 0.003,
      "loss": 4.1042,
      "step": 3228
    },
    {
      "epoch": 0.03229,
      "grad_norm": 0.9375751530879919,
      "learning_rate": 0.003,
      "loss": 4.12,
      "step": 3229
    },
    {
      "epoch": 0.0323,
      "grad_norm": 0.9755322805010285,
      "learning_rate": 0.003,
      "loss": 4.1181,
      "step": 3230
    },
    {
      "epoch": 0.03231,
      "grad_norm": 1.2740697032530537,
      "learning_rate": 0.003,
      "loss": 4.1456,
      "step": 3231
    },
    {
      "epoch": 0.03232,
      "grad_norm": 0.8205568087154713,
      "learning_rate": 0.003,
      "loss": 4.1234,
      "step": 3232
    },
    {
      "epoch": 0.03233,
      "grad_norm": 0.7719838274157066,
      "learning_rate": 0.003,
      "loss": 4.1376,
      "step": 3233
    },
    {
      "epoch": 0.03234,
      "grad_norm": 0.7880067935874491,
      "learning_rate": 0.003,
      "loss": 4.1178,
      "step": 3234
    },
    {
      "epoch": 0.03235,
      "grad_norm": 0.8025776008313478,
      "learning_rate": 0.003,
      "loss": 4.1267,
      "step": 3235
    },
    {
      "epoch": 0.03236,
      "grad_norm": 1.0153689767602323,
      "learning_rate": 0.003,
      "loss": 4.124,
      "step": 3236
    },
    {
      "epoch": 0.03237,
      "grad_norm": 1.0960896554896313,
      "learning_rate": 0.003,
      "loss": 4.1491,
      "step": 3237
    },
    {
      "epoch": 0.03238,
      "grad_norm": 0.9653438379897337,
      "learning_rate": 0.003,
      "loss": 4.1538,
      "step": 3238
    },
    {
      "epoch": 0.03239,
      "grad_norm": 0.9477216778661465,
      "learning_rate": 0.003,
      "loss": 4.1309,
      "step": 3239
    },
    {
      "epoch": 0.0324,
      "grad_norm": 1.0000644526629416,
      "learning_rate": 0.003,
      "loss": 4.1522,
      "step": 3240
    },
    {
      "epoch": 0.03241,
      "grad_norm": 0.9623250673117251,
      "learning_rate": 0.003,
      "loss": 4.1424,
      "step": 3241
    },
    {
      "epoch": 0.03242,
      "grad_norm": 1.00080805991717,
      "learning_rate": 0.003,
      "loss": 4.1214,
      "step": 3242
    },
    {
      "epoch": 0.03243,
      "grad_norm": 0.864922721509246,
      "learning_rate": 0.003,
      "loss": 4.1035,
      "step": 3243
    },
    {
      "epoch": 0.03244,
      "grad_norm": 0.7896866879394736,
      "learning_rate": 0.003,
      "loss": 4.13,
      "step": 3244
    },
    {
      "epoch": 0.03245,
      "grad_norm": 0.723475186710102,
      "learning_rate": 0.003,
      "loss": 4.1345,
      "step": 3245
    },
    {
      "epoch": 0.03246,
      "grad_norm": 0.6786037554737262,
      "learning_rate": 0.003,
      "loss": 4.1199,
      "step": 3246
    },
    {
      "epoch": 0.03247,
      "grad_norm": 0.6637920051672874,
      "learning_rate": 0.003,
      "loss": 4.1275,
      "step": 3247
    },
    {
      "epoch": 0.03248,
      "grad_norm": 0.7161721376620257,
      "learning_rate": 0.003,
      "loss": 4.1439,
      "step": 3248
    },
    {
      "epoch": 0.03249,
      "grad_norm": 0.7558986889375228,
      "learning_rate": 0.003,
      "loss": 4.1108,
      "step": 3249
    },
    {
      "epoch": 0.0325,
      "grad_norm": 0.7122471010381649,
      "learning_rate": 0.003,
      "loss": 4.1412,
      "step": 3250
    },
    {
      "epoch": 0.03251,
      "grad_norm": 0.7576764147111074,
      "learning_rate": 0.003,
      "loss": 4.1378,
      "step": 3251
    },
    {
      "epoch": 0.03252,
      "grad_norm": 0.6662542349208606,
      "learning_rate": 0.003,
      "loss": 4.1022,
      "step": 3252
    },
    {
      "epoch": 0.03253,
      "grad_norm": 0.6584169138349598,
      "learning_rate": 0.003,
      "loss": 4.1033,
      "step": 3253
    },
    {
      "epoch": 0.03254,
      "grad_norm": 0.7740732689052414,
      "learning_rate": 0.003,
      "loss": 4.1012,
      "step": 3254
    },
    {
      "epoch": 0.03255,
      "grad_norm": 0.9473846923391861,
      "learning_rate": 0.003,
      "loss": 4.1216,
      "step": 3255
    },
    {
      "epoch": 0.03256,
      "grad_norm": 0.929085241706198,
      "learning_rate": 0.003,
      "loss": 4.1245,
      "step": 3256
    },
    {
      "epoch": 0.03257,
      "grad_norm": 0.9501158349657576,
      "learning_rate": 0.003,
      "loss": 4.1102,
      "step": 3257
    },
    {
      "epoch": 0.03258,
      "grad_norm": 0.9141127030789927,
      "learning_rate": 0.003,
      "loss": 4.1248,
      "step": 3258
    },
    {
      "epoch": 0.03259,
      "grad_norm": 0.7903443980069681,
      "learning_rate": 0.003,
      "loss": 4.1197,
      "step": 3259
    },
    {
      "epoch": 0.0326,
      "grad_norm": 0.7186252333798928,
      "learning_rate": 0.003,
      "loss": 4.1173,
      "step": 3260
    },
    {
      "epoch": 0.03261,
      "grad_norm": 0.6176139155287416,
      "learning_rate": 0.003,
      "loss": 4.0891,
      "step": 3261
    },
    {
      "epoch": 0.03262,
      "grad_norm": 0.5770619992009712,
      "learning_rate": 0.003,
      "loss": 4.1129,
      "step": 3262
    },
    {
      "epoch": 0.03263,
      "grad_norm": 0.6179754292775074,
      "learning_rate": 0.003,
      "loss": 4.1218,
      "step": 3263
    },
    {
      "epoch": 0.03264,
      "grad_norm": 0.7950184256183296,
      "learning_rate": 0.003,
      "loss": 4.1379,
      "step": 3264
    },
    {
      "epoch": 0.03265,
      "grad_norm": 0.8260324885629711,
      "learning_rate": 0.003,
      "loss": 4.1313,
      "step": 3265
    },
    {
      "epoch": 0.03266,
      "grad_norm": 0.9331143722743607,
      "learning_rate": 0.003,
      "loss": 4.0906,
      "step": 3266
    },
    {
      "epoch": 0.03267,
      "grad_norm": 0.8951624994125074,
      "learning_rate": 0.003,
      "loss": 4.1009,
      "step": 3267
    },
    {
      "epoch": 0.03268,
      "grad_norm": 0.8052096070419401,
      "learning_rate": 0.003,
      "loss": 4.0866,
      "step": 3268
    },
    {
      "epoch": 0.03269,
      "grad_norm": 0.8166220426949835,
      "learning_rate": 0.003,
      "loss": 4.115,
      "step": 3269
    },
    {
      "epoch": 0.0327,
      "grad_norm": 0.7018775720106071,
      "learning_rate": 0.003,
      "loss": 4.1382,
      "step": 3270
    },
    {
      "epoch": 0.03271,
      "grad_norm": 0.7111861211006557,
      "learning_rate": 0.003,
      "loss": 4.1005,
      "step": 3271
    },
    {
      "epoch": 0.03272,
      "grad_norm": 0.8446103418486255,
      "learning_rate": 0.003,
      "loss": 4.111,
      "step": 3272
    },
    {
      "epoch": 0.03273,
      "grad_norm": 0.9056741913837429,
      "learning_rate": 0.003,
      "loss": 4.1151,
      "step": 3273
    },
    {
      "epoch": 0.03274,
      "grad_norm": 0.8569785882714194,
      "learning_rate": 0.003,
      "loss": 4.1354,
      "step": 3274
    },
    {
      "epoch": 0.03275,
      "grad_norm": 0.8267745539239577,
      "learning_rate": 0.003,
      "loss": 4.1349,
      "step": 3275
    },
    {
      "epoch": 0.03276,
      "grad_norm": 0.8644593844634266,
      "learning_rate": 0.003,
      "loss": 4.1132,
      "step": 3276
    },
    {
      "epoch": 0.03277,
      "grad_norm": 1.0535041439341994,
      "learning_rate": 0.003,
      "loss": 4.1198,
      "step": 3277
    },
    {
      "epoch": 0.03278,
      "grad_norm": 0.993769735898057,
      "learning_rate": 0.003,
      "loss": 4.1303,
      "step": 3278
    },
    {
      "epoch": 0.03279,
      "grad_norm": 1.1362817862452346,
      "learning_rate": 0.003,
      "loss": 4.1232,
      "step": 3279
    },
    {
      "epoch": 0.0328,
      "grad_norm": 0.785895471391003,
      "learning_rate": 0.003,
      "loss": 4.0976,
      "step": 3280
    },
    {
      "epoch": 0.03281,
      "grad_norm": 0.8032123764058076,
      "learning_rate": 0.003,
      "loss": 4.1444,
      "step": 3281
    },
    {
      "epoch": 0.03282,
      "grad_norm": 0.9387436206649247,
      "learning_rate": 0.003,
      "loss": 4.103,
      "step": 3282
    },
    {
      "epoch": 0.03283,
      "grad_norm": 1.1369187077903837,
      "learning_rate": 0.003,
      "loss": 4.1105,
      "step": 3283
    },
    {
      "epoch": 0.03284,
      "grad_norm": 0.9173031886562234,
      "learning_rate": 0.003,
      "loss": 4.1437,
      "step": 3284
    },
    {
      "epoch": 0.03285,
      "grad_norm": 1.0358430479898526,
      "learning_rate": 0.003,
      "loss": 4.0955,
      "step": 3285
    },
    {
      "epoch": 0.03286,
      "grad_norm": 0.8561133575195394,
      "learning_rate": 0.003,
      "loss": 4.1319,
      "step": 3286
    },
    {
      "epoch": 0.03287,
      "grad_norm": 0.8329432797036997,
      "learning_rate": 0.003,
      "loss": 4.1379,
      "step": 3287
    },
    {
      "epoch": 0.03288,
      "grad_norm": 0.8334315378634501,
      "learning_rate": 0.003,
      "loss": 4.1202,
      "step": 3288
    },
    {
      "epoch": 0.03289,
      "grad_norm": 0.6838577814234701,
      "learning_rate": 0.003,
      "loss": 4.1181,
      "step": 3289
    },
    {
      "epoch": 0.0329,
      "grad_norm": 0.6781901884347332,
      "learning_rate": 0.003,
      "loss": 4.1462,
      "step": 3290
    },
    {
      "epoch": 0.03291,
      "grad_norm": 0.6974761035595561,
      "learning_rate": 0.003,
      "loss": 4.1285,
      "step": 3291
    },
    {
      "epoch": 0.03292,
      "grad_norm": 0.8026120729452656,
      "learning_rate": 0.003,
      "loss": 4.093,
      "step": 3292
    },
    {
      "epoch": 0.03293,
      "grad_norm": 0.8409575609279364,
      "learning_rate": 0.003,
      "loss": 4.116,
      "step": 3293
    },
    {
      "epoch": 0.03294,
      "grad_norm": 0.9672433152587747,
      "learning_rate": 0.003,
      "loss": 4.1206,
      "step": 3294
    },
    {
      "epoch": 0.03295,
      "grad_norm": 0.952646103687869,
      "learning_rate": 0.003,
      "loss": 4.117,
      "step": 3295
    },
    {
      "epoch": 0.03296,
      "grad_norm": 0.898170814180958,
      "learning_rate": 0.003,
      "loss": 4.1096,
      "step": 3296
    },
    {
      "epoch": 0.03297,
      "grad_norm": 1.021226648982521,
      "learning_rate": 0.003,
      "loss": 4.1174,
      "step": 3297
    },
    {
      "epoch": 0.03298,
      "grad_norm": 1.0948474288454675,
      "learning_rate": 0.003,
      "loss": 4.1462,
      "step": 3298
    },
    {
      "epoch": 0.03299,
      "grad_norm": 1.0192347868788103,
      "learning_rate": 0.003,
      "loss": 4.1229,
      "step": 3299
    },
    {
      "epoch": 0.033,
      "grad_norm": 1.0495723694795078,
      "learning_rate": 0.003,
      "loss": 4.1193,
      "step": 3300
    },
    {
      "epoch": 0.03301,
      "grad_norm": 1.0851645957051337,
      "learning_rate": 0.003,
      "loss": 4.155,
      "step": 3301
    },
    {
      "epoch": 0.03302,
      "grad_norm": 0.9231732440912226,
      "learning_rate": 0.003,
      "loss": 4.1229,
      "step": 3302
    },
    {
      "epoch": 0.03303,
      "grad_norm": 0.8311956950138633,
      "learning_rate": 0.003,
      "loss": 4.1277,
      "step": 3303
    },
    {
      "epoch": 0.03304,
      "grad_norm": 0.7259197244925034,
      "learning_rate": 0.003,
      "loss": 4.1175,
      "step": 3304
    },
    {
      "epoch": 0.03305,
      "grad_norm": 0.6328132146169181,
      "learning_rate": 0.003,
      "loss": 4.1191,
      "step": 3305
    },
    {
      "epoch": 0.03306,
      "grad_norm": 0.680683566379763,
      "learning_rate": 0.003,
      "loss": 4.1365,
      "step": 3306
    },
    {
      "epoch": 0.03307,
      "grad_norm": 0.6927470498017374,
      "learning_rate": 0.003,
      "loss": 4.1345,
      "step": 3307
    },
    {
      "epoch": 0.03308,
      "grad_norm": 0.6675628417853611,
      "learning_rate": 0.003,
      "loss": 4.1149,
      "step": 3308
    },
    {
      "epoch": 0.03309,
      "grad_norm": 0.7202067492998604,
      "learning_rate": 0.003,
      "loss": 4.1005,
      "step": 3309
    },
    {
      "epoch": 0.0331,
      "grad_norm": 0.7699244574031957,
      "learning_rate": 0.003,
      "loss": 4.1053,
      "step": 3310
    },
    {
      "epoch": 0.03311,
      "grad_norm": 0.8693826092494591,
      "learning_rate": 0.003,
      "loss": 4.1276,
      "step": 3311
    },
    {
      "epoch": 0.03312,
      "grad_norm": 1.172876733274895,
      "learning_rate": 0.003,
      "loss": 4.1076,
      "step": 3312
    },
    {
      "epoch": 0.03313,
      "grad_norm": 0.9783748676175467,
      "learning_rate": 0.003,
      "loss": 4.1391,
      "step": 3313
    },
    {
      "epoch": 0.03314,
      "grad_norm": 0.9677078544866878,
      "learning_rate": 0.003,
      "loss": 4.0804,
      "step": 3314
    },
    {
      "epoch": 0.03315,
      "grad_norm": 0.8922546596507663,
      "learning_rate": 0.003,
      "loss": 4.131,
      "step": 3315
    },
    {
      "epoch": 0.03316,
      "grad_norm": 0.8601480724971649,
      "learning_rate": 0.003,
      "loss": 4.1318,
      "step": 3316
    },
    {
      "epoch": 0.03317,
      "grad_norm": 0.8689703079288349,
      "learning_rate": 0.003,
      "loss": 4.1234,
      "step": 3317
    },
    {
      "epoch": 0.03318,
      "grad_norm": 0.8404981974903554,
      "learning_rate": 0.003,
      "loss": 4.1039,
      "step": 3318
    },
    {
      "epoch": 0.03319,
      "grad_norm": 0.7816646536926355,
      "learning_rate": 0.003,
      "loss": 4.0965,
      "step": 3319
    },
    {
      "epoch": 0.0332,
      "grad_norm": 0.7686980103144088,
      "learning_rate": 0.003,
      "loss": 4.1225,
      "step": 3320
    },
    {
      "epoch": 0.03321,
      "grad_norm": 0.6688242043093231,
      "learning_rate": 0.003,
      "loss": 4.1337,
      "step": 3321
    },
    {
      "epoch": 0.03322,
      "grad_norm": 0.6171612564014062,
      "learning_rate": 0.003,
      "loss": 4.0854,
      "step": 3322
    },
    {
      "epoch": 0.03323,
      "grad_norm": 0.5970300363358997,
      "learning_rate": 0.003,
      "loss": 4.1264,
      "step": 3323
    },
    {
      "epoch": 0.03324,
      "grad_norm": 0.5953328227942196,
      "learning_rate": 0.003,
      "loss": 4.0936,
      "step": 3324
    },
    {
      "epoch": 0.03325,
      "grad_norm": 0.6169531392155312,
      "learning_rate": 0.003,
      "loss": 4.1268,
      "step": 3325
    },
    {
      "epoch": 0.03326,
      "grad_norm": 0.7413959337275615,
      "learning_rate": 0.003,
      "loss": 4.0852,
      "step": 3326
    },
    {
      "epoch": 0.03327,
      "grad_norm": 0.9232832965259093,
      "learning_rate": 0.003,
      "loss": 4.1215,
      "step": 3327
    },
    {
      "epoch": 0.03328,
      "grad_norm": 1.0485483129958542,
      "learning_rate": 0.003,
      "loss": 4.1432,
      "step": 3328
    },
    {
      "epoch": 0.03329,
      "grad_norm": 0.8420326285667852,
      "learning_rate": 0.003,
      "loss": 4.0969,
      "step": 3329
    },
    {
      "epoch": 0.0333,
      "grad_norm": 0.7052108452174394,
      "learning_rate": 0.003,
      "loss": 4.1002,
      "step": 3330
    },
    {
      "epoch": 0.03331,
      "grad_norm": 0.728092857943501,
      "learning_rate": 0.003,
      "loss": 4.1003,
      "step": 3331
    },
    {
      "epoch": 0.03332,
      "grad_norm": 0.9200654034817243,
      "learning_rate": 0.003,
      "loss": 4.1031,
      "step": 3332
    },
    {
      "epoch": 0.03333,
      "grad_norm": 0.9758232395574222,
      "learning_rate": 0.003,
      "loss": 4.0967,
      "step": 3333
    },
    {
      "epoch": 0.03334,
      "grad_norm": 1.0419984825931974,
      "learning_rate": 0.003,
      "loss": 4.1301,
      "step": 3334
    },
    {
      "epoch": 0.03335,
      "grad_norm": 0.9936868073161941,
      "learning_rate": 0.003,
      "loss": 4.1367,
      "step": 3335
    },
    {
      "epoch": 0.03336,
      "grad_norm": 0.9188784917760547,
      "learning_rate": 0.003,
      "loss": 4.1343,
      "step": 3336
    },
    {
      "epoch": 0.03337,
      "grad_norm": 0.9676171269128566,
      "learning_rate": 0.003,
      "loss": 4.113,
      "step": 3337
    },
    {
      "epoch": 0.03338,
      "grad_norm": 1.2398133112982033,
      "learning_rate": 0.003,
      "loss": 4.1319,
      "step": 3338
    },
    {
      "epoch": 0.03339,
      "grad_norm": 0.8757441136351082,
      "learning_rate": 0.003,
      "loss": 4.1259,
      "step": 3339
    },
    {
      "epoch": 0.0334,
      "grad_norm": 0.803431258859875,
      "learning_rate": 0.003,
      "loss": 4.102,
      "step": 3340
    },
    {
      "epoch": 0.03341,
      "grad_norm": 0.7508144599659284,
      "learning_rate": 0.003,
      "loss": 4.1183,
      "step": 3341
    },
    {
      "epoch": 0.03342,
      "grad_norm": 0.6682938205970289,
      "learning_rate": 0.003,
      "loss": 4.1049,
      "step": 3342
    },
    {
      "epoch": 0.03343,
      "grad_norm": 0.6286049563460856,
      "learning_rate": 0.003,
      "loss": 4.0788,
      "step": 3343
    },
    {
      "epoch": 0.03344,
      "grad_norm": 0.7479131859288655,
      "learning_rate": 0.003,
      "loss": 4.1327,
      "step": 3344
    },
    {
      "epoch": 0.03345,
      "grad_norm": 0.9317587885861235,
      "learning_rate": 0.003,
      "loss": 4.1167,
      "step": 3345
    },
    {
      "epoch": 0.03346,
      "grad_norm": 1.1810549914532213,
      "learning_rate": 0.003,
      "loss": 4.1402,
      "step": 3346
    },
    {
      "epoch": 0.03347,
      "grad_norm": 0.803326085791172,
      "learning_rate": 0.003,
      "loss": 4.1157,
      "step": 3347
    },
    {
      "epoch": 0.03348,
      "grad_norm": 0.6682684868194113,
      "learning_rate": 0.003,
      "loss": 4.0993,
      "step": 3348
    },
    {
      "epoch": 0.03349,
      "grad_norm": 0.7128883398636897,
      "learning_rate": 0.003,
      "loss": 4.1453,
      "step": 3349
    },
    {
      "epoch": 0.0335,
      "grad_norm": 0.8166001203873546,
      "learning_rate": 0.003,
      "loss": 4.1007,
      "step": 3350
    },
    {
      "epoch": 0.03351,
      "grad_norm": 0.8984169700598436,
      "learning_rate": 0.003,
      "loss": 4.1127,
      "step": 3351
    },
    {
      "epoch": 0.03352,
      "grad_norm": 0.891385033781036,
      "learning_rate": 0.003,
      "loss": 4.1282,
      "step": 3352
    },
    {
      "epoch": 0.03353,
      "grad_norm": 0.7436503547090926,
      "learning_rate": 0.003,
      "loss": 4.1246,
      "step": 3353
    },
    {
      "epoch": 0.03354,
      "grad_norm": 0.7000314128832471,
      "learning_rate": 0.003,
      "loss": 4.0858,
      "step": 3354
    },
    {
      "epoch": 0.03355,
      "grad_norm": 0.8076872538085662,
      "learning_rate": 0.003,
      "loss": 4.1143,
      "step": 3355
    },
    {
      "epoch": 0.03356,
      "grad_norm": 0.9065676166442113,
      "learning_rate": 0.003,
      "loss": 4.141,
      "step": 3356
    },
    {
      "epoch": 0.03357,
      "grad_norm": 0.8283445403636854,
      "learning_rate": 0.003,
      "loss": 4.1149,
      "step": 3357
    },
    {
      "epoch": 0.03358,
      "grad_norm": 0.711728954112842,
      "learning_rate": 0.003,
      "loss": 4.1298,
      "step": 3358
    },
    {
      "epoch": 0.03359,
      "grad_norm": 0.6700966992589463,
      "learning_rate": 0.003,
      "loss": 4.1152,
      "step": 3359
    },
    {
      "epoch": 0.0336,
      "grad_norm": 0.6990021919801547,
      "learning_rate": 0.003,
      "loss": 4.1287,
      "step": 3360
    },
    {
      "epoch": 0.03361,
      "grad_norm": 0.6512206263292598,
      "learning_rate": 0.003,
      "loss": 4.1284,
      "step": 3361
    },
    {
      "epoch": 0.03362,
      "grad_norm": 0.6471137948913391,
      "learning_rate": 0.003,
      "loss": 4.1245,
      "step": 3362
    },
    {
      "epoch": 0.03363,
      "grad_norm": 0.6486978718096742,
      "learning_rate": 0.003,
      "loss": 4.0746,
      "step": 3363
    },
    {
      "epoch": 0.03364,
      "grad_norm": 0.7056083808952703,
      "learning_rate": 0.003,
      "loss": 4.0975,
      "step": 3364
    },
    {
      "epoch": 0.03365,
      "grad_norm": 0.7889254390774209,
      "learning_rate": 0.003,
      "loss": 4.096,
      "step": 3365
    },
    {
      "epoch": 0.03366,
      "grad_norm": 0.8858274543513762,
      "learning_rate": 0.003,
      "loss": 4.0742,
      "step": 3366
    },
    {
      "epoch": 0.03367,
      "grad_norm": 0.8996550932306419,
      "learning_rate": 0.003,
      "loss": 4.0941,
      "step": 3367
    },
    {
      "epoch": 0.03368,
      "grad_norm": 0.9535511924749507,
      "learning_rate": 0.003,
      "loss": 4.1436,
      "step": 3368
    },
    {
      "epoch": 0.03369,
      "grad_norm": 1.025280504328262,
      "learning_rate": 0.003,
      "loss": 4.1064,
      "step": 3369
    },
    {
      "epoch": 0.0337,
      "grad_norm": 0.9606870288728299,
      "learning_rate": 0.003,
      "loss": 4.1369,
      "step": 3370
    },
    {
      "epoch": 0.03371,
      "grad_norm": 0.911846692093779,
      "learning_rate": 0.003,
      "loss": 4.0718,
      "step": 3371
    },
    {
      "epoch": 0.03372,
      "grad_norm": 0.8892950330733644,
      "learning_rate": 0.003,
      "loss": 4.1005,
      "step": 3372
    },
    {
      "epoch": 0.03373,
      "grad_norm": 0.9613355807854816,
      "learning_rate": 0.003,
      "loss": 4.1365,
      "step": 3373
    },
    {
      "epoch": 0.03374,
      "grad_norm": 0.9293495359333047,
      "learning_rate": 0.003,
      "loss": 4.1205,
      "step": 3374
    },
    {
      "epoch": 0.03375,
      "grad_norm": 0.9945254272923405,
      "learning_rate": 0.003,
      "loss": 4.1112,
      "step": 3375
    },
    {
      "epoch": 0.03376,
      "grad_norm": 0.9458311049733065,
      "learning_rate": 0.003,
      "loss": 4.085,
      "step": 3376
    },
    {
      "epoch": 0.03377,
      "grad_norm": 1.0143873093094964,
      "learning_rate": 0.003,
      "loss": 4.1348,
      "step": 3377
    },
    {
      "epoch": 0.03378,
      "grad_norm": 0.9867149119212901,
      "learning_rate": 0.003,
      "loss": 4.1415,
      "step": 3378
    },
    {
      "epoch": 0.03379,
      "grad_norm": 0.909410152436646,
      "learning_rate": 0.003,
      "loss": 4.1127,
      "step": 3379
    },
    {
      "epoch": 0.0338,
      "grad_norm": 0.9582033813869726,
      "learning_rate": 0.003,
      "loss": 4.1382,
      "step": 3380
    },
    {
      "epoch": 0.03381,
      "grad_norm": 1.0515732546439378,
      "learning_rate": 0.003,
      "loss": 4.1483,
      "step": 3381
    },
    {
      "epoch": 0.03382,
      "grad_norm": 0.9000543481782362,
      "learning_rate": 0.003,
      "loss": 4.1344,
      "step": 3382
    },
    {
      "epoch": 0.03383,
      "grad_norm": 0.7851905547422763,
      "learning_rate": 0.003,
      "loss": 4.0953,
      "step": 3383
    },
    {
      "epoch": 0.03384,
      "grad_norm": 0.7425344508528795,
      "learning_rate": 0.003,
      "loss": 4.1169,
      "step": 3384
    },
    {
      "epoch": 0.03385,
      "grad_norm": 0.7242449162586684,
      "learning_rate": 0.003,
      "loss": 4.1158,
      "step": 3385
    },
    {
      "epoch": 0.03386,
      "grad_norm": 0.7178134850877214,
      "learning_rate": 0.003,
      "loss": 4.1127,
      "step": 3386
    },
    {
      "epoch": 0.03387,
      "grad_norm": 0.6034717016906057,
      "learning_rate": 0.003,
      "loss": 4.1113,
      "step": 3387
    },
    {
      "epoch": 0.03388,
      "grad_norm": 0.6250385791221151,
      "learning_rate": 0.003,
      "loss": 4.1231,
      "step": 3388
    },
    {
      "epoch": 0.03389,
      "grad_norm": 0.5925814922250926,
      "learning_rate": 0.003,
      "loss": 4.1382,
      "step": 3389
    },
    {
      "epoch": 0.0339,
      "grad_norm": 0.6938565572265246,
      "learning_rate": 0.003,
      "loss": 4.1269,
      "step": 3390
    },
    {
      "epoch": 0.03391,
      "grad_norm": 0.8588428651999951,
      "learning_rate": 0.003,
      "loss": 4.1075,
      "step": 3391
    },
    {
      "epoch": 0.03392,
      "grad_norm": 1.0828213831544788,
      "learning_rate": 0.003,
      "loss": 4.1215,
      "step": 3392
    },
    {
      "epoch": 0.03393,
      "grad_norm": 0.9139268583774075,
      "learning_rate": 0.003,
      "loss": 4.1312,
      "step": 3393
    },
    {
      "epoch": 0.03394,
      "grad_norm": 0.8962526152154793,
      "learning_rate": 0.003,
      "loss": 4.1265,
      "step": 3394
    },
    {
      "epoch": 0.03395,
      "grad_norm": 0.9550321739497196,
      "learning_rate": 0.003,
      "loss": 4.1325,
      "step": 3395
    },
    {
      "epoch": 0.03396,
      "grad_norm": 0.9058142633647185,
      "learning_rate": 0.003,
      "loss": 4.1149,
      "step": 3396
    },
    {
      "epoch": 0.03397,
      "grad_norm": 1.030198089545334,
      "learning_rate": 0.003,
      "loss": 4.0875,
      "step": 3397
    },
    {
      "epoch": 0.03398,
      "grad_norm": 0.9817159965257135,
      "learning_rate": 0.003,
      "loss": 4.1455,
      "step": 3398
    },
    {
      "epoch": 0.03399,
      "grad_norm": 0.7537874005656849,
      "learning_rate": 0.003,
      "loss": 4.1585,
      "step": 3399
    },
    {
      "epoch": 0.034,
      "grad_norm": 0.7534398090951075,
      "learning_rate": 0.003,
      "loss": 4.1353,
      "step": 3400
    },
    {
      "epoch": 0.03401,
      "grad_norm": 0.846711011216498,
      "learning_rate": 0.003,
      "loss": 4.1125,
      "step": 3401
    },
    {
      "epoch": 0.03402,
      "grad_norm": 1.0304564422923,
      "learning_rate": 0.003,
      "loss": 4.1474,
      "step": 3402
    },
    {
      "epoch": 0.03403,
      "grad_norm": 0.8955445182551958,
      "learning_rate": 0.003,
      "loss": 4.1078,
      "step": 3403
    },
    {
      "epoch": 0.03404,
      "grad_norm": 0.8683725321318734,
      "learning_rate": 0.003,
      "loss": 4.1404,
      "step": 3404
    },
    {
      "epoch": 0.03405,
      "grad_norm": 0.8700430489216084,
      "learning_rate": 0.003,
      "loss": 4.126,
      "step": 3405
    },
    {
      "epoch": 0.03406,
      "grad_norm": 0.856851032889231,
      "learning_rate": 0.003,
      "loss": 4.1086,
      "step": 3406
    },
    {
      "epoch": 0.03407,
      "grad_norm": 0.8583304198024593,
      "learning_rate": 0.003,
      "loss": 4.1254,
      "step": 3407
    },
    {
      "epoch": 0.03408,
      "grad_norm": 0.8259652751248255,
      "learning_rate": 0.003,
      "loss": 4.1052,
      "step": 3408
    },
    {
      "epoch": 0.03409,
      "grad_norm": 0.9359241584231073,
      "learning_rate": 0.003,
      "loss": 4.1182,
      "step": 3409
    },
    {
      "epoch": 0.0341,
      "grad_norm": 0.938170801150728,
      "learning_rate": 0.003,
      "loss": 4.1147,
      "step": 3410
    },
    {
      "epoch": 0.03411,
      "grad_norm": 0.9444575938733144,
      "learning_rate": 0.003,
      "loss": 4.1289,
      "step": 3411
    },
    {
      "epoch": 0.03412,
      "grad_norm": 0.9224218450030172,
      "learning_rate": 0.003,
      "loss": 4.1473,
      "step": 3412
    },
    {
      "epoch": 0.03413,
      "grad_norm": 0.9312506396494133,
      "learning_rate": 0.003,
      "loss": 4.1534,
      "step": 3413
    },
    {
      "epoch": 0.03414,
      "grad_norm": 0.9761972676777384,
      "learning_rate": 0.003,
      "loss": 4.1424,
      "step": 3414
    },
    {
      "epoch": 0.03415,
      "grad_norm": 1.189613033448552,
      "learning_rate": 0.003,
      "loss": 4.1381,
      "step": 3415
    },
    {
      "epoch": 0.03416,
      "grad_norm": 0.8255201977611025,
      "learning_rate": 0.003,
      "loss": 4.1273,
      "step": 3416
    },
    {
      "epoch": 0.03417,
      "grad_norm": 0.8947733285821016,
      "learning_rate": 0.003,
      "loss": 4.125,
      "step": 3417
    },
    {
      "epoch": 0.03418,
      "grad_norm": 0.7984406024805614,
      "learning_rate": 0.003,
      "loss": 4.0949,
      "step": 3418
    },
    {
      "epoch": 0.03419,
      "grad_norm": 0.6630379610145436,
      "learning_rate": 0.003,
      "loss": 4.1135,
      "step": 3419
    },
    {
      "epoch": 0.0342,
      "grad_norm": 0.5975770128227218,
      "learning_rate": 0.003,
      "loss": 4.12,
      "step": 3420
    },
    {
      "epoch": 0.03421,
      "grad_norm": 0.6059481665025913,
      "learning_rate": 0.003,
      "loss": 4.1103,
      "step": 3421
    },
    {
      "epoch": 0.03422,
      "grad_norm": 0.6007015602316165,
      "learning_rate": 0.003,
      "loss": 4.1246,
      "step": 3422
    },
    {
      "epoch": 0.03423,
      "grad_norm": 0.6213374488193623,
      "learning_rate": 0.003,
      "loss": 4.1042,
      "step": 3423
    },
    {
      "epoch": 0.03424,
      "grad_norm": 0.7357379283476251,
      "learning_rate": 0.003,
      "loss": 4.1285,
      "step": 3424
    },
    {
      "epoch": 0.03425,
      "grad_norm": 1.1388627740722068,
      "learning_rate": 0.003,
      "loss": 4.1027,
      "step": 3425
    },
    {
      "epoch": 0.03426,
      "grad_norm": 1.2676165376349322,
      "learning_rate": 0.003,
      "loss": 4.1396,
      "step": 3426
    },
    {
      "epoch": 0.03427,
      "grad_norm": 0.8591095580169765,
      "learning_rate": 0.003,
      "loss": 4.0962,
      "step": 3427
    },
    {
      "epoch": 0.03428,
      "grad_norm": 0.7637078879713047,
      "learning_rate": 0.003,
      "loss": 4.096,
      "step": 3428
    },
    {
      "epoch": 0.03429,
      "grad_norm": 0.7598510773275634,
      "learning_rate": 0.003,
      "loss": 4.1094,
      "step": 3429
    },
    {
      "epoch": 0.0343,
      "grad_norm": 0.7189854251191673,
      "learning_rate": 0.003,
      "loss": 4.1044,
      "step": 3430
    },
    {
      "epoch": 0.03431,
      "grad_norm": 0.6960848938203806,
      "learning_rate": 0.003,
      "loss": 4.0955,
      "step": 3431
    },
    {
      "epoch": 0.03432,
      "grad_norm": 0.7905143093567869,
      "learning_rate": 0.003,
      "loss": 4.1069,
      "step": 3432
    },
    {
      "epoch": 0.03433,
      "grad_norm": 0.6763604081182782,
      "learning_rate": 0.003,
      "loss": 4.0757,
      "step": 3433
    },
    {
      "epoch": 0.03434,
      "grad_norm": 0.6584657241744457,
      "learning_rate": 0.003,
      "loss": 4.098,
      "step": 3434
    },
    {
      "epoch": 0.03435,
      "grad_norm": 0.6146074495577768,
      "learning_rate": 0.003,
      "loss": 4.0797,
      "step": 3435
    },
    {
      "epoch": 0.03436,
      "grad_norm": 0.6821260061437763,
      "learning_rate": 0.003,
      "loss": 4.1092,
      "step": 3436
    },
    {
      "epoch": 0.03437,
      "grad_norm": 0.7002392953868479,
      "learning_rate": 0.003,
      "loss": 4.0975,
      "step": 3437
    },
    {
      "epoch": 0.03438,
      "grad_norm": 0.6865303492632201,
      "learning_rate": 0.003,
      "loss": 4.1125,
      "step": 3438
    },
    {
      "epoch": 0.03439,
      "grad_norm": 0.6862765139756455,
      "learning_rate": 0.003,
      "loss": 4.109,
      "step": 3439
    },
    {
      "epoch": 0.0344,
      "grad_norm": 0.7374953588444623,
      "learning_rate": 0.003,
      "loss": 4.085,
      "step": 3440
    },
    {
      "epoch": 0.03441,
      "grad_norm": 0.8034314810869642,
      "learning_rate": 0.003,
      "loss": 4.1097,
      "step": 3441
    },
    {
      "epoch": 0.03442,
      "grad_norm": 0.9713109980816932,
      "learning_rate": 0.003,
      "loss": 4.0808,
      "step": 3442
    },
    {
      "epoch": 0.03443,
      "grad_norm": 1.2417830843481585,
      "learning_rate": 0.003,
      "loss": 4.1193,
      "step": 3443
    },
    {
      "epoch": 0.03444,
      "grad_norm": 0.9221286248093234,
      "learning_rate": 0.003,
      "loss": 4.1273,
      "step": 3444
    },
    {
      "epoch": 0.03445,
      "grad_norm": 0.95846171622353,
      "learning_rate": 0.003,
      "loss": 4.0932,
      "step": 3445
    },
    {
      "epoch": 0.03446,
      "grad_norm": 1.0528525400170592,
      "learning_rate": 0.003,
      "loss": 4.0839,
      "step": 3446
    },
    {
      "epoch": 0.03447,
      "grad_norm": 1.0620743717942975,
      "learning_rate": 0.003,
      "loss": 4.1319,
      "step": 3447
    },
    {
      "epoch": 0.03448,
      "grad_norm": 1.0787590252225165,
      "learning_rate": 0.003,
      "loss": 4.1092,
      "step": 3448
    },
    {
      "epoch": 0.03449,
      "grad_norm": 1.078404883653542,
      "learning_rate": 0.003,
      "loss": 4.0986,
      "step": 3449
    },
    {
      "epoch": 0.0345,
      "grad_norm": 1.1540054715604005,
      "learning_rate": 0.003,
      "loss": 4.1381,
      "step": 3450
    },
    {
      "epoch": 0.03451,
      "grad_norm": 0.8053901369440304,
      "learning_rate": 0.003,
      "loss": 4.102,
      "step": 3451
    },
    {
      "epoch": 0.03452,
      "grad_norm": 0.8242631664830717,
      "learning_rate": 0.003,
      "loss": 4.1318,
      "step": 3452
    },
    {
      "epoch": 0.03453,
      "grad_norm": 0.8626897904731716,
      "learning_rate": 0.003,
      "loss": 4.1077,
      "step": 3453
    },
    {
      "epoch": 0.03454,
      "grad_norm": 0.8237109468250721,
      "learning_rate": 0.003,
      "loss": 4.0895,
      "step": 3454
    },
    {
      "epoch": 0.03455,
      "grad_norm": 0.804980006239357,
      "learning_rate": 0.003,
      "loss": 4.0889,
      "step": 3455
    },
    {
      "epoch": 0.03456,
      "grad_norm": 0.729280374546535,
      "learning_rate": 0.003,
      "loss": 4.1054,
      "step": 3456
    },
    {
      "epoch": 0.03457,
      "grad_norm": 0.7776253504611971,
      "learning_rate": 0.003,
      "loss": 4.0704,
      "step": 3457
    },
    {
      "epoch": 0.03458,
      "grad_norm": 0.7417498419116878,
      "learning_rate": 0.003,
      "loss": 4.1078,
      "step": 3458
    },
    {
      "epoch": 0.03459,
      "grad_norm": 0.6919191119122285,
      "learning_rate": 0.003,
      "loss": 4.1315,
      "step": 3459
    },
    {
      "epoch": 0.0346,
      "grad_norm": 0.6964165299460008,
      "learning_rate": 0.003,
      "loss": 4.1312,
      "step": 3460
    },
    {
      "epoch": 0.03461,
      "grad_norm": 0.715480299476076,
      "learning_rate": 0.003,
      "loss": 4.109,
      "step": 3461
    },
    {
      "epoch": 0.03462,
      "grad_norm": 0.7536303091133582,
      "learning_rate": 0.003,
      "loss": 4.1045,
      "step": 3462
    },
    {
      "epoch": 0.03463,
      "grad_norm": 0.7082893489121429,
      "learning_rate": 0.003,
      "loss": 4.087,
      "step": 3463
    },
    {
      "epoch": 0.03464,
      "grad_norm": 0.6417430899292896,
      "learning_rate": 0.003,
      "loss": 4.099,
      "step": 3464
    },
    {
      "epoch": 0.03465,
      "grad_norm": 0.6337345648035622,
      "learning_rate": 0.003,
      "loss": 4.1065,
      "step": 3465
    },
    {
      "epoch": 0.03466,
      "grad_norm": 0.6853326694383307,
      "learning_rate": 0.003,
      "loss": 4.107,
      "step": 3466
    },
    {
      "epoch": 0.03467,
      "grad_norm": 0.6969053686300763,
      "learning_rate": 0.003,
      "loss": 4.0908,
      "step": 3467
    },
    {
      "epoch": 0.03468,
      "grad_norm": 0.7399108221307881,
      "learning_rate": 0.003,
      "loss": 4.1065,
      "step": 3468
    },
    {
      "epoch": 0.03469,
      "grad_norm": 0.8787869181598735,
      "learning_rate": 0.003,
      "loss": 4.1074,
      "step": 3469
    },
    {
      "epoch": 0.0347,
      "grad_norm": 1.0101947797716855,
      "learning_rate": 0.003,
      "loss": 4.1289,
      "step": 3470
    },
    {
      "epoch": 0.03471,
      "grad_norm": 1.1334674964796345,
      "learning_rate": 0.003,
      "loss": 4.1276,
      "step": 3471
    },
    {
      "epoch": 0.03472,
      "grad_norm": 0.8368294781332597,
      "learning_rate": 0.003,
      "loss": 4.1132,
      "step": 3472
    },
    {
      "epoch": 0.03473,
      "grad_norm": 0.8757089713498055,
      "learning_rate": 0.003,
      "loss": 4.1133,
      "step": 3473
    },
    {
      "epoch": 0.03474,
      "grad_norm": 0.8138215418551573,
      "learning_rate": 0.003,
      "loss": 4.1375,
      "step": 3474
    },
    {
      "epoch": 0.03475,
      "grad_norm": 0.9122035240346946,
      "learning_rate": 0.003,
      "loss": 4.1102,
      "step": 3475
    },
    {
      "epoch": 0.03476,
      "grad_norm": 1.0107344329113028,
      "learning_rate": 0.003,
      "loss": 4.1232,
      "step": 3476
    },
    {
      "epoch": 0.03477,
      "grad_norm": 1.0671405774179539,
      "learning_rate": 0.003,
      "loss": 4.1215,
      "step": 3477
    },
    {
      "epoch": 0.03478,
      "grad_norm": 1.011473839771263,
      "learning_rate": 0.003,
      "loss": 4.128,
      "step": 3478
    },
    {
      "epoch": 0.03479,
      "grad_norm": 1.0143839143452251,
      "learning_rate": 0.003,
      "loss": 4.1035,
      "step": 3479
    },
    {
      "epoch": 0.0348,
      "grad_norm": 1.01956672356136,
      "learning_rate": 0.003,
      "loss": 4.1097,
      "step": 3480
    },
    {
      "epoch": 0.03481,
      "grad_norm": 0.8544300973866115,
      "learning_rate": 0.003,
      "loss": 4.1174,
      "step": 3481
    },
    {
      "epoch": 0.03482,
      "grad_norm": 1.0322476593810674,
      "learning_rate": 0.003,
      "loss": 4.133,
      "step": 3482
    },
    {
      "epoch": 0.03483,
      "grad_norm": 1.1510621333444928,
      "learning_rate": 0.003,
      "loss": 4.1164,
      "step": 3483
    },
    {
      "epoch": 0.03484,
      "grad_norm": 0.929657295041821,
      "learning_rate": 0.003,
      "loss": 4.1548,
      "step": 3484
    },
    {
      "epoch": 0.03485,
      "grad_norm": 0.9553453027677797,
      "learning_rate": 0.003,
      "loss": 4.146,
      "step": 3485
    },
    {
      "epoch": 0.03486,
      "grad_norm": 0.9940865088692804,
      "learning_rate": 0.003,
      "loss": 4.1221,
      "step": 3486
    },
    {
      "epoch": 0.03487,
      "grad_norm": 0.9250098589178641,
      "learning_rate": 0.003,
      "loss": 4.1107,
      "step": 3487
    },
    {
      "epoch": 0.03488,
      "grad_norm": 0.9460673526858474,
      "learning_rate": 0.003,
      "loss": 4.111,
      "step": 3488
    },
    {
      "epoch": 0.03489,
      "grad_norm": 0.9687600332195162,
      "learning_rate": 0.003,
      "loss": 4.1276,
      "step": 3489
    },
    {
      "epoch": 0.0349,
      "grad_norm": 1.0784208838544622,
      "learning_rate": 0.003,
      "loss": 4.1277,
      "step": 3490
    },
    {
      "epoch": 0.03491,
      "grad_norm": 0.9346645940080963,
      "learning_rate": 0.003,
      "loss": 4.1409,
      "step": 3491
    },
    {
      "epoch": 0.03492,
      "grad_norm": 0.8257240466605443,
      "learning_rate": 0.003,
      "loss": 4.1242,
      "step": 3492
    },
    {
      "epoch": 0.03493,
      "grad_norm": 0.6651031888705147,
      "learning_rate": 0.003,
      "loss": 4.1079,
      "step": 3493
    },
    {
      "epoch": 0.03494,
      "grad_norm": 0.6641292093348049,
      "learning_rate": 0.003,
      "loss": 4.1103,
      "step": 3494
    },
    {
      "epoch": 0.03495,
      "grad_norm": 0.7091189628573692,
      "learning_rate": 0.003,
      "loss": 4.1192,
      "step": 3495
    },
    {
      "epoch": 0.03496,
      "grad_norm": 0.6860863869815901,
      "learning_rate": 0.003,
      "loss": 4.0904,
      "step": 3496
    },
    {
      "epoch": 0.03497,
      "grad_norm": 0.6717227669489184,
      "learning_rate": 0.003,
      "loss": 4.1269,
      "step": 3497
    },
    {
      "epoch": 0.03498,
      "grad_norm": 0.6858788932767997,
      "learning_rate": 0.003,
      "loss": 4.098,
      "step": 3498
    },
    {
      "epoch": 0.03499,
      "grad_norm": 0.6840439194259218,
      "learning_rate": 0.003,
      "loss": 4.1009,
      "step": 3499
    },
    {
      "epoch": 0.035,
      "grad_norm": 0.7718701545173198,
      "learning_rate": 0.003,
      "loss": 4.1217,
      "step": 3500
    },
    {
      "epoch": 0.03501,
      "grad_norm": 0.9467030386692938,
      "learning_rate": 0.003,
      "loss": 4.1179,
      "step": 3501
    },
    {
      "epoch": 0.03502,
      "grad_norm": 1.1344054626766904,
      "learning_rate": 0.003,
      "loss": 4.1239,
      "step": 3502
    },
    {
      "epoch": 0.03503,
      "grad_norm": 0.8227576423331606,
      "learning_rate": 0.003,
      "loss": 4.1319,
      "step": 3503
    },
    {
      "epoch": 0.03504,
      "grad_norm": 0.7907161997976049,
      "learning_rate": 0.003,
      "loss": 4.103,
      "step": 3504
    },
    {
      "epoch": 0.03505,
      "grad_norm": 0.7949755989311121,
      "learning_rate": 0.003,
      "loss": 4.1199,
      "step": 3505
    },
    {
      "epoch": 0.03506,
      "grad_norm": 0.8242114597731752,
      "learning_rate": 0.003,
      "loss": 4.1307,
      "step": 3506
    },
    {
      "epoch": 0.03507,
      "grad_norm": 0.827615657879298,
      "learning_rate": 0.003,
      "loss": 4.1076,
      "step": 3507
    },
    {
      "epoch": 0.03508,
      "grad_norm": 0.8269261240893123,
      "learning_rate": 0.003,
      "loss": 4.0859,
      "step": 3508
    },
    {
      "epoch": 0.03509,
      "grad_norm": 0.8828534433530547,
      "learning_rate": 0.003,
      "loss": 4.1022,
      "step": 3509
    },
    {
      "epoch": 0.0351,
      "grad_norm": 1.0949297929244133,
      "learning_rate": 0.003,
      "loss": 4.0996,
      "step": 3510
    },
    {
      "epoch": 0.03511,
      "grad_norm": 0.8474490719439868,
      "learning_rate": 0.003,
      "loss": 4.1094,
      "step": 3511
    },
    {
      "epoch": 0.03512,
      "grad_norm": 0.6104860735283346,
      "learning_rate": 0.003,
      "loss": 4.0928,
      "step": 3512
    },
    {
      "epoch": 0.03513,
      "grad_norm": 0.6050004310996162,
      "learning_rate": 0.003,
      "loss": 4.0934,
      "step": 3513
    },
    {
      "epoch": 0.03514,
      "grad_norm": 0.6520473843771001,
      "learning_rate": 0.003,
      "loss": 4.1051,
      "step": 3514
    },
    {
      "epoch": 0.03515,
      "grad_norm": 0.6055869327734182,
      "learning_rate": 0.003,
      "loss": 4.1134,
      "step": 3515
    },
    {
      "epoch": 0.03516,
      "grad_norm": 0.6086175138721427,
      "learning_rate": 0.003,
      "loss": 4.136,
      "step": 3516
    },
    {
      "epoch": 0.03517,
      "grad_norm": 0.6697942755212305,
      "learning_rate": 0.003,
      "loss": 4.0895,
      "step": 3517
    },
    {
      "epoch": 0.03518,
      "grad_norm": 0.7287441549864627,
      "learning_rate": 0.003,
      "loss": 4.0785,
      "step": 3518
    },
    {
      "epoch": 0.03519,
      "grad_norm": 0.6795543307624995,
      "learning_rate": 0.003,
      "loss": 4.1231,
      "step": 3519
    },
    {
      "epoch": 0.0352,
      "grad_norm": 0.5583215165075127,
      "learning_rate": 0.003,
      "loss": 4.0972,
      "step": 3520
    },
    {
      "epoch": 0.03521,
      "grad_norm": 0.6145999630971761,
      "learning_rate": 0.003,
      "loss": 4.0939,
      "step": 3521
    },
    {
      "epoch": 0.03522,
      "grad_norm": 0.6235299979345595,
      "learning_rate": 0.003,
      "loss": 4.0958,
      "step": 3522
    },
    {
      "epoch": 0.03523,
      "grad_norm": 0.6982274043250706,
      "learning_rate": 0.003,
      "loss": 4.0634,
      "step": 3523
    },
    {
      "epoch": 0.03524,
      "grad_norm": 0.8135221205032699,
      "learning_rate": 0.003,
      "loss": 4.1134,
      "step": 3524
    },
    {
      "epoch": 0.03525,
      "grad_norm": 0.9550932431193345,
      "learning_rate": 0.003,
      "loss": 4.0866,
      "step": 3525
    },
    {
      "epoch": 0.03526,
      "grad_norm": 0.9769559861681798,
      "learning_rate": 0.003,
      "loss": 4.0996,
      "step": 3526
    },
    {
      "epoch": 0.03527,
      "grad_norm": 0.9916459776325028,
      "learning_rate": 0.003,
      "loss": 4.1281,
      "step": 3527
    },
    {
      "epoch": 0.03528,
      "grad_norm": 1.161858614363555,
      "learning_rate": 0.003,
      "loss": 4.1182,
      "step": 3528
    },
    {
      "epoch": 0.03529,
      "grad_norm": 0.8840893570988297,
      "learning_rate": 0.003,
      "loss": 4.1083,
      "step": 3529
    },
    {
      "epoch": 0.0353,
      "grad_norm": 0.9045833698319836,
      "learning_rate": 0.003,
      "loss": 4.1059,
      "step": 3530
    },
    {
      "epoch": 0.03531,
      "grad_norm": 1.0824064864090153,
      "learning_rate": 0.003,
      "loss": 4.1235,
      "step": 3531
    },
    {
      "epoch": 0.03532,
      "grad_norm": 1.04041676924932,
      "learning_rate": 0.003,
      "loss": 4.1238,
      "step": 3532
    },
    {
      "epoch": 0.03533,
      "grad_norm": 1.0971254119119307,
      "learning_rate": 0.003,
      "loss": 4.1274,
      "step": 3533
    },
    {
      "epoch": 0.03534,
      "grad_norm": 0.9892749396028874,
      "learning_rate": 0.003,
      "loss": 4.1235,
      "step": 3534
    },
    {
      "epoch": 0.03535,
      "grad_norm": 0.8985518501036314,
      "learning_rate": 0.003,
      "loss": 4.0881,
      "step": 3535
    },
    {
      "epoch": 0.03536,
      "grad_norm": 0.7993862708923696,
      "learning_rate": 0.003,
      "loss": 4.0885,
      "step": 3536
    },
    {
      "epoch": 0.03537,
      "grad_norm": 0.8735445800029786,
      "learning_rate": 0.003,
      "loss": 4.1018,
      "step": 3537
    },
    {
      "epoch": 0.03538,
      "grad_norm": 0.9507196424596396,
      "learning_rate": 0.003,
      "loss": 4.1074,
      "step": 3538
    },
    {
      "epoch": 0.03539,
      "grad_norm": 0.9320860917835899,
      "learning_rate": 0.003,
      "loss": 4.0979,
      "step": 3539
    },
    {
      "epoch": 0.0354,
      "grad_norm": 1.0666914195552315,
      "learning_rate": 0.003,
      "loss": 4.1405,
      "step": 3540
    },
    {
      "epoch": 0.03541,
      "grad_norm": 0.9666553633993341,
      "learning_rate": 0.003,
      "loss": 4.1236,
      "step": 3541
    },
    {
      "epoch": 0.03542,
      "grad_norm": 1.075546280507871,
      "learning_rate": 0.003,
      "loss": 4.1084,
      "step": 3542
    },
    {
      "epoch": 0.03543,
      "grad_norm": 0.8581109533371994,
      "learning_rate": 0.003,
      "loss": 4.1149,
      "step": 3543
    },
    {
      "epoch": 0.03544,
      "grad_norm": 0.8504542868770893,
      "learning_rate": 0.003,
      "loss": 4.1221,
      "step": 3544
    },
    {
      "epoch": 0.03545,
      "grad_norm": 0.8221740518533669,
      "learning_rate": 0.003,
      "loss": 4.0889,
      "step": 3545
    },
    {
      "epoch": 0.03546,
      "grad_norm": 0.856238149878472,
      "learning_rate": 0.003,
      "loss": 4.1099,
      "step": 3546
    },
    {
      "epoch": 0.03547,
      "grad_norm": 0.8136205837732846,
      "learning_rate": 0.003,
      "loss": 4.1181,
      "step": 3547
    },
    {
      "epoch": 0.03548,
      "grad_norm": 0.9120415275886503,
      "learning_rate": 0.003,
      "loss": 4.1234,
      "step": 3548
    },
    {
      "epoch": 0.03549,
      "grad_norm": 0.8741924577842357,
      "learning_rate": 0.003,
      "loss": 4.1189,
      "step": 3549
    },
    {
      "epoch": 0.0355,
      "grad_norm": 0.964690101454288,
      "learning_rate": 0.003,
      "loss": 4.0735,
      "step": 3550
    },
    {
      "epoch": 0.03551,
      "grad_norm": 1.1078408326241436,
      "learning_rate": 0.003,
      "loss": 4.1406,
      "step": 3551
    },
    {
      "epoch": 0.03552,
      "grad_norm": 0.8276304700566389,
      "learning_rate": 0.003,
      "loss": 4.0932,
      "step": 3552
    },
    {
      "epoch": 0.03553,
      "grad_norm": 0.6988104369223903,
      "learning_rate": 0.003,
      "loss": 4.1033,
      "step": 3553
    },
    {
      "epoch": 0.03554,
      "grad_norm": 0.8099473741524069,
      "learning_rate": 0.003,
      "loss": 4.1096,
      "step": 3554
    },
    {
      "epoch": 0.03555,
      "grad_norm": 0.7657626977957569,
      "learning_rate": 0.003,
      "loss": 4.1203,
      "step": 3555
    },
    {
      "epoch": 0.03556,
      "grad_norm": 0.6884161752719029,
      "learning_rate": 0.003,
      "loss": 4.1008,
      "step": 3556
    },
    {
      "epoch": 0.03557,
      "grad_norm": 0.6257341319138348,
      "learning_rate": 0.003,
      "loss": 4.0964,
      "step": 3557
    },
    {
      "epoch": 0.03558,
      "grad_norm": 0.6075327774193832,
      "learning_rate": 0.003,
      "loss": 4.1258,
      "step": 3558
    },
    {
      "epoch": 0.03559,
      "grad_norm": 0.7848274408146922,
      "learning_rate": 0.003,
      "loss": 4.1122,
      "step": 3559
    },
    {
      "epoch": 0.0356,
      "grad_norm": 1.0374578386942082,
      "learning_rate": 0.003,
      "loss": 4.1241,
      "step": 3560
    },
    {
      "epoch": 0.03561,
      "grad_norm": 1.0758010083557943,
      "learning_rate": 0.003,
      "loss": 4.1508,
      "step": 3561
    },
    {
      "epoch": 0.03562,
      "grad_norm": 0.7322873944575906,
      "learning_rate": 0.003,
      "loss": 4.1072,
      "step": 3562
    },
    {
      "epoch": 0.03563,
      "grad_norm": 0.7971284325210378,
      "learning_rate": 0.003,
      "loss": 4.1392,
      "step": 3563
    },
    {
      "epoch": 0.03564,
      "grad_norm": 1.0246051569009555,
      "learning_rate": 0.003,
      "loss": 4.1233,
      "step": 3564
    },
    {
      "epoch": 0.03565,
      "grad_norm": 1.2267527206427153,
      "learning_rate": 0.003,
      "loss": 4.1252,
      "step": 3565
    },
    {
      "epoch": 0.03566,
      "grad_norm": 0.8984275051065349,
      "learning_rate": 0.003,
      "loss": 4.1198,
      "step": 3566
    },
    {
      "epoch": 0.03567,
      "grad_norm": 0.9814381465438691,
      "learning_rate": 0.003,
      "loss": 4.1164,
      "step": 3567
    },
    {
      "epoch": 0.03568,
      "grad_norm": 0.862338747629593,
      "learning_rate": 0.003,
      "loss": 4.1125,
      "step": 3568
    },
    {
      "epoch": 0.03569,
      "grad_norm": 0.8507776080373626,
      "learning_rate": 0.003,
      "loss": 4.1044,
      "step": 3569
    },
    {
      "epoch": 0.0357,
      "grad_norm": 0.8450119567501945,
      "learning_rate": 0.003,
      "loss": 4.108,
      "step": 3570
    },
    {
      "epoch": 0.03571,
      "grad_norm": 0.8474288365898862,
      "learning_rate": 0.003,
      "loss": 4.1151,
      "step": 3571
    },
    {
      "epoch": 0.03572,
      "grad_norm": 0.8860990170054822,
      "learning_rate": 0.003,
      "loss": 4.1365,
      "step": 3572
    },
    {
      "epoch": 0.03573,
      "grad_norm": 0.9378112912832053,
      "learning_rate": 0.003,
      "loss": 4.1084,
      "step": 3573
    },
    {
      "epoch": 0.03574,
      "grad_norm": 0.7793011201012979,
      "learning_rate": 0.003,
      "loss": 4.1265,
      "step": 3574
    },
    {
      "epoch": 0.03575,
      "grad_norm": 0.6347551630020188,
      "learning_rate": 0.003,
      "loss": 4.099,
      "step": 3575
    },
    {
      "epoch": 0.03576,
      "grad_norm": 0.6273033455713914,
      "learning_rate": 0.003,
      "loss": 4.0843,
      "step": 3576
    },
    {
      "epoch": 0.03577,
      "grad_norm": 0.5948625546551696,
      "learning_rate": 0.003,
      "loss": 4.1162,
      "step": 3577
    },
    {
      "epoch": 0.03578,
      "grad_norm": 0.5805191268794814,
      "learning_rate": 0.003,
      "loss": 4.1044,
      "step": 3578
    },
    {
      "epoch": 0.03579,
      "grad_norm": 0.5381920460033986,
      "learning_rate": 0.003,
      "loss": 4.1175,
      "step": 3579
    },
    {
      "epoch": 0.0358,
      "grad_norm": 0.7081167212858254,
      "learning_rate": 0.003,
      "loss": 4.0774,
      "step": 3580
    },
    {
      "epoch": 0.03581,
      "grad_norm": 0.7386340619770309,
      "learning_rate": 0.003,
      "loss": 4.0886,
      "step": 3581
    },
    {
      "epoch": 0.03582,
      "grad_norm": 0.7587714591488295,
      "learning_rate": 0.003,
      "loss": 4.0906,
      "step": 3582
    },
    {
      "epoch": 0.03583,
      "grad_norm": 0.7113793632761408,
      "learning_rate": 0.003,
      "loss": 4.0903,
      "step": 3583
    },
    {
      "epoch": 0.03584,
      "grad_norm": 0.7188054852978065,
      "learning_rate": 0.003,
      "loss": 4.0791,
      "step": 3584
    },
    {
      "epoch": 0.03585,
      "grad_norm": 0.8853261674987007,
      "learning_rate": 0.003,
      "loss": 4.0815,
      "step": 3585
    },
    {
      "epoch": 0.03586,
      "grad_norm": 0.92627880930667,
      "learning_rate": 0.003,
      "loss": 4.1062,
      "step": 3586
    },
    {
      "epoch": 0.03587,
      "grad_norm": 0.9331834952096867,
      "learning_rate": 0.003,
      "loss": 4.105,
      "step": 3587
    },
    {
      "epoch": 0.03588,
      "grad_norm": 1.056355930051097,
      "learning_rate": 0.003,
      "loss": 4.1099,
      "step": 3588
    },
    {
      "epoch": 0.03589,
      "grad_norm": 0.8554656185849556,
      "learning_rate": 0.003,
      "loss": 4.1212,
      "step": 3589
    },
    {
      "epoch": 0.0359,
      "grad_norm": 0.7792232208377492,
      "learning_rate": 0.003,
      "loss": 4.0836,
      "step": 3590
    },
    {
      "epoch": 0.03591,
      "grad_norm": 0.9342912310042636,
      "learning_rate": 0.003,
      "loss": 4.1217,
      "step": 3591
    },
    {
      "epoch": 0.03592,
      "grad_norm": 1.0503229079193128,
      "learning_rate": 0.003,
      "loss": 4.1126,
      "step": 3592
    },
    {
      "epoch": 0.03593,
      "grad_norm": 0.984608248076784,
      "learning_rate": 0.003,
      "loss": 4.1077,
      "step": 3593
    },
    {
      "epoch": 0.03594,
      "grad_norm": 0.9992015208659966,
      "learning_rate": 0.003,
      "loss": 4.1142,
      "step": 3594
    },
    {
      "epoch": 0.03595,
      "grad_norm": 0.9486124739849024,
      "learning_rate": 0.003,
      "loss": 4.1062,
      "step": 3595
    },
    {
      "epoch": 0.03596,
      "grad_norm": 0.8044450679529417,
      "learning_rate": 0.003,
      "loss": 4.1375,
      "step": 3596
    },
    {
      "epoch": 0.03597,
      "grad_norm": 0.7529472117586564,
      "learning_rate": 0.003,
      "loss": 4.1092,
      "step": 3597
    },
    {
      "epoch": 0.03598,
      "grad_norm": 0.8016519620578548,
      "learning_rate": 0.003,
      "loss": 4.1136,
      "step": 3598
    },
    {
      "epoch": 0.03599,
      "grad_norm": 0.8552258797660491,
      "learning_rate": 0.003,
      "loss": 4.1107,
      "step": 3599
    },
    {
      "epoch": 0.036,
      "grad_norm": 1.122743201613801,
      "learning_rate": 0.003,
      "loss": 4.1422,
      "step": 3600
    },
    {
      "epoch": 0.03601,
      "grad_norm": 0.9558780975359691,
      "learning_rate": 0.003,
      "loss": 4.1108,
      "step": 3601
    },
    {
      "epoch": 0.03602,
      "grad_norm": 1.0399279121526195,
      "learning_rate": 0.003,
      "loss": 4.1008,
      "step": 3602
    },
    {
      "epoch": 0.03603,
      "grad_norm": 1.1551062763334126,
      "learning_rate": 0.003,
      "loss": 4.0958,
      "step": 3603
    },
    {
      "epoch": 0.03604,
      "grad_norm": 1.0548506296076239,
      "learning_rate": 0.003,
      "loss": 4.1313,
      "step": 3604
    },
    {
      "epoch": 0.03605,
      "grad_norm": 1.001983402921064,
      "learning_rate": 0.003,
      "loss": 4.1215,
      "step": 3605
    },
    {
      "epoch": 0.03606,
      "grad_norm": 1.0312319108923618,
      "learning_rate": 0.003,
      "loss": 4.1116,
      "step": 3606
    },
    {
      "epoch": 0.03607,
      "grad_norm": 0.8219603208628181,
      "learning_rate": 0.003,
      "loss": 4.1019,
      "step": 3607
    },
    {
      "epoch": 0.03608,
      "grad_norm": 0.6498677677486123,
      "learning_rate": 0.003,
      "loss": 4.0969,
      "step": 3608
    },
    {
      "epoch": 0.03609,
      "grad_norm": 0.7278852617397139,
      "learning_rate": 0.003,
      "loss": 4.1257,
      "step": 3609
    },
    {
      "epoch": 0.0361,
      "grad_norm": 0.6666501580008998,
      "learning_rate": 0.003,
      "loss": 4.126,
      "step": 3610
    },
    {
      "epoch": 0.03611,
      "grad_norm": 0.6972920943122692,
      "learning_rate": 0.003,
      "loss": 4.0758,
      "step": 3611
    },
    {
      "epoch": 0.03612,
      "grad_norm": 0.8293279199659314,
      "learning_rate": 0.003,
      "loss": 4.0764,
      "step": 3612
    },
    {
      "epoch": 0.03613,
      "grad_norm": 0.8280160598898497,
      "learning_rate": 0.003,
      "loss": 4.1168,
      "step": 3613
    },
    {
      "epoch": 0.03614,
      "grad_norm": 0.8586511775157879,
      "learning_rate": 0.003,
      "loss": 4.0947,
      "step": 3614
    },
    {
      "epoch": 0.03615,
      "grad_norm": 0.8273149005356012,
      "learning_rate": 0.003,
      "loss": 4.1065,
      "step": 3615
    },
    {
      "epoch": 0.03616,
      "grad_norm": 0.7395998497715127,
      "learning_rate": 0.003,
      "loss": 4.1143,
      "step": 3616
    },
    {
      "epoch": 0.03617,
      "grad_norm": 0.6904839766505554,
      "learning_rate": 0.003,
      "loss": 4.1067,
      "step": 3617
    },
    {
      "epoch": 0.03618,
      "grad_norm": 0.7602021438934125,
      "learning_rate": 0.003,
      "loss": 4.0988,
      "step": 3618
    },
    {
      "epoch": 0.03619,
      "grad_norm": 0.9970287144190367,
      "learning_rate": 0.003,
      "loss": 4.1035,
      "step": 3619
    },
    {
      "epoch": 0.0362,
      "grad_norm": 1.0227512354316275,
      "learning_rate": 0.003,
      "loss": 4.1153,
      "step": 3620
    },
    {
      "epoch": 0.03621,
      "grad_norm": 0.8480434130474432,
      "learning_rate": 0.003,
      "loss": 4.112,
      "step": 3621
    },
    {
      "epoch": 0.03622,
      "grad_norm": 0.9271509010196743,
      "learning_rate": 0.003,
      "loss": 4.0842,
      "step": 3622
    },
    {
      "epoch": 0.03623,
      "grad_norm": 1.022948927798352,
      "learning_rate": 0.003,
      "loss": 4.0792,
      "step": 3623
    },
    {
      "epoch": 0.03624,
      "grad_norm": 0.9265641312425992,
      "learning_rate": 0.003,
      "loss": 4.0852,
      "step": 3624
    },
    {
      "epoch": 0.03625,
      "grad_norm": 0.7813804518175715,
      "learning_rate": 0.003,
      "loss": 4.1199,
      "step": 3625
    },
    {
      "epoch": 0.03626,
      "grad_norm": 0.7288040056584648,
      "learning_rate": 0.003,
      "loss": 4.0939,
      "step": 3626
    },
    {
      "epoch": 0.03627,
      "grad_norm": 0.7898367328534155,
      "learning_rate": 0.003,
      "loss": 4.0953,
      "step": 3627
    },
    {
      "epoch": 0.03628,
      "grad_norm": 0.7391500282099692,
      "learning_rate": 0.003,
      "loss": 4.1039,
      "step": 3628
    },
    {
      "epoch": 0.03629,
      "grad_norm": 0.7244163565447427,
      "learning_rate": 0.003,
      "loss": 4.1225,
      "step": 3629
    },
    {
      "epoch": 0.0363,
      "grad_norm": 0.8351668360599707,
      "learning_rate": 0.003,
      "loss": 4.108,
      "step": 3630
    },
    {
      "epoch": 0.03631,
      "grad_norm": 1.1144221832925818,
      "learning_rate": 0.003,
      "loss": 4.1113,
      "step": 3631
    },
    {
      "epoch": 0.03632,
      "grad_norm": 0.9360712274389132,
      "learning_rate": 0.003,
      "loss": 4.1011,
      "step": 3632
    },
    {
      "epoch": 0.03633,
      "grad_norm": 0.9876936083254465,
      "learning_rate": 0.003,
      "loss": 4.1331,
      "step": 3633
    },
    {
      "epoch": 0.03634,
      "grad_norm": 0.955681965536761,
      "learning_rate": 0.003,
      "loss": 4.1088,
      "step": 3634
    },
    {
      "epoch": 0.03635,
      "grad_norm": 0.9023651977998233,
      "learning_rate": 0.003,
      "loss": 4.0924,
      "step": 3635
    },
    {
      "epoch": 0.03636,
      "grad_norm": 0.8274154329119018,
      "learning_rate": 0.003,
      "loss": 4.1097,
      "step": 3636
    },
    {
      "epoch": 0.03637,
      "grad_norm": 0.9898592340376765,
      "learning_rate": 0.003,
      "loss": 4.1049,
      "step": 3637
    },
    {
      "epoch": 0.03638,
      "grad_norm": 0.961405507097599,
      "learning_rate": 0.003,
      "loss": 4.1193,
      "step": 3638
    },
    {
      "epoch": 0.03639,
      "grad_norm": 1.0204413055029042,
      "learning_rate": 0.003,
      "loss": 4.145,
      "step": 3639
    },
    {
      "epoch": 0.0364,
      "grad_norm": 0.9674075028743515,
      "learning_rate": 0.003,
      "loss": 4.1379,
      "step": 3640
    },
    {
      "epoch": 0.03641,
      "grad_norm": 1.0768346328712466,
      "learning_rate": 0.003,
      "loss": 4.125,
      "step": 3641
    },
    {
      "epoch": 0.03642,
      "grad_norm": 0.9224514479840723,
      "learning_rate": 0.003,
      "loss": 4.1408,
      "step": 3642
    },
    {
      "epoch": 0.03643,
      "grad_norm": 1.0336265072437074,
      "learning_rate": 0.003,
      "loss": 4.1059,
      "step": 3643
    },
    {
      "epoch": 0.03644,
      "grad_norm": 0.8524724447399462,
      "learning_rate": 0.003,
      "loss": 4.1268,
      "step": 3644
    },
    {
      "epoch": 0.03645,
      "grad_norm": 0.746873753702558,
      "learning_rate": 0.003,
      "loss": 4.0924,
      "step": 3645
    },
    {
      "epoch": 0.03646,
      "grad_norm": 0.8800991852578832,
      "learning_rate": 0.003,
      "loss": 4.1164,
      "step": 3646
    },
    {
      "epoch": 0.03647,
      "grad_norm": 0.9340999264767859,
      "learning_rate": 0.003,
      "loss": 4.1139,
      "step": 3647
    },
    {
      "epoch": 0.03648,
      "grad_norm": 0.9312651206865153,
      "learning_rate": 0.003,
      "loss": 4.1023,
      "step": 3648
    },
    {
      "epoch": 0.03649,
      "grad_norm": 0.9165122392124831,
      "learning_rate": 0.003,
      "loss": 4.1262,
      "step": 3649
    },
    {
      "epoch": 0.0365,
      "grad_norm": 0.956526663147831,
      "learning_rate": 0.003,
      "loss": 4.1081,
      "step": 3650
    },
    {
      "epoch": 0.03651,
      "grad_norm": 0.8484726129203977,
      "learning_rate": 0.003,
      "loss": 4.1246,
      "step": 3651
    },
    {
      "epoch": 0.03652,
      "grad_norm": 0.7894465078154086,
      "learning_rate": 0.003,
      "loss": 4.1058,
      "step": 3652
    },
    {
      "epoch": 0.03653,
      "grad_norm": 0.6892303382737016,
      "learning_rate": 0.003,
      "loss": 4.0903,
      "step": 3653
    },
    {
      "epoch": 0.03654,
      "grad_norm": 0.6682166624701149,
      "learning_rate": 0.003,
      "loss": 4.0881,
      "step": 3654
    },
    {
      "epoch": 0.03655,
      "grad_norm": 0.6644222696456153,
      "learning_rate": 0.003,
      "loss": 4.1114,
      "step": 3655
    },
    {
      "epoch": 0.03656,
      "grad_norm": 0.6972374084601054,
      "learning_rate": 0.003,
      "loss": 4.0992,
      "step": 3656
    },
    {
      "epoch": 0.03657,
      "grad_norm": 0.6757860783494947,
      "learning_rate": 0.003,
      "loss": 4.071,
      "step": 3657
    },
    {
      "epoch": 0.03658,
      "grad_norm": 0.7083328895452327,
      "learning_rate": 0.003,
      "loss": 4.1336,
      "step": 3658
    },
    {
      "epoch": 0.03659,
      "grad_norm": 0.7718946593018728,
      "learning_rate": 0.003,
      "loss": 4.0862,
      "step": 3659
    },
    {
      "epoch": 0.0366,
      "grad_norm": 0.7376732318109632,
      "learning_rate": 0.003,
      "loss": 4.1142,
      "step": 3660
    },
    {
      "epoch": 0.03661,
      "grad_norm": 0.8048594254747962,
      "learning_rate": 0.003,
      "loss": 4.0967,
      "step": 3661
    },
    {
      "epoch": 0.03662,
      "grad_norm": 0.9041684527522658,
      "learning_rate": 0.003,
      "loss": 4.0936,
      "step": 3662
    },
    {
      "epoch": 0.03663,
      "grad_norm": 0.8857606200128569,
      "learning_rate": 0.003,
      "loss": 4.1121,
      "step": 3663
    },
    {
      "epoch": 0.03664,
      "grad_norm": 0.9256759603717064,
      "learning_rate": 0.003,
      "loss": 4.0945,
      "step": 3664
    },
    {
      "epoch": 0.03665,
      "grad_norm": 0.9238563202936576,
      "learning_rate": 0.003,
      "loss": 4.1202,
      "step": 3665
    },
    {
      "epoch": 0.03666,
      "grad_norm": 0.8203365130003366,
      "learning_rate": 0.003,
      "loss": 4.074,
      "step": 3666
    },
    {
      "epoch": 0.03667,
      "grad_norm": 0.9024471801382937,
      "learning_rate": 0.003,
      "loss": 4.0876,
      "step": 3667
    },
    {
      "epoch": 0.03668,
      "grad_norm": 1.0552967059744773,
      "learning_rate": 0.003,
      "loss": 4.1067,
      "step": 3668
    },
    {
      "epoch": 0.03669,
      "grad_norm": 1.0820390277779695,
      "learning_rate": 0.003,
      "loss": 4.0894,
      "step": 3669
    },
    {
      "epoch": 0.0367,
      "grad_norm": 0.9454674065362177,
      "learning_rate": 0.003,
      "loss": 4.1199,
      "step": 3670
    },
    {
      "epoch": 0.03671,
      "grad_norm": 1.0059330853811892,
      "learning_rate": 0.003,
      "loss": 4.1144,
      "step": 3671
    },
    {
      "epoch": 0.03672,
      "grad_norm": 1.2723317844660584,
      "learning_rate": 0.003,
      "loss": 4.1282,
      "step": 3672
    },
    {
      "epoch": 0.03673,
      "grad_norm": 0.8050766891971283,
      "learning_rate": 0.003,
      "loss": 4.1214,
      "step": 3673
    },
    {
      "epoch": 0.03674,
      "grad_norm": 0.8682507056177615,
      "learning_rate": 0.003,
      "loss": 4.0873,
      "step": 3674
    },
    {
      "epoch": 0.03675,
      "grad_norm": 1.0276133687150726,
      "learning_rate": 0.003,
      "loss": 4.1309,
      "step": 3675
    },
    {
      "epoch": 0.03676,
      "grad_norm": 1.0466449018255517,
      "learning_rate": 0.003,
      "loss": 4.0901,
      "step": 3676
    },
    {
      "epoch": 0.03677,
      "grad_norm": 0.9830229711282336,
      "learning_rate": 0.003,
      "loss": 4.1062,
      "step": 3677
    },
    {
      "epoch": 0.03678,
      "grad_norm": 0.9217069489116423,
      "learning_rate": 0.003,
      "loss": 4.1303,
      "step": 3678
    },
    {
      "epoch": 0.03679,
      "grad_norm": 1.0822177384710947,
      "learning_rate": 0.003,
      "loss": 4.1232,
      "step": 3679
    },
    {
      "epoch": 0.0368,
      "grad_norm": 1.0907486819128838,
      "learning_rate": 0.003,
      "loss": 4.1207,
      "step": 3680
    },
    {
      "epoch": 0.03681,
      "grad_norm": 0.8227793104650016,
      "learning_rate": 0.003,
      "loss": 4.0847,
      "step": 3681
    },
    {
      "epoch": 0.03682,
      "grad_norm": 0.7808126016328544,
      "learning_rate": 0.003,
      "loss": 4.1234,
      "step": 3682
    },
    {
      "epoch": 0.03683,
      "grad_norm": 0.7174400556511353,
      "learning_rate": 0.003,
      "loss": 4.0807,
      "step": 3683
    },
    {
      "epoch": 0.03684,
      "grad_norm": 0.6775479041332162,
      "learning_rate": 0.003,
      "loss": 4.104,
      "step": 3684
    },
    {
      "epoch": 0.03685,
      "grad_norm": 0.6420088354614908,
      "learning_rate": 0.003,
      "loss": 4.0865,
      "step": 3685
    },
    {
      "epoch": 0.03686,
      "grad_norm": 0.598867465128322,
      "learning_rate": 0.003,
      "loss": 4.1055,
      "step": 3686
    },
    {
      "epoch": 0.03687,
      "grad_norm": 0.6115731068077563,
      "learning_rate": 0.003,
      "loss": 4.1238,
      "step": 3687
    },
    {
      "epoch": 0.03688,
      "grad_norm": 0.7067511764658732,
      "learning_rate": 0.003,
      "loss": 4.1159,
      "step": 3688
    },
    {
      "epoch": 0.03689,
      "grad_norm": 0.7020192080483855,
      "learning_rate": 0.003,
      "loss": 4.0827,
      "step": 3689
    },
    {
      "epoch": 0.0369,
      "grad_norm": 0.6267089096242754,
      "learning_rate": 0.003,
      "loss": 4.1099,
      "step": 3690
    },
    {
      "epoch": 0.03691,
      "grad_norm": 0.5456674081937926,
      "learning_rate": 0.003,
      "loss": 4.0896,
      "step": 3691
    },
    {
      "epoch": 0.03692,
      "grad_norm": 0.6082919076452641,
      "learning_rate": 0.003,
      "loss": 4.127,
      "step": 3692
    },
    {
      "epoch": 0.03693,
      "grad_norm": 0.7939340117807796,
      "learning_rate": 0.003,
      "loss": 4.0993,
      "step": 3693
    },
    {
      "epoch": 0.03694,
      "grad_norm": 1.0013591289075594,
      "learning_rate": 0.003,
      "loss": 4.1044,
      "step": 3694
    },
    {
      "epoch": 0.03695,
      "grad_norm": 1.022061020358787,
      "learning_rate": 0.003,
      "loss": 4.1043,
      "step": 3695
    },
    {
      "epoch": 0.03696,
      "grad_norm": 0.6767445565263814,
      "learning_rate": 0.003,
      "loss": 4.0932,
      "step": 3696
    },
    {
      "epoch": 0.03697,
      "grad_norm": 0.6543502581869196,
      "learning_rate": 0.003,
      "loss": 4.0827,
      "step": 3697
    },
    {
      "epoch": 0.03698,
      "grad_norm": 0.898978343569856,
      "learning_rate": 0.003,
      "loss": 4.1225,
      "step": 3698
    },
    {
      "epoch": 0.03699,
      "grad_norm": 0.8905816524859695,
      "learning_rate": 0.003,
      "loss": 4.1114,
      "step": 3699
    },
    {
      "epoch": 0.037,
      "grad_norm": 0.7721797452363818,
      "learning_rate": 0.003,
      "loss": 4.0978,
      "step": 3700
    },
    {
      "epoch": 0.03701,
      "grad_norm": 0.9150903630097929,
      "learning_rate": 0.003,
      "loss": 4.1031,
      "step": 3701
    },
    {
      "epoch": 0.03702,
      "grad_norm": 0.846227592035782,
      "learning_rate": 0.003,
      "loss": 4.0942,
      "step": 3702
    },
    {
      "epoch": 0.03703,
      "grad_norm": 0.797196639545616,
      "learning_rate": 0.003,
      "loss": 4.105,
      "step": 3703
    },
    {
      "epoch": 0.03704,
      "grad_norm": 0.9810543408588123,
      "learning_rate": 0.003,
      "loss": 4.1134,
      "step": 3704
    },
    {
      "epoch": 0.03705,
      "grad_norm": 1.0722834307776175,
      "learning_rate": 0.003,
      "loss": 4.1352,
      "step": 3705
    },
    {
      "epoch": 0.03706,
      "grad_norm": 0.9721358338604488,
      "learning_rate": 0.003,
      "loss": 4.1164,
      "step": 3706
    },
    {
      "epoch": 0.03707,
      "grad_norm": 0.9790738664900162,
      "learning_rate": 0.003,
      "loss": 4.1016,
      "step": 3707
    },
    {
      "epoch": 0.03708,
      "grad_norm": 0.9353887990568662,
      "learning_rate": 0.003,
      "loss": 4.1207,
      "step": 3708
    },
    {
      "epoch": 0.03709,
      "grad_norm": 0.9856206989122267,
      "learning_rate": 0.003,
      "loss": 4.1007,
      "step": 3709
    },
    {
      "epoch": 0.0371,
      "grad_norm": 1.0451787463045008,
      "learning_rate": 0.003,
      "loss": 4.138,
      "step": 3710
    },
    {
      "epoch": 0.03711,
      "grad_norm": 1.0753305901256527,
      "learning_rate": 0.003,
      "loss": 4.1107,
      "step": 3711
    },
    {
      "epoch": 0.03712,
      "grad_norm": 0.9432866794357371,
      "learning_rate": 0.003,
      "loss": 4.1099,
      "step": 3712
    },
    {
      "epoch": 0.03713,
      "grad_norm": 1.0228561402780971,
      "learning_rate": 0.003,
      "loss": 4.1028,
      "step": 3713
    },
    {
      "epoch": 0.03714,
      "grad_norm": 1.0155399617885337,
      "learning_rate": 0.003,
      "loss": 4.1147,
      "step": 3714
    },
    {
      "epoch": 0.03715,
      "grad_norm": 0.9501309057637405,
      "learning_rate": 0.003,
      "loss": 4.1227,
      "step": 3715
    },
    {
      "epoch": 0.03716,
      "grad_norm": 0.8368341993061407,
      "learning_rate": 0.003,
      "loss": 4.1051,
      "step": 3716
    },
    {
      "epoch": 0.03717,
      "grad_norm": 0.8200844350513596,
      "learning_rate": 0.003,
      "loss": 4.1116,
      "step": 3717
    },
    {
      "epoch": 0.03718,
      "grad_norm": 0.8192379479874041,
      "learning_rate": 0.003,
      "loss": 4.0813,
      "step": 3718
    },
    {
      "epoch": 0.03719,
      "grad_norm": 0.9007315714309677,
      "learning_rate": 0.003,
      "loss": 4.1138,
      "step": 3719
    },
    {
      "epoch": 0.0372,
      "grad_norm": 0.9179074609676963,
      "learning_rate": 0.003,
      "loss": 4.1166,
      "step": 3720
    },
    {
      "epoch": 0.03721,
      "grad_norm": 0.848049967304013,
      "learning_rate": 0.003,
      "loss": 4.1026,
      "step": 3721
    },
    {
      "epoch": 0.03722,
      "grad_norm": 0.7009453544906782,
      "learning_rate": 0.003,
      "loss": 4.0992,
      "step": 3722
    },
    {
      "epoch": 0.03723,
      "grad_norm": 0.6973322955990157,
      "learning_rate": 0.003,
      "loss": 4.1295,
      "step": 3723
    },
    {
      "epoch": 0.03724,
      "grad_norm": 0.6858684932844772,
      "learning_rate": 0.003,
      "loss": 4.1048,
      "step": 3724
    },
    {
      "epoch": 0.03725,
      "grad_norm": 0.5759232117551641,
      "learning_rate": 0.003,
      "loss": 4.1114,
      "step": 3725
    },
    {
      "epoch": 0.03726,
      "grad_norm": 0.7355358643002436,
      "learning_rate": 0.003,
      "loss": 4.0888,
      "step": 3726
    },
    {
      "epoch": 0.03727,
      "grad_norm": 0.975453002086878,
      "learning_rate": 0.003,
      "loss": 4.128,
      "step": 3727
    },
    {
      "epoch": 0.03728,
      "grad_norm": 1.2871266145714035,
      "learning_rate": 0.003,
      "loss": 4.0871,
      "step": 3728
    },
    {
      "epoch": 0.03729,
      "grad_norm": 0.6800175356883061,
      "learning_rate": 0.003,
      "loss": 4.1097,
      "step": 3729
    },
    {
      "epoch": 0.0373,
      "grad_norm": 0.8995970728462417,
      "learning_rate": 0.003,
      "loss": 4.0995,
      "step": 3730
    },
    {
      "epoch": 0.03731,
      "grad_norm": 0.8748991092762279,
      "learning_rate": 0.003,
      "loss": 4.1225,
      "step": 3731
    },
    {
      "epoch": 0.03732,
      "grad_norm": 0.7674871139345869,
      "learning_rate": 0.003,
      "loss": 4.1157,
      "step": 3732
    },
    {
      "epoch": 0.03733,
      "grad_norm": 0.7725141224382513,
      "learning_rate": 0.003,
      "loss": 4.0939,
      "step": 3733
    },
    {
      "epoch": 0.03734,
      "grad_norm": 0.923649436345944,
      "learning_rate": 0.003,
      "loss": 4.101,
      "step": 3734
    },
    {
      "epoch": 0.03735,
      "grad_norm": 0.85452781445576,
      "learning_rate": 0.003,
      "loss": 4.0922,
      "step": 3735
    },
    {
      "epoch": 0.03736,
      "grad_norm": 0.8691053620191994,
      "learning_rate": 0.003,
      "loss": 4.1222,
      "step": 3736
    },
    {
      "epoch": 0.03737,
      "grad_norm": 0.8505997578362463,
      "learning_rate": 0.003,
      "loss": 4.1089,
      "step": 3737
    },
    {
      "epoch": 0.03738,
      "grad_norm": 0.7742946039499282,
      "learning_rate": 0.003,
      "loss": 4.1245,
      "step": 3738
    },
    {
      "epoch": 0.03739,
      "grad_norm": 0.7994977680328913,
      "learning_rate": 0.003,
      "loss": 4.0899,
      "step": 3739
    },
    {
      "epoch": 0.0374,
      "grad_norm": 0.8536215986910768,
      "learning_rate": 0.003,
      "loss": 4.1105,
      "step": 3740
    },
    {
      "epoch": 0.03741,
      "grad_norm": 0.9409826319598602,
      "learning_rate": 0.003,
      "loss": 4.0963,
      "step": 3741
    },
    {
      "epoch": 0.03742,
      "grad_norm": 0.8593777917195926,
      "learning_rate": 0.003,
      "loss": 4.0892,
      "step": 3742
    },
    {
      "epoch": 0.03743,
      "grad_norm": 0.7563917031775911,
      "learning_rate": 0.003,
      "loss": 4.0831,
      "step": 3743
    },
    {
      "epoch": 0.03744,
      "grad_norm": 0.7103064287871557,
      "learning_rate": 0.003,
      "loss": 4.1122,
      "step": 3744
    },
    {
      "epoch": 0.03745,
      "grad_norm": 0.7615538447680247,
      "learning_rate": 0.003,
      "loss": 4.0968,
      "step": 3745
    },
    {
      "epoch": 0.03746,
      "grad_norm": 0.9955801956257352,
      "learning_rate": 0.003,
      "loss": 4.1095,
      "step": 3746
    },
    {
      "epoch": 0.03747,
      "grad_norm": 1.0171288172949948,
      "learning_rate": 0.003,
      "loss": 4.1216,
      "step": 3747
    },
    {
      "epoch": 0.03748,
      "grad_norm": 0.7896231689985856,
      "learning_rate": 0.003,
      "loss": 4.0881,
      "step": 3748
    },
    {
      "epoch": 0.03749,
      "grad_norm": 0.6375567183654871,
      "learning_rate": 0.003,
      "loss": 4.095,
      "step": 3749
    },
    {
      "epoch": 0.0375,
      "grad_norm": 0.7372117367719312,
      "learning_rate": 0.003,
      "loss": 4.1199,
      "step": 3750
    },
    {
      "epoch": 0.03751,
      "grad_norm": 0.7461639244591386,
      "learning_rate": 0.003,
      "loss": 4.0717,
      "step": 3751
    },
    {
      "epoch": 0.03752,
      "grad_norm": 0.7827559728941343,
      "learning_rate": 0.003,
      "loss": 4.1018,
      "step": 3752
    },
    {
      "epoch": 0.03753,
      "grad_norm": 0.7771731000337748,
      "learning_rate": 0.003,
      "loss": 4.1046,
      "step": 3753
    },
    {
      "epoch": 0.03754,
      "grad_norm": 1.0118496549896825,
      "learning_rate": 0.003,
      "loss": 4.1181,
      "step": 3754
    },
    {
      "epoch": 0.03755,
      "grad_norm": 1.2993270154749912,
      "learning_rate": 0.003,
      "loss": 4.124,
      "step": 3755
    },
    {
      "epoch": 0.03756,
      "grad_norm": 0.8028019558848566,
      "learning_rate": 0.003,
      "loss": 4.0805,
      "step": 3756
    },
    {
      "epoch": 0.03757,
      "grad_norm": 0.9071996665258343,
      "learning_rate": 0.003,
      "loss": 4.1147,
      "step": 3757
    },
    {
      "epoch": 0.03758,
      "grad_norm": 0.8773892948854304,
      "learning_rate": 0.003,
      "loss": 4.0569,
      "step": 3758
    },
    {
      "epoch": 0.03759,
      "grad_norm": 0.8198221769200625,
      "learning_rate": 0.003,
      "loss": 4.1152,
      "step": 3759
    },
    {
      "epoch": 0.0376,
      "grad_norm": 0.8540457248460523,
      "learning_rate": 0.003,
      "loss": 4.1047,
      "step": 3760
    },
    {
      "epoch": 0.03761,
      "grad_norm": 0.7667243415560708,
      "learning_rate": 0.003,
      "loss": 4.071,
      "step": 3761
    },
    {
      "epoch": 0.03762,
      "grad_norm": 0.8865050328861852,
      "learning_rate": 0.003,
      "loss": 4.1272,
      "step": 3762
    },
    {
      "epoch": 0.03763,
      "grad_norm": 0.9591731803168125,
      "learning_rate": 0.003,
      "loss": 4.1125,
      "step": 3763
    },
    {
      "epoch": 0.03764,
      "grad_norm": 0.9480606119145719,
      "learning_rate": 0.003,
      "loss": 4.125,
      "step": 3764
    },
    {
      "epoch": 0.03765,
      "grad_norm": 0.9684597356680353,
      "learning_rate": 0.003,
      "loss": 4.1473,
      "step": 3765
    },
    {
      "epoch": 0.03766,
      "grad_norm": 0.8344731198089695,
      "learning_rate": 0.003,
      "loss": 4.0824,
      "step": 3766
    },
    {
      "epoch": 0.03767,
      "grad_norm": 0.7765792550643911,
      "learning_rate": 0.003,
      "loss": 4.1232,
      "step": 3767
    },
    {
      "epoch": 0.03768,
      "grad_norm": 0.7642334739357078,
      "learning_rate": 0.003,
      "loss": 4.098,
      "step": 3768
    },
    {
      "epoch": 0.03769,
      "grad_norm": 0.7211211499394363,
      "learning_rate": 0.003,
      "loss": 4.089,
      "step": 3769
    },
    {
      "epoch": 0.0377,
      "grad_norm": 0.6106973291196678,
      "learning_rate": 0.003,
      "loss": 4.0668,
      "step": 3770
    },
    {
      "epoch": 0.03771,
      "grad_norm": 0.5628256340241241,
      "learning_rate": 0.003,
      "loss": 4.0822,
      "step": 3771
    },
    {
      "epoch": 0.03772,
      "grad_norm": 0.5845993807831852,
      "learning_rate": 0.003,
      "loss": 4.1005,
      "step": 3772
    },
    {
      "epoch": 0.03773,
      "grad_norm": 0.6697980766862262,
      "learning_rate": 0.003,
      "loss": 4.1105,
      "step": 3773
    },
    {
      "epoch": 0.03774,
      "grad_norm": 0.8252901528282155,
      "learning_rate": 0.003,
      "loss": 4.0962,
      "step": 3774
    },
    {
      "epoch": 0.03775,
      "grad_norm": 0.9517527171183254,
      "learning_rate": 0.003,
      "loss": 4.1173,
      "step": 3775
    },
    {
      "epoch": 0.03776,
      "grad_norm": 1.0897263182368793,
      "learning_rate": 0.003,
      "loss": 4.1013,
      "step": 3776
    },
    {
      "epoch": 0.03777,
      "grad_norm": 0.8653535869810033,
      "learning_rate": 0.003,
      "loss": 4.1103,
      "step": 3777
    },
    {
      "epoch": 0.03778,
      "grad_norm": 0.7342155379460369,
      "learning_rate": 0.003,
      "loss": 4.0844,
      "step": 3778
    },
    {
      "epoch": 0.03779,
      "grad_norm": 0.7679190695396096,
      "learning_rate": 0.003,
      "loss": 4.0992,
      "step": 3779
    },
    {
      "epoch": 0.0378,
      "grad_norm": 0.8679609457918785,
      "learning_rate": 0.003,
      "loss": 4.1037,
      "step": 3780
    },
    {
      "epoch": 0.03781,
      "grad_norm": 1.0049631112041457,
      "learning_rate": 0.003,
      "loss": 4.1356,
      "step": 3781
    },
    {
      "epoch": 0.03782,
      "grad_norm": 1.092459674146767,
      "learning_rate": 0.003,
      "loss": 4.1145,
      "step": 3782
    },
    {
      "epoch": 0.03783,
      "grad_norm": 0.9622890806365146,
      "learning_rate": 0.003,
      "loss": 4.1132,
      "step": 3783
    },
    {
      "epoch": 0.03784,
      "grad_norm": 1.0671033317728673,
      "learning_rate": 0.003,
      "loss": 4.1163,
      "step": 3784
    },
    {
      "epoch": 0.03785,
      "grad_norm": 1.0407807642051592,
      "learning_rate": 0.003,
      "loss": 4.1009,
      "step": 3785
    },
    {
      "epoch": 0.03786,
      "grad_norm": 1.125690448525855,
      "learning_rate": 0.003,
      "loss": 4.1206,
      "step": 3786
    },
    {
      "epoch": 0.03787,
      "grad_norm": 1.0253559423721736,
      "learning_rate": 0.003,
      "loss": 4.108,
      "step": 3787
    },
    {
      "epoch": 0.03788,
      "grad_norm": 1.0136069570600932,
      "learning_rate": 0.003,
      "loss": 4.1383,
      "step": 3788
    },
    {
      "epoch": 0.03789,
      "grad_norm": 1.1118575565083433,
      "learning_rate": 0.003,
      "loss": 4.0781,
      "step": 3789
    },
    {
      "epoch": 0.0379,
      "grad_norm": 0.7950650221590647,
      "learning_rate": 0.003,
      "loss": 4.0936,
      "step": 3790
    },
    {
      "epoch": 0.03791,
      "grad_norm": 0.6642703034618491,
      "learning_rate": 0.003,
      "loss": 4.102,
      "step": 3791
    },
    {
      "epoch": 0.03792,
      "grad_norm": 0.6118532884669681,
      "learning_rate": 0.003,
      "loss": 4.131,
      "step": 3792
    },
    {
      "epoch": 0.03793,
      "grad_norm": 0.6538749880963972,
      "learning_rate": 0.003,
      "loss": 4.1243,
      "step": 3793
    },
    {
      "epoch": 0.03794,
      "grad_norm": 0.7185276449261756,
      "learning_rate": 0.003,
      "loss": 4.1251,
      "step": 3794
    },
    {
      "epoch": 0.03795,
      "grad_norm": 0.8476725115524525,
      "learning_rate": 0.003,
      "loss": 4.1113,
      "step": 3795
    },
    {
      "epoch": 0.03796,
      "grad_norm": 0.912186760325603,
      "learning_rate": 0.003,
      "loss": 4.0943,
      "step": 3796
    },
    {
      "epoch": 0.03797,
      "grad_norm": 0.8535289933328677,
      "learning_rate": 0.003,
      "loss": 4.0821,
      "step": 3797
    },
    {
      "epoch": 0.03798,
      "grad_norm": 0.8439916131790419,
      "learning_rate": 0.003,
      "loss": 4.0994,
      "step": 3798
    },
    {
      "epoch": 0.03799,
      "grad_norm": 0.977315385277912,
      "learning_rate": 0.003,
      "loss": 4.1126,
      "step": 3799
    },
    {
      "epoch": 0.038,
      "grad_norm": 1.2434381910310124,
      "learning_rate": 0.003,
      "loss": 4.1299,
      "step": 3800
    },
    {
      "epoch": 0.03801,
      "grad_norm": 0.8428497034893558,
      "learning_rate": 0.003,
      "loss": 4.1253,
      "step": 3801
    },
    {
      "epoch": 0.03802,
      "grad_norm": 0.7525631044925924,
      "learning_rate": 0.003,
      "loss": 4.0961,
      "step": 3802
    },
    {
      "epoch": 0.03803,
      "grad_norm": 0.8068378132076305,
      "learning_rate": 0.003,
      "loss": 4.1244,
      "step": 3803
    },
    {
      "epoch": 0.03804,
      "grad_norm": 0.8984500712876651,
      "learning_rate": 0.003,
      "loss": 4.0932,
      "step": 3804
    },
    {
      "epoch": 0.03805,
      "grad_norm": 0.9405144408554322,
      "learning_rate": 0.003,
      "loss": 4.1103,
      "step": 3805
    },
    {
      "epoch": 0.03806,
      "grad_norm": 1.019634044722222,
      "learning_rate": 0.003,
      "loss": 4.1129,
      "step": 3806
    },
    {
      "epoch": 0.03807,
      "grad_norm": 0.8853714667588649,
      "learning_rate": 0.003,
      "loss": 4.1011,
      "step": 3807
    },
    {
      "epoch": 0.03808,
      "grad_norm": 0.9597650598957446,
      "learning_rate": 0.003,
      "loss": 4.1172,
      "step": 3808
    },
    {
      "epoch": 0.03809,
      "grad_norm": 1.067683764868459,
      "learning_rate": 0.003,
      "loss": 4.1028,
      "step": 3809
    },
    {
      "epoch": 0.0381,
      "grad_norm": 0.9853327608719147,
      "learning_rate": 0.003,
      "loss": 4.0906,
      "step": 3810
    },
    {
      "epoch": 0.03811,
      "grad_norm": 1.1386409326086608,
      "learning_rate": 0.003,
      "loss": 4.1243,
      "step": 3811
    },
    {
      "epoch": 0.03812,
      "grad_norm": 0.8785836941800041,
      "learning_rate": 0.003,
      "loss": 4.1117,
      "step": 3812
    },
    {
      "epoch": 0.03813,
      "grad_norm": 0.719604168428238,
      "learning_rate": 0.003,
      "loss": 4.0807,
      "step": 3813
    },
    {
      "epoch": 0.03814,
      "grad_norm": 0.8437531723051056,
      "learning_rate": 0.003,
      "loss": 4.1129,
      "step": 3814
    },
    {
      "epoch": 0.03815,
      "grad_norm": 0.9017994260619784,
      "learning_rate": 0.003,
      "loss": 4.1038,
      "step": 3815
    },
    {
      "epoch": 0.03816,
      "grad_norm": 1.0098474951212937,
      "learning_rate": 0.003,
      "loss": 4.107,
      "step": 3816
    },
    {
      "epoch": 0.03817,
      "grad_norm": 0.9361762835503497,
      "learning_rate": 0.003,
      "loss": 4.0878,
      "step": 3817
    },
    {
      "epoch": 0.03818,
      "grad_norm": 1.037066830030048,
      "learning_rate": 0.003,
      "loss": 4.1377,
      "step": 3818
    },
    {
      "epoch": 0.03819,
      "grad_norm": 1.156007843641428,
      "learning_rate": 0.003,
      "loss": 4.0914,
      "step": 3819
    },
    {
      "epoch": 0.0382,
      "grad_norm": 0.9923419777257736,
      "learning_rate": 0.003,
      "loss": 4.1377,
      "step": 3820
    },
    {
      "epoch": 0.03821,
      "grad_norm": 1.1885176826442838,
      "learning_rate": 0.003,
      "loss": 4.0908,
      "step": 3821
    },
    {
      "epoch": 0.03822,
      "grad_norm": 0.8699977531184279,
      "learning_rate": 0.003,
      "loss": 4.1066,
      "step": 3822
    },
    {
      "epoch": 0.03823,
      "grad_norm": 0.9310049147972063,
      "learning_rate": 0.003,
      "loss": 4.1051,
      "step": 3823
    },
    {
      "epoch": 0.03824,
      "grad_norm": 1.0606557350696768,
      "learning_rate": 0.003,
      "loss": 4.1039,
      "step": 3824
    },
    {
      "epoch": 0.03825,
      "grad_norm": 0.8761874314983482,
      "learning_rate": 0.003,
      "loss": 4.1167,
      "step": 3825
    },
    {
      "epoch": 0.03826,
      "grad_norm": 0.709566654404231,
      "learning_rate": 0.003,
      "loss": 4.0953,
      "step": 3826
    },
    {
      "epoch": 0.03827,
      "grad_norm": 0.665621012314629,
      "learning_rate": 0.003,
      "loss": 4.1211,
      "step": 3827
    },
    {
      "epoch": 0.03828,
      "grad_norm": 0.6539927553498923,
      "learning_rate": 0.003,
      "loss": 4.0957,
      "step": 3828
    },
    {
      "epoch": 0.03829,
      "grad_norm": 0.6659373242823122,
      "learning_rate": 0.003,
      "loss": 4.1003,
      "step": 3829
    },
    {
      "epoch": 0.0383,
      "grad_norm": 0.719614104181746,
      "learning_rate": 0.003,
      "loss": 4.0887,
      "step": 3830
    },
    {
      "epoch": 0.03831,
      "grad_norm": 0.7380247193128887,
      "learning_rate": 0.003,
      "loss": 4.0917,
      "step": 3831
    },
    {
      "epoch": 0.03832,
      "grad_norm": 0.7602873975407207,
      "learning_rate": 0.003,
      "loss": 4.0885,
      "step": 3832
    },
    {
      "epoch": 0.03833,
      "grad_norm": 0.7801412356291381,
      "learning_rate": 0.003,
      "loss": 4.091,
      "step": 3833
    },
    {
      "epoch": 0.03834,
      "grad_norm": 1.035527155697683,
      "learning_rate": 0.003,
      "loss": 4.1267,
      "step": 3834
    },
    {
      "epoch": 0.03835,
      "grad_norm": 1.2017267869538604,
      "learning_rate": 0.003,
      "loss": 4.1248,
      "step": 3835
    },
    {
      "epoch": 0.03836,
      "grad_norm": 0.7780948979729766,
      "learning_rate": 0.003,
      "loss": 4.1159,
      "step": 3836
    },
    {
      "epoch": 0.03837,
      "grad_norm": 0.6833191218840556,
      "learning_rate": 0.003,
      "loss": 4.0998,
      "step": 3837
    },
    {
      "epoch": 0.03838,
      "grad_norm": 0.8599951602639524,
      "learning_rate": 0.003,
      "loss": 4.0887,
      "step": 3838
    },
    {
      "epoch": 0.03839,
      "grad_norm": 0.9833951212613625,
      "learning_rate": 0.003,
      "loss": 4.0983,
      "step": 3839
    },
    {
      "epoch": 0.0384,
      "grad_norm": 1.1478082520006916,
      "learning_rate": 0.003,
      "loss": 4.1098,
      "step": 3840
    },
    {
      "epoch": 0.03841,
      "grad_norm": 0.8829906180137885,
      "learning_rate": 0.003,
      "loss": 4.1072,
      "step": 3841
    },
    {
      "epoch": 0.03842,
      "grad_norm": 0.7834869010490613,
      "learning_rate": 0.003,
      "loss": 4.1048,
      "step": 3842
    },
    {
      "epoch": 0.03843,
      "grad_norm": 0.7188713056155882,
      "learning_rate": 0.003,
      "loss": 4.0917,
      "step": 3843
    },
    {
      "epoch": 0.03844,
      "grad_norm": 0.7540179942394175,
      "learning_rate": 0.003,
      "loss": 4.0985,
      "step": 3844
    },
    {
      "epoch": 0.03845,
      "grad_norm": 0.7463415722264508,
      "learning_rate": 0.003,
      "loss": 4.0979,
      "step": 3845
    },
    {
      "epoch": 0.03846,
      "grad_norm": 0.7138861193622222,
      "learning_rate": 0.003,
      "loss": 4.1121,
      "step": 3846
    },
    {
      "epoch": 0.03847,
      "grad_norm": 0.648731470870398,
      "learning_rate": 0.003,
      "loss": 4.0983,
      "step": 3847
    },
    {
      "epoch": 0.03848,
      "grad_norm": 0.7547795493913247,
      "learning_rate": 0.003,
      "loss": 4.0875,
      "step": 3848
    },
    {
      "epoch": 0.03849,
      "grad_norm": 0.799301070310209,
      "learning_rate": 0.003,
      "loss": 4.1026,
      "step": 3849
    },
    {
      "epoch": 0.0385,
      "grad_norm": 0.9203078148453605,
      "learning_rate": 0.003,
      "loss": 4.1155,
      "step": 3850
    },
    {
      "epoch": 0.03851,
      "grad_norm": 1.0151353622998731,
      "learning_rate": 0.003,
      "loss": 4.0825,
      "step": 3851
    },
    {
      "epoch": 0.03852,
      "grad_norm": 1.0144010495517577,
      "learning_rate": 0.003,
      "loss": 4.1208,
      "step": 3852
    },
    {
      "epoch": 0.03853,
      "grad_norm": 1.0493705006427887,
      "learning_rate": 0.003,
      "loss": 4.1176,
      "step": 3853
    },
    {
      "epoch": 0.03854,
      "grad_norm": 0.8074337792156867,
      "learning_rate": 0.003,
      "loss": 4.103,
      "step": 3854
    },
    {
      "epoch": 0.03855,
      "grad_norm": 0.8026707872705597,
      "learning_rate": 0.003,
      "loss": 4.1197,
      "step": 3855
    },
    {
      "epoch": 0.03856,
      "grad_norm": 0.7804974920617295,
      "learning_rate": 0.003,
      "loss": 4.091,
      "step": 3856
    },
    {
      "epoch": 0.03857,
      "grad_norm": 0.8054893207885017,
      "learning_rate": 0.003,
      "loss": 4.0965,
      "step": 3857
    },
    {
      "epoch": 0.03858,
      "grad_norm": 0.8546443108408268,
      "learning_rate": 0.003,
      "loss": 4.0935,
      "step": 3858
    },
    {
      "epoch": 0.03859,
      "grad_norm": 0.8455331814308655,
      "learning_rate": 0.003,
      "loss": 4.1175,
      "step": 3859
    },
    {
      "epoch": 0.0386,
      "grad_norm": 0.7390789874576452,
      "learning_rate": 0.003,
      "loss": 4.0757,
      "step": 3860
    },
    {
      "epoch": 0.03861,
      "grad_norm": 0.7582288889706922,
      "learning_rate": 0.003,
      "loss": 4.1151,
      "step": 3861
    },
    {
      "epoch": 0.03862,
      "grad_norm": 0.725403944101907,
      "learning_rate": 0.003,
      "loss": 4.0906,
      "step": 3862
    },
    {
      "epoch": 0.03863,
      "grad_norm": 0.8633932250780971,
      "learning_rate": 0.003,
      "loss": 4.09,
      "step": 3863
    },
    {
      "epoch": 0.03864,
      "grad_norm": 1.1650833586712557,
      "learning_rate": 0.003,
      "loss": 4.1044,
      "step": 3864
    },
    {
      "epoch": 0.03865,
      "grad_norm": 0.9893693208520536,
      "learning_rate": 0.003,
      "loss": 4.1167,
      "step": 3865
    },
    {
      "epoch": 0.03866,
      "grad_norm": 1.0874614460090002,
      "learning_rate": 0.003,
      "loss": 4.0949,
      "step": 3866
    },
    {
      "epoch": 0.03867,
      "grad_norm": 0.7886630657768378,
      "learning_rate": 0.003,
      "loss": 4.0704,
      "step": 3867
    },
    {
      "epoch": 0.03868,
      "grad_norm": 0.885679172374958,
      "learning_rate": 0.003,
      "loss": 4.108,
      "step": 3868
    },
    {
      "epoch": 0.03869,
      "grad_norm": 1.0141213895298733,
      "learning_rate": 0.003,
      "loss": 4.0985,
      "step": 3869
    },
    {
      "epoch": 0.0387,
      "grad_norm": 1.0688565646198265,
      "learning_rate": 0.003,
      "loss": 4.1191,
      "step": 3870
    },
    {
      "epoch": 0.03871,
      "grad_norm": 0.9129625883332004,
      "learning_rate": 0.003,
      "loss": 4.0893,
      "step": 3871
    },
    {
      "epoch": 0.03872,
      "grad_norm": 0.8736530068321784,
      "learning_rate": 0.003,
      "loss": 4.0788,
      "step": 3872
    },
    {
      "epoch": 0.03873,
      "grad_norm": 0.9127445482435979,
      "learning_rate": 0.003,
      "loss": 4.1211,
      "step": 3873
    },
    {
      "epoch": 0.03874,
      "grad_norm": 1.1125648535743125,
      "learning_rate": 0.003,
      "loss": 4.0982,
      "step": 3874
    },
    {
      "epoch": 0.03875,
      "grad_norm": 1.0031183839809612,
      "learning_rate": 0.003,
      "loss": 4.1,
      "step": 3875
    },
    {
      "epoch": 0.03876,
      "grad_norm": 0.9060582464048182,
      "learning_rate": 0.003,
      "loss": 4.1299,
      "step": 3876
    },
    {
      "epoch": 0.03877,
      "grad_norm": 0.8392586887613771,
      "learning_rate": 0.003,
      "loss": 4.1084,
      "step": 3877
    },
    {
      "epoch": 0.03878,
      "grad_norm": 0.8001456111460843,
      "learning_rate": 0.003,
      "loss": 4.1209,
      "step": 3878
    },
    {
      "epoch": 0.03879,
      "grad_norm": 0.7457591238961055,
      "learning_rate": 0.003,
      "loss": 4.1183,
      "step": 3879
    },
    {
      "epoch": 0.0388,
      "grad_norm": 0.6124933426277533,
      "learning_rate": 0.003,
      "loss": 4.0806,
      "step": 3880
    },
    {
      "epoch": 0.03881,
      "grad_norm": 0.6131747582366786,
      "learning_rate": 0.003,
      "loss": 4.1053,
      "step": 3881
    },
    {
      "epoch": 0.03882,
      "grad_norm": 0.6113752675559317,
      "learning_rate": 0.003,
      "loss": 4.1139,
      "step": 3882
    },
    {
      "epoch": 0.03883,
      "grad_norm": 0.6919250963347912,
      "learning_rate": 0.003,
      "loss": 4.082,
      "step": 3883
    },
    {
      "epoch": 0.03884,
      "grad_norm": 0.7426309370638033,
      "learning_rate": 0.003,
      "loss": 4.0938,
      "step": 3884
    },
    {
      "epoch": 0.03885,
      "grad_norm": 0.9915545301535461,
      "learning_rate": 0.003,
      "loss": 4.1154,
      "step": 3885
    },
    {
      "epoch": 0.03886,
      "grad_norm": 1.1987560180799988,
      "learning_rate": 0.003,
      "loss": 4.0994,
      "step": 3886
    },
    {
      "epoch": 0.03887,
      "grad_norm": 0.8162692852302313,
      "learning_rate": 0.003,
      "loss": 4.1024,
      "step": 3887
    },
    {
      "epoch": 0.03888,
      "grad_norm": 1.0025366149196155,
      "learning_rate": 0.003,
      "loss": 4.0827,
      "step": 3888
    },
    {
      "epoch": 0.03889,
      "grad_norm": 1.146222148985757,
      "learning_rate": 0.003,
      "loss": 4.108,
      "step": 3889
    },
    {
      "epoch": 0.0389,
      "grad_norm": 0.8230126221175265,
      "learning_rate": 0.003,
      "loss": 4.0881,
      "step": 3890
    },
    {
      "epoch": 0.03891,
      "grad_norm": 0.8705081765994817,
      "learning_rate": 0.003,
      "loss": 4.0965,
      "step": 3891
    },
    {
      "epoch": 0.03892,
      "grad_norm": 0.7747593450381118,
      "learning_rate": 0.003,
      "loss": 4.1087,
      "step": 3892
    },
    {
      "epoch": 0.03893,
      "grad_norm": 0.7117098416936728,
      "learning_rate": 0.003,
      "loss": 4.0818,
      "step": 3893
    },
    {
      "epoch": 0.03894,
      "grad_norm": 0.7380911807082656,
      "learning_rate": 0.003,
      "loss": 4.0931,
      "step": 3894
    },
    {
      "epoch": 0.03895,
      "grad_norm": 0.9497127876188006,
      "learning_rate": 0.003,
      "loss": 4.0738,
      "step": 3895
    },
    {
      "epoch": 0.03896,
      "grad_norm": 1.1241836801249476,
      "learning_rate": 0.003,
      "loss": 4.1122,
      "step": 3896
    },
    {
      "epoch": 0.03897,
      "grad_norm": 0.9331139773236928,
      "learning_rate": 0.003,
      "loss": 4.0794,
      "step": 3897
    },
    {
      "epoch": 0.03898,
      "grad_norm": 0.8717563333320789,
      "learning_rate": 0.003,
      "loss": 4.1109,
      "step": 3898
    },
    {
      "epoch": 0.03899,
      "grad_norm": 0.9575796876780424,
      "learning_rate": 0.003,
      "loss": 4.1062,
      "step": 3899
    },
    {
      "epoch": 0.039,
      "grad_norm": 1.1111071654055427,
      "learning_rate": 0.003,
      "loss": 4.1102,
      "step": 3900
    },
    {
      "epoch": 0.03901,
      "grad_norm": 0.9101352152485117,
      "learning_rate": 0.003,
      "loss": 4.1223,
      "step": 3901
    },
    {
      "epoch": 0.03902,
      "grad_norm": 0.9149491392794752,
      "learning_rate": 0.003,
      "loss": 4.0934,
      "step": 3902
    },
    {
      "epoch": 0.03903,
      "grad_norm": 0.9599549632230119,
      "learning_rate": 0.003,
      "loss": 4.1314,
      "step": 3903
    },
    {
      "epoch": 0.03904,
      "grad_norm": 0.9977595444316653,
      "learning_rate": 0.003,
      "loss": 4.1108,
      "step": 3904
    },
    {
      "epoch": 0.03905,
      "grad_norm": 0.9772682724535922,
      "learning_rate": 0.003,
      "loss": 4.1314,
      "step": 3905
    },
    {
      "epoch": 0.03906,
      "grad_norm": 1.116840612654856,
      "learning_rate": 0.003,
      "loss": 4.1091,
      "step": 3906
    },
    {
      "epoch": 0.03907,
      "grad_norm": 0.9927827399452427,
      "learning_rate": 0.003,
      "loss": 4.1197,
      "step": 3907
    },
    {
      "epoch": 0.03908,
      "grad_norm": 1.0513974248161086,
      "learning_rate": 0.003,
      "loss": 4.1024,
      "step": 3908
    },
    {
      "epoch": 0.03909,
      "grad_norm": 0.951299576586947,
      "learning_rate": 0.003,
      "loss": 4.1068,
      "step": 3909
    },
    {
      "epoch": 0.0391,
      "grad_norm": 0.9273375029445102,
      "learning_rate": 0.003,
      "loss": 4.132,
      "step": 3910
    },
    {
      "epoch": 0.03911,
      "grad_norm": 0.960086352721459,
      "learning_rate": 0.003,
      "loss": 4.1206,
      "step": 3911
    },
    {
      "epoch": 0.03912,
      "grad_norm": 0.9317199647764666,
      "learning_rate": 0.003,
      "loss": 4.116,
      "step": 3912
    },
    {
      "epoch": 0.03913,
      "grad_norm": 0.8946789663336782,
      "learning_rate": 0.003,
      "loss": 4.0906,
      "step": 3913
    },
    {
      "epoch": 0.03914,
      "grad_norm": 0.8487878737890102,
      "learning_rate": 0.003,
      "loss": 4.1249,
      "step": 3914
    },
    {
      "epoch": 0.03915,
      "grad_norm": 0.8668410486160162,
      "learning_rate": 0.003,
      "loss": 4.0957,
      "step": 3915
    },
    {
      "epoch": 0.03916,
      "grad_norm": 0.8897335359039827,
      "learning_rate": 0.003,
      "loss": 4.126,
      "step": 3916
    },
    {
      "epoch": 0.03917,
      "grad_norm": 1.0123681906019413,
      "learning_rate": 0.003,
      "loss": 4.1215,
      "step": 3917
    },
    {
      "epoch": 0.03918,
      "grad_norm": 1.0602217244059164,
      "learning_rate": 0.003,
      "loss": 4.1106,
      "step": 3918
    },
    {
      "epoch": 0.03919,
      "grad_norm": 1.02799372222634,
      "learning_rate": 0.003,
      "loss": 4.0775,
      "step": 3919
    },
    {
      "epoch": 0.0392,
      "grad_norm": 0.7728333569294379,
      "learning_rate": 0.003,
      "loss": 4.0936,
      "step": 3920
    },
    {
      "epoch": 0.03921,
      "grad_norm": 0.7158476135413256,
      "learning_rate": 0.003,
      "loss": 4.0882,
      "step": 3921
    },
    {
      "epoch": 0.03922,
      "grad_norm": 0.8125567354684526,
      "learning_rate": 0.003,
      "loss": 4.1303,
      "step": 3922
    },
    {
      "epoch": 0.03923,
      "grad_norm": 0.9863034003179841,
      "learning_rate": 0.003,
      "loss": 4.0868,
      "step": 3923
    },
    {
      "epoch": 0.03924,
      "grad_norm": 1.0668800143383192,
      "learning_rate": 0.003,
      "loss": 4.0999,
      "step": 3924
    },
    {
      "epoch": 0.03925,
      "grad_norm": 0.9458022068309033,
      "learning_rate": 0.003,
      "loss": 4.1015,
      "step": 3925
    },
    {
      "epoch": 0.03926,
      "grad_norm": 0.913203888262044,
      "learning_rate": 0.003,
      "loss": 4.0937,
      "step": 3926
    },
    {
      "epoch": 0.03927,
      "grad_norm": 0.8813175388341771,
      "learning_rate": 0.003,
      "loss": 4.1061,
      "step": 3927
    },
    {
      "epoch": 0.03928,
      "grad_norm": 0.8794380772976899,
      "learning_rate": 0.003,
      "loss": 4.1203,
      "step": 3928
    },
    {
      "epoch": 0.03929,
      "grad_norm": 1.0579124240791549,
      "learning_rate": 0.003,
      "loss": 4.1352,
      "step": 3929
    },
    {
      "epoch": 0.0393,
      "grad_norm": 1.1597780940734346,
      "learning_rate": 0.003,
      "loss": 4.1258,
      "step": 3930
    },
    {
      "epoch": 0.03931,
      "grad_norm": 0.7081278586289745,
      "learning_rate": 0.003,
      "loss": 4.1169,
      "step": 3931
    },
    {
      "epoch": 0.03932,
      "grad_norm": 0.7809455628695875,
      "learning_rate": 0.003,
      "loss": 4.1175,
      "step": 3932
    },
    {
      "epoch": 0.03933,
      "grad_norm": 0.9272931464921295,
      "learning_rate": 0.003,
      "loss": 4.1094,
      "step": 3933
    },
    {
      "epoch": 0.03934,
      "grad_norm": 0.849076414415576,
      "learning_rate": 0.003,
      "loss": 4.0983,
      "step": 3934
    },
    {
      "epoch": 0.03935,
      "grad_norm": 0.7972121383758832,
      "learning_rate": 0.003,
      "loss": 4.0957,
      "step": 3935
    },
    {
      "epoch": 0.03936,
      "grad_norm": 0.736286380065545,
      "learning_rate": 0.003,
      "loss": 4.0552,
      "step": 3936
    },
    {
      "epoch": 0.03937,
      "grad_norm": 0.7969361690394817,
      "learning_rate": 0.003,
      "loss": 4.0919,
      "step": 3937
    },
    {
      "epoch": 0.03938,
      "grad_norm": 0.7648853318696774,
      "learning_rate": 0.003,
      "loss": 4.0959,
      "step": 3938
    },
    {
      "epoch": 0.03939,
      "grad_norm": 0.793082890734816,
      "learning_rate": 0.003,
      "loss": 4.0729,
      "step": 3939
    },
    {
      "epoch": 0.0394,
      "grad_norm": 0.9421729446698976,
      "learning_rate": 0.003,
      "loss": 4.1093,
      "step": 3940
    },
    {
      "epoch": 0.03941,
      "grad_norm": 0.9532085204003842,
      "learning_rate": 0.003,
      "loss": 4.1021,
      "step": 3941
    },
    {
      "epoch": 0.03942,
      "grad_norm": 0.9597084811820548,
      "learning_rate": 0.003,
      "loss": 4.1174,
      "step": 3942
    },
    {
      "epoch": 0.03943,
      "grad_norm": 0.9484931289816191,
      "learning_rate": 0.003,
      "loss": 4.1064,
      "step": 3943
    },
    {
      "epoch": 0.03944,
      "grad_norm": 0.9294543546840306,
      "learning_rate": 0.003,
      "loss": 4.121,
      "step": 3944
    },
    {
      "epoch": 0.03945,
      "grad_norm": 0.8988004519802858,
      "learning_rate": 0.003,
      "loss": 4.0913,
      "step": 3945
    },
    {
      "epoch": 0.03946,
      "grad_norm": 1.0598680988385802,
      "learning_rate": 0.003,
      "loss": 4.1233,
      "step": 3946
    },
    {
      "epoch": 0.03947,
      "grad_norm": 0.8720535328674782,
      "learning_rate": 0.003,
      "loss": 4.1206,
      "step": 3947
    },
    {
      "epoch": 0.03948,
      "grad_norm": 0.9223454341748609,
      "learning_rate": 0.003,
      "loss": 4.1371,
      "step": 3948
    },
    {
      "epoch": 0.03949,
      "grad_norm": 0.8915715222436728,
      "learning_rate": 0.003,
      "loss": 4.0824,
      "step": 3949
    },
    {
      "epoch": 0.0395,
      "grad_norm": 0.9448936409245293,
      "learning_rate": 0.003,
      "loss": 4.1126,
      "step": 3950
    },
    {
      "epoch": 0.03951,
      "grad_norm": 1.0385370830415304,
      "learning_rate": 0.003,
      "loss": 4.1462,
      "step": 3951
    },
    {
      "epoch": 0.03952,
      "grad_norm": 0.9030178362877832,
      "learning_rate": 0.003,
      "loss": 4.1202,
      "step": 3952
    },
    {
      "epoch": 0.03953,
      "grad_norm": 0.8709314822989492,
      "learning_rate": 0.003,
      "loss": 4.1006,
      "step": 3953
    },
    {
      "epoch": 0.03954,
      "grad_norm": 0.6813347239160019,
      "learning_rate": 0.003,
      "loss": 4.095,
      "step": 3954
    },
    {
      "epoch": 0.03955,
      "grad_norm": 0.5792685005630919,
      "learning_rate": 0.003,
      "loss": 4.0936,
      "step": 3955
    },
    {
      "epoch": 0.03956,
      "grad_norm": 0.643092840993703,
      "learning_rate": 0.003,
      "loss": 4.0958,
      "step": 3956
    },
    {
      "epoch": 0.03957,
      "grad_norm": 0.5958384751343148,
      "learning_rate": 0.003,
      "loss": 4.1,
      "step": 3957
    },
    {
      "epoch": 0.03958,
      "grad_norm": 0.7216735106401475,
      "learning_rate": 0.003,
      "loss": 4.077,
      "step": 3958
    },
    {
      "epoch": 0.03959,
      "grad_norm": 1.0447079173416385,
      "learning_rate": 0.003,
      "loss": 4.0851,
      "step": 3959
    },
    {
      "epoch": 0.0396,
      "grad_norm": 1.390251410216987,
      "learning_rate": 0.003,
      "loss": 4.1242,
      "step": 3960
    },
    {
      "epoch": 0.03961,
      "grad_norm": 0.6008725960965626,
      "learning_rate": 0.003,
      "loss": 4.1009,
      "step": 3961
    },
    {
      "epoch": 0.03962,
      "grad_norm": 1.007662011162288,
      "learning_rate": 0.003,
      "loss": 4.1212,
      "step": 3962
    },
    {
      "epoch": 0.03963,
      "grad_norm": 1.1370046987648283,
      "learning_rate": 0.003,
      "loss": 4.1122,
      "step": 3963
    },
    {
      "epoch": 0.03964,
      "grad_norm": 0.747283204388864,
      "learning_rate": 0.003,
      "loss": 4.0999,
      "step": 3964
    },
    {
      "epoch": 0.03965,
      "grad_norm": 0.8660291958106119,
      "learning_rate": 0.003,
      "loss": 4.1044,
      "step": 3965
    },
    {
      "epoch": 0.03966,
      "grad_norm": 0.8173353175111122,
      "learning_rate": 0.003,
      "loss": 4.1372,
      "step": 3966
    },
    {
      "epoch": 0.03967,
      "grad_norm": 0.8293952582170112,
      "learning_rate": 0.003,
      "loss": 4.0959,
      "step": 3967
    },
    {
      "epoch": 0.03968,
      "grad_norm": 0.7178876491150251,
      "learning_rate": 0.003,
      "loss": 4.1208,
      "step": 3968
    },
    {
      "epoch": 0.03969,
      "grad_norm": 0.6761246373413493,
      "learning_rate": 0.003,
      "loss": 4.0997,
      "step": 3969
    },
    {
      "epoch": 0.0397,
      "grad_norm": 0.781718933884442,
      "learning_rate": 0.003,
      "loss": 4.1046,
      "step": 3970
    },
    {
      "epoch": 0.03971,
      "grad_norm": 0.8449284031201099,
      "learning_rate": 0.003,
      "loss": 4.0832,
      "step": 3971
    },
    {
      "epoch": 0.03972,
      "grad_norm": 0.7027976770057135,
      "learning_rate": 0.003,
      "loss": 4.1,
      "step": 3972
    },
    {
      "epoch": 0.03973,
      "grad_norm": 0.7615706188874817,
      "learning_rate": 0.003,
      "loss": 4.1084,
      "step": 3973
    },
    {
      "epoch": 0.03974,
      "grad_norm": 0.9198349150433933,
      "learning_rate": 0.003,
      "loss": 4.1094,
      "step": 3974
    },
    {
      "epoch": 0.03975,
      "grad_norm": 0.9827265823761052,
      "learning_rate": 0.003,
      "loss": 4.1023,
      "step": 3975
    },
    {
      "epoch": 0.03976,
      "grad_norm": 0.9713747801072962,
      "learning_rate": 0.003,
      "loss": 4.135,
      "step": 3976
    },
    {
      "epoch": 0.03977,
      "grad_norm": 0.9509110934532634,
      "learning_rate": 0.003,
      "loss": 4.122,
      "step": 3977
    },
    {
      "epoch": 0.03978,
      "grad_norm": 0.8340186563863297,
      "learning_rate": 0.003,
      "loss": 4.0857,
      "step": 3978
    },
    {
      "epoch": 0.03979,
      "grad_norm": 0.8587622003381145,
      "learning_rate": 0.003,
      "loss": 4.1073,
      "step": 3979
    },
    {
      "epoch": 0.0398,
      "grad_norm": 0.9900201402082706,
      "learning_rate": 0.003,
      "loss": 4.1077,
      "step": 3980
    },
    {
      "epoch": 0.03981,
      "grad_norm": 0.8963808643427871,
      "learning_rate": 0.003,
      "loss": 4.0968,
      "step": 3981
    },
    {
      "epoch": 0.03982,
      "grad_norm": 0.8219983933371988,
      "learning_rate": 0.003,
      "loss": 4.08,
      "step": 3982
    },
    {
      "epoch": 0.03983,
      "grad_norm": 0.8040656765435304,
      "learning_rate": 0.003,
      "loss": 4.0972,
      "step": 3983
    },
    {
      "epoch": 0.03984,
      "grad_norm": 0.7325063244064047,
      "learning_rate": 0.003,
      "loss": 4.1049,
      "step": 3984
    },
    {
      "epoch": 0.03985,
      "grad_norm": 0.883058640101833,
      "learning_rate": 0.003,
      "loss": 4.1043,
      "step": 3985
    },
    {
      "epoch": 0.03986,
      "grad_norm": 1.0904689835883374,
      "learning_rate": 0.003,
      "loss": 4.1096,
      "step": 3986
    },
    {
      "epoch": 0.03987,
      "grad_norm": 1.0592742752953161,
      "learning_rate": 0.003,
      "loss": 4.1145,
      "step": 3987
    },
    {
      "epoch": 0.03988,
      "grad_norm": 0.9215756713392262,
      "learning_rate": 0.003,
      "loss": 4.1143,
      "step": 3988
    },
    {
      "epoch": 0.03989,
      "grad_norm": 0.9308579277666478,
      "learning_rate": 0.003,
      "loss": 4.1057,
      "step": 3989
    },
    {
      "epoch": 0.0399,
      "grad_norm": 0.6893991687243239,
      "learning_rate": 0.003,
      "loss": 4.0686,
      "step": 3990
    },
    {
      "epoch": 0.03991,
      "grad_norm": 0.6626762508427944,
      "learning_rate": 0.003,
      "loss": 4.1133,
      "step": 3991
    },
    {
      "epoch": 0.03992,
      "grad_norm": 0.6277503646624452,
      "learning_rate": 0.003,
      "loss": 4.0926,
      "step": 3992
    },
    {
      "epoch": 0.03993,
      "grad_norm": 0.5389226959273035,
      "learning_rate": 0.003,
      "loss": 4.0853,
      "step": 3993
    },
    {
      "epoch": 0.03994,
      "grad_norm": 0.5575784523742429,
      "learning_rate": 0.003,
      "loss": 4.0842,
      "step": 3994
    },
    {
      "epoch": 0.03995,
      "grad_norm": 0.5740995198289247,
      "learning_rate": 0.003,
      "loss": 4.0697,
      "step": 3995
    },
    {
      "epoch": 0.03996,
      "grad_norm": 0.6506767895381821,
      "learning_rate": 0.003,
      "loss": 4.1048,
      "step": 3996
    },
    {
      "epoch": 0.03997,
      "grad_norm": 0.8018658682841067,
      "learning_rate": 0.003,
      "loss": 4.1071,
      "step": 3997
    },
    {
      "epoch": 0.03998,
      "grad_norm": 1.1165769471951579,
      "learning_rate": 0.003,
      "loss": 4.1178,
      "step": 3998
    },
    {
      "epoch": 0.03999,
      "grad_norm": 0.9183179386537218,
      "learning_rate": 0.003,
      "loss": 4.1204,
      "step": 3999
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.7930924507289704,
      "learning_rate": 0.003,
      "loss": 4.118,
      "step": 4000
    },
    {
      "epoch": 0.04001,
      "grad_norm": 0.7997730049089519,
      "learning_rate": 0.003,
      "loss": 4.114,
      "step": 4001
    },
    {
      "epoch": 0.04002,
      "grad_norm": 0.9222776505941644,
      "learning_rate": 0.003,
      "loss": 4.0909,
      "step": 4002
    },
    {
      "epoch": 0.04003,
      "grad_norm": 1.0818631603686821,
      "learning_rate": 0.003,
      "loss": 4.1176,
      "step": 4003
    },
    {
      "epoch": 0.04004,
      "grad_norm": 0.9791417920974259,
      "learning_rate": 0.003,
      "loss": 4.1202,
      "step": 4004
    },
    {
      "epoch": 0.04005,
      "grad_norm": 0.9808789569533176,
      "learning_rate": 0.003,
      "loss": 4.131,
      "step": 4005
    },
    {
      "epoch": 0.04006,
      "grad_norm": 0.8961052673893415,
      "learning_rate": 0.003,
      "loss": 4.0864,
      "step": 4006
    },
    {
      "epoch": 0.04007,
      "grad_norm": 0.8297969899387416,
      "learning_rate": 0.003,
      "loss": 4.1092,
      "step": 4007
    },
    {
      "epoch": 0.04008,
      "grad_norm": 0.936733303052199,
      "learning_rate": 0.003,
      "loss": 4.0885,
      "step": 4008
    },
    {
      "epoch": 0.04009,
      "grad_norm": 0.9644457814855122,
      "learning_rate": 0.003,
      "loss": 4.0918,
      "step": 4009
    },
    {
      "epoch": 0.0401,
      "grad_norm": 1.013041858960856,
      "learning_rate": 0.003,
      "loss": 4.1123,
      "step": 4010
    },
    {
      "epoch": 0.04011,
      "grad_norm": 1.0633452177910276,
      "learning_rate": 0.003,
      "loss": 4.123,
      "step": 4011
    },
    {
      "epoch": 0.04012,
      "grad_norm": 1.0649433557962202,
      "learning_rate": 0.003,
      "loss": 4.1157,
      "step": 4012
    },
    {
      "epoch": 0.04013,
      "grad_norm": 0.7933050292506824,
      "learning_rate": 0.003,
      "loss": 4.1084,
      "step": 4013
    },
    {
      "epoch": 0.04014,
      "grad_norm": 0.7879046370570779,
      "learning_rate": 0.003,
      "loss": 4.0887,
      "step": 4014
    },
    {
      "epoch": 0.04015,
      "grad_norm": 0.8241932652214115,
      "learning_rate": 0.003,
      "loss": 4.0718,
      "step": 4015
    },
    {
      "epoch": 0.04016,
      "grad_norm": 0.8658550465105778,
      "learning_rate": 0.003,
      "loss": 4.0914,
      "step": 4016
    },
    {
      "epoch": 0.04017,
      "grad_norm": 0.9228756535179865,
      "learning_rate": 0.003,
      "loss": 4.1344,
      "step": 4017
    },
    {
      "epoch": 0.04018,
      "grad_norm": 0.9531173484679942,
      "learning_rate": 0.003,
      "loss": 4.1019,
      "step": 4018
    },
    {
      "epoch": 0.04019,
      "grad_norm": 1.0866964724732295,
      "learning_rate": 0.003,
      "loss": 4.0876,
      "step": 4019
    },
    {
      "epoch": 0.0402,
      "grad_norm": 1.0577908225334638,
      "learning_rate": 0.003,
      "loss": 4.1206,
      "step": 4020
    },
    {
      "epoch": 0.04021,
      "grad_norm": 1.0725239975062906,
      "learning_rate": 0.003,
      "loss": 4.0972,
      "step": 4021
    },
    {
      "epoch": 0.04022,
      "grad_norm": 1.0475988978108968,
      "learning_rate": 0.003,
      "loss": 4.0915,
      "step": 4022
    },
    {
      "epoch": 0.04023,
      "grad_norm": 1.065198125983714,
      "learning_rate": 0.003,
      "loss": 4.1016,
      "step": 4023
    },
    {
      "epoch": 0.04024,
      "grad_norm": 0.9510719367654007,
      "learning_rate": 0.003,
      "loss": 4.0848,
      "step": 4024
    },
    {
      "epoch": 0.04025,
      "grad_norm": 0.9965721786284437,
      "learning_rate": 0.003,
      "loss": 4.0934,
      "step": 4025
    },
    {
      "epoch": 0.04026,
      "grad_norm": 1.1597169306225377,
      "learning_rate": 0.003,
      "loss": 4.1247,
      "step": 4026
    },
    {
      "epoch": 0.04027,
      "grad_norm": 0.8115276383463242,
      "learning_rate": 0.003,
      "loss": 4.118,
      "step": 4027
    },
    {
      "epoch": 0.04028,
      "grad_norm": 0.7507067496165764,
      "learning_rate": 0.003,
      "loss": 4.0962,
      "step": 4028
    },
    {
      "epoch": 0.04029,
      "grad_norm": 0.8043593457637668,
      "learning_rate": 0.003,
      "loss": 4.1237,
      "step": 4029
    },
    {
      "epoch": 0.0403,
      "grad_norm": 0.8475092967533915,
      "learning_rate": 0.003,
      "loss": 4.0785,
      "step": 4030
    },
    {
      "epoch": 0.04031,
      "grad_norm": 0.7950265087708841,
      "learning_rate": 0.003,
      "loss": 4.0908,
      "step": 4031
    },
    {
      "epoch": 0.04032,
      "grad_norm": 0.7929178533450967,
      "learning_rate": 0.003,
      "loss": 4.1173,
      "step": 4032
    },
    {
      "epoch": 0.04033,
      "grad_norm": 0.9419776254998102,
      "learning_rate": 0.003,
      "loss": 4.1137,
      "step": 4033
    },
    {
      "epoch": 0.04034,
      "grad_norm": 1.0174202023914847,
      "learning_rate": 0.003,
      "loss": 4.0873,
      "step": 4034
    },
    {
      "epoch": 0.04035,
      "grad_norm": 1.0008080685810283,
      "learning_rate": 0.003,
      "loss": 4.1085,
      "step": 4035
    },
    {
      "epoch": 0.04036,
      "grad_norm": 0.9875885643657349,
      "learning_rate": 0.003,
      "loss": 4.1267,
      "step": 4036
    },
    {
      "epoch": 0.04037,
      "grad_norm": 1.035750371651436,
      "learning_rate": 0.003,
      "loss": 4.0822,
      "step": 4037
    },
    {
      "epoch": 0.04038,
      "grad_norm": 1.089114000312345,
      "learning_rate": 0.003,
      "loss": 4.1029,
      "step": 4038
    },
    {
      "epoch": 0.04039,
      "grad_norm": 0.7787711745520574,
      "learning_rate": 0.003,
      "loss": 4.093,
      "step": 4039
    },
    {
      "epoch": 0.0404,
      "grad_norm": 0.8304131011478352,
      "learning_rate": 0.003,
      "loss": 4.0899,
      "step": 4040
    },
    {
      "epoch": 0.04041,
      "grad_norm": 0.8845677735505116,
      "learning_rate": 0.003,
      "loss": 4.0867,
      "step": 4041
    },
    {
      "epoch": 0.04042,
      "grad_norm": 1.0056077779577761,
      "learning_rate": 0.003,
      "loss": 4.1323,
      "step": 4042
    },
    {
      "epoch": 0.04043,
      "grad_norm": 1.303684048760548,
      "learning_rate": 0.003,
      "loss": 4.1299,
      "step": 4043
    },
    {
      "epoch": 0.04044,
      "grad_norm": 0.6804116358869476,
      "learning_rate": 0.003,
      "loss": 4.1091,
      "step": 4044
    },
    {
      "epoch": 0.04045,
      "grad_norm": 0.6777286555779822,
      "learning_rate": 0.003,
      "loss": 4.0524,
      "step": 4045
    },
    {
      "epoch": 0.04046,
      "grad_norm": 0.7168976521438496,
      "learning_rate": 0.003,
      "loss": 4.1003,
      "step": 4046
    },
    {
      "epoch": 0.04047,
      "grad_norm": 0.8016076938020208,
      "learning_rate": 0.003,
      "loss": 4.0921,
      "step": 4047
    },
    {
      "epoch": 0.04048,
      "grad_norm": 0.8003360249896011,
      "learning_rate": 0.003,
      "loss": 4.0835,
      "step": 4048
    },
    {
      "epoch": 0.04049,
      "grad_norm": 0.7905185716828504,
      "learning_rate": 0.003,
      "loss": 4.1233,
      "step": 4049
    },
    {
      "epoch": 0.0405,
      "grad_norm": 0.7170296620649927,
      "learning_rate": 0.003,
      "loss": 4.1172,
      "step": 4050
    },
    {
      "epoch": 0.04051,
      "grad_norm": 0.8022124254069631,
      "learning_rate": 0.003,
      "loss": 4.0999,
      "step": 4051
    },
    {
      "epoch": 0.04052,
      "grad_norm": 0.9348016278831669,
      "learning_rate": 0.003,
      "loss": 4.11,
      "step": 4052
    },
    {
      "epoch": 0.04053,
      "grad_norm": 0.8618520313571836,
      "learning_rate": 0.003,
      "loss": 4.0556,
      "step": 4053
    },
    {
      "epoch": 0.04054,
      "grad_norm": 0.8261211062660251,
      "learning_rate": 0.003,
      "loss": 4.0768,
      "step": 4054
    },
    {
      "epoch": 0.04055,
      "grad_norm": 0.6845606042201053,
      "learning_rate": 0.003,
      "loss": 4.0666,
      "step": 4055
    },
    {
      "epoch": 0.04056,
      "grad_norm": 0.6070419310929447,
      "learning_rate": 0.003,
      "loss": 4.09,
      "step": 4056
    },
    {
      "epoch": 0.04057,
      "grad_norm": 0.5685943884831188,
      "learning_rate": 0.003,
      "loss": 4.0894,
      "step": 4057
    },
    {
      "epoch": 0.04058,
      "grad_norm": 0.6709504673340433,
      "learning_rate": 0.003,
      "loss": 4.0639,
      "step": 4058
    },
    {
      "epoch": 0.04059,
      "grad_norm": 0.8379919064931752,
      "learning_rate": 0.003,
      "loss": 4.051,
      "step": 4059
    },
    {
      "epoch": 0.0406,
      "grad_norm": 0.8620293391959047,
      "learning_rate": 0.003,
      "loss": 4.1127,
      "step": 4060
    },
    {
      "epoch": 0.04061,
      "grad_norm": 0.7852271201009998,
      "learning_rate": 0.003,
      "loss": 4.0943,
      "step": 4061
    },
    {
      "epoch": 0.04062,
      "grad_norm": 0.8559296475729065,
      "learning_rate": 0.003,
      "loss": 4.0773,
      "step": 4062
    },
    {
      "epoch": 0.04063,
      "grad_norm": 0.8779279835196838,
      "learning_rate": 0.003,
      "loss": 4.0841,
      "step": 4063
    },
    {
      "epoch": 0.04064,
      "grad_norm": 0.8897248490868346,
      "learning_rate": 0.003,
      "loss": 4.099,
      "step": 4064
    },
    {
      "epoch": 0.04065,
      "grad_norm": 0.9175138271766847,
      "learning_rate": 0.003,
      "loss": 4.1364,
      "step": 4065
    },
    {
      "epoch": 0.04066,
      "grad_norm": 0.9362345350031807,
      "learning_rate": 0.003,
      "loss": 4.0996,
      "step": 4066
    },
    {
      "epoch": 0.04067,
      "grad_norm": 1.1545536887257815,
      "learning_rate": 0.003,
      "loss": 4.1086,
      "step": 4067
    },
    {
      "epoch": 0.04068,
      "grad_norm": 1.0240578066067638,
      "learning_rate": 0.003,
      "loss": 4.1132,
      "step": 4068
    },
    {
      "epoch": 0.04069,
      "grad_norm": 0.941652681317373,
      "learning_rate": 0.003,
      "loss": 4.1035,
      "step": 4069
    },
    {
      "epoch": 0.0407,
      "grad_norm": 0.9346763708267345,
      "learning_rate": 0.003,
      "loss": 4.1051,
      "step": 4070
    },
    {
      "epoch": 0.04071,
      "grad_norm": 0.9599025044832693,
      "learning_rate": 0.003,
      "loss": 4.0759,
      "step": 4071
    },
    {
      "epoch": 0.04072,
      "grad_norm": 0.8612213977045744,
      "learning_rate": 0.003,
      "loss": 4.0953,
      "step": 4072
    },
    {
      "epoch": 0.04073,
      "grad_norm": 0.8763585891787764,
      "learning_rate": 0.003,
      "loss": 4.0808,
      "step": 4073
    },
    {
      "epoch": 0.04074,
      "grad_norm": 0.8539311608465675,
      "learning_rate": 0.003,
      "loss": 4.1083,
      "step": 4074
    },
    {
      "epoch": 0.04075,
      "grad_norm": 0.8369021460447633,
      "learning_rate": 0.003,
      "loss": 4.1042,
      "step": 4075
    },
    {
      "epoch": 0.04076,
      "grad_norm": 0.8578020250558176,
      "learning_rate": 0.003,
      "loss": 4.0979,
      "step": 4076
    },
    {
      "epoch": 0.04077,
      "grad_norm": 0.85497575989368,
      "learning_rate": 0.003,
      "loss": 4.1111,
      "step": 4077
    },
    {
      "epoch": 0.04078,
      "grad_norm": 0.9407927776815804,
      "learning_rate": 0.003,
      "loss": 4.0876,
      "step": 4078
    },
    {
      "epoch": 0.04079,
      "grad_norm": 1.05128762652415,
      "learning_rate": 0.003,
      "loss": 4.1036,
      "step": 4079
    },
    {
      "epoch": 0.0408,
      "grad_norm": 1.1019406938246195,
      "learning_rate": 0.003,
      "loss": 4.0979,
      "step": 4080
    },
    {
      "epoch": 0.04081,
      "grad_norm": 0.9496061799293344,
      "learning_rate": 0.003,
      "loss": 4.0748,
      "step": 4081
    },
    {
      "epoch": 0.04082,
      "grad_norm": 0.8481874389103903,
      "learning_rate": 0.003,
      "loss": 4.1091,
      "step": 4082
    },
    {
      "epoch": 0.04083,
      "grad_norm": 0.7274191448577992,
      "learning_rate": 0.003,
      "loss": 4.082,
      "step": 4083
    },
    {
      "epoch": 0.04084,
      "grad_norm": 0.7904195126005293,
      "learning_rate": 0.003,
      "loss": 4.1207,
      "step": 4084
    },
    {
      "epoch": 0.04085,
      "grad_norm": 1.0202097759202264,
      "learning_rate": 0.003,
      "loss": 4.1066,
      "step": 4085
    },
    {
      "epoch": 0.04086,
      "grad_norm": 1.201486915510698,
      "learning_rate": 0.003,
      "loss": 4.1299,
      "step": 4086
    },
    {
      "epoch": 0.04087,
      "grad_norm": 0.868844436475151,
      "learning_rate": 0.003,
      "loss": 4.1076,
      "step": 4087
    },
    {
      "epoch": 0.04088,
      "grad_norm": 0.9444985463912003,
      "learning_rate": 0.003,
      "loss": 4.089,
      "step": 4088
    },
    {
      "epoch": 0.04089,
      "grad_norm": 0.9918074019046071,
      "learning_rate": 0.003,
      "loss": 4.0921,
      "step": 4089
    },
    {
      "epoch": 0.0409,
      "grad_norm": 1.0955168255438645,
      "learning_rate": 0.003,
      "loss": 4.1134,
      "step": 4090
    },
    {
      "epoch": 0.04091,
      "grad_norm": 1.063937537128444,
      "learning_rate": 0.003,
      "loss": 4.1242,
      "step": 4091
    },
    {
      "epoch": 0.04092,
      "grad_norm": 0.9343133313137877,
      "learning_rate": 0.003,
      "loss": 4.0782,
      "step": 4092
    },
    {
      "epoch": 0.04093,
      "grad_norm": 0.8938252574219301,
      "learning_rate": 0.003,
      "loss": 4.106,
      "step": 4093
    },
    {
      "epoch": 0.04094,
      "grad_norm": 0.8646025553563678,
      "learning_rate": 0.003,
      "loss": 4.0958,
      "step": 4094
    },
    {
      "epoch": 0.04095,
      "grad_norm": 0.8849098800450139,
      "learning_rate": 0.003,
      "loss": 4.0708,
      "step": 4095
    },
    {
      "epoch": 0.04096,
      "grad_norm": 0.8639434333725027,
      "learning_rate": 0.003,
      "loss": 4.0767,
      "step": 4096
    },
    {
      "epoch": 0.04097,
      "grad_norm": 0.8248508344367464,
      "learning_rate": 0.003,
      "loss": 4.096,
      "step": 4097
    },
    {
      "epoch": 0.04098,
      "grad_norm": 0.7799456648454658,
      "learning_rate": 0.003,
      "loss": 4.0966,
      "step": 4098
    },
    {
      "epoch": 0.04099,
      "grad_norm": 0.8045819576448443,
      "learning_rate": 0.003,
      "loss": 4.1195,
      "step": 4099
    },
    {
      "epoch": 0.041,
      "grad_norm": 0.8747001760069023,
      "learning_rate": 0.003,
      "loss": 4.0786,
      "step": 4100
    },
    {
      "epoch": 0.04101,
      "grad_norm": 0.8938516305008589,
      "learning_rate": 0.003,
      "loss": 4.0744,
      "step": 4101
    },
    {
      "epoch": 0.04102,
      "grad_norm": 0.8240754586578543,
      "learning_rate": 0.003,
      "loss": 4.0914,
      "step": 4102
    },
    {
      "epoch": 0.04103,
      "grad_norm": 0.7515676676481744,
      "learning_rate": 0.003,
      "loss": 4.1135,
      "step": 4103
    },
    {
      "epoch": 0.04104,
      "grad_norm": 0.7588461451530453,
      "learning_rate": 0.003,
      "loss": 4.099,
      "step": 4104
    },
    {
      "epoch": 0.04105,
      "grad_norm": 0.8416483809920972,
      "learning_rate": 0.003,
      "loss": 4.0658,
      "step": 4105
    },
    {
      "epoch": 0.04106,
      "grad_norm": 0.9997989043605089,
      "learning_rate": 0.003,
      "loss": 4.1091,
      "step": 4106
    },
    {
      "epoch": 0.04107,
      "grad_norm": 1.4519560189758212,
      "learning_rate": 0.003,
      "loss": 4.1167,
      "step": 4107
    },
    {
      "epoch": 0.04108,
      "grad_norm": 0.6881032147408636,
      "learning_rate": 0.003,
      "loss": 4.0891,
      "step": 4108
    },
    {
      "epoch": 0.04109,
      "grad_norm": 0.7741592484410211,
      "learning_rate": 0.003,
      "loss": 4.0795,
      "step": 4109
    },
    {
      "epoch": 0.0411,
      "grad_norm": 0.8818900570270763,
      "learning_rate": 0.003,
      "loss": 4.091,
      "step": 4110
    },
    {
      "epoch": 0.04111,
      "grad_norm": 1.0723961097005286,
      "learning_rate": 0.003,
      "loss": 4.0794,
      "step": 4111
    },
    {
      "epoch": 0.04112,
      "grad_norm": 1.0495234672278315,
      "learning_rate": 0.003,
      "loss": 4.0982,
      "step": 4112
    },
    {
      "epoch": 0.04113,
      "grad_norm": 1.028878109333595,
      "learning_rate": 0.003,
      "loss": 4.1422,
      "step": 4113
    },
    {
      "epoch": 0.04114,
      "grad_norm": 0.7814661056488829,
      "learning_rate": 0.003,
      "loss": 4.0787,
      "step": 4114
    },
    {
      "epoch": 0.04115,
      "grad_norm": 0.7046355162923472,
      "learning_rate": 0.003,
      "loss": 4.089,
      "step": 4115
    },
    {
      "epoch": 0.04116,
      "grad_norm": 0.7912332755348991,
      "learning_rate": 0.003,
      "loss": 4.1044,
      "step": 4116
    },
    {
      "epoch": 0.04117,
      "grad_norm": 0.8341278502744938,
      "learning_rate": 0.003,
      "loss": 4.0668,
      "step": 4117
    },
    {
      "epoch": 0.04118,
      "grad_norm": 0.8875854653843481,
      "learning_rate": 0.003,
      "loss": 4.1278,
      "step": 4118
    },
    {
      "epoch": 0.04119,
      "grad_norm": 0.8609577714531109,
      "learning_rate": 0.003,
      "loss": 4.1038,
      "step": 4119
    },
    {
      "epoch": 0.0412,
      "grad_norm": 0.9318473319692638,
      "learning_rate": 0.003,
      "loss": 4.0972,
      "step": 4120
    },
    {
      "epoch": 0.04121,
      "grad_norm": 0.9493333365772441,
      "learning_rate": 0.003,
      "loss": 4.1233,
      "step": 4121
    },
    {
      "epoch": 0.04122,
      "grad_norm": 0.9836587921676327,
      "learning_rate": 0.003,
      "loss": 4.1001,
      "step": 4122
    },
    {
      "epoch": 0.04123,
      "grad_norm": 0.9314136846053894,
      "learning_rate": 0.003,
      "loss": 4.1073,
      "step": 4123
    },
    {
      "epoch": 0.04124,
      "grad_norm": 0.8615395432982054,
      "learning_rate": 0.003,
      "loss": 4.0853,
      "step": 4124
    },
    {
      "epoch": 0.04125,
      "grad_norm": 0.8303231719235581,
      "learning_rate": 0.003,
      "loss": 4.107,
      "step": 4125
    },
    {
      "epoch": 0.04126,
      "grad_norm": 0.9351121203757022,
      "learning_rate": 0.003,
      "loss": 4.1031,
      "step": 4126
    },
    {
      "epoch": 0.04127,
      "grad_norm": 0.9548721015062305,
      "learning_rate": 0.003,
      "loss": 4.0928,
      "step": 4127
    },
    {
      "epoch": 0.04128,
      "grad_norm": 0.904134032279689,
      "learning_rate": 0.003,
      "loss": 4.0795,
      "step": 4128
    },
    {
      "epoch": 0.04129,
      "grad_norm": 1.0360809502468598,
      "learning_rate": 0.003,
      "loss": 4.1288,
      "step": 4129
    },
    {
      "epoch": 0.0413,
      "grad_norm": 1.141478517660115,
      "learning_rate": 0.003,
      "loss": 4.0942,
      "step": 4130
    },
    {
      "epoch": 0.04131,
      "grad_norm": 1.097730707750305,
      "learning_rate": 0.003,
      "loss": 4.1188,
      "step": 4131
    },
    {
      "epoch": 0.04132,
      "grad_norm": 0.9015795328637733,
      "learning_rate": 0.003,
      "loss": 4.0805,
      "step": 4132
    },
    {
      "epoch": 0.04133,
      "grad_norm": 0.9605681859330745,
      "learning_rate": 0.003,
      "loss": 4.1225,
      "step": 4133
    },
    {
      "epoch": 0.04134,
      "grad_norm": 0.9723288613378216,
      "learning_rate": 0.003,
      "loss": 4.1152,
      "step": 4134
    },
    {
      "epoch": 0.04135,
      "grad_norm": 0.9769907462898332,
      "learning_rate": 0.003,
      "loss": 4.0955,
      "step": 4135
    },
    {
      "epoch": 0.04136,
      "grad_norm": 0.8696285037251947,
      "learning_rate": 0.003,
      "loss": 4.0888,
      "step": 4136
    },
    {
      "epoch": 0.04137,
      "grad_norm": 0.8622412103402255,
      "learning_rate": 0.003,
      "loss": 4.1276,
      "step": 4137
    },
    {
      "epoch": 0.04138,
      "grad_norm": 0.8940895987636069,
      "learning_rate": 0.003,
      "loss": 4.114,
      "step": 4138
    },
    {
      "epoch": 0.04139,
      "grad_norm": 0.8558944236718854,
      "learning_rate": 0.003,
      "loss": 4.1079,
      "step": 4139
    },
    {
      "epoch": 0.0414,
      "grad_norm": 0.7906160709298794,
      "learning_rate": 0.003,
      "loss": 4.1091,
      "step": 4140
    },
    {
      "epoch": 0.04141,
      "grad_norm": 0.8043674825801352,
      "learning_rate": 0.003,
      "loss": 4.0851,
      "step": 4141
    },
    {
      "epoch": 0.04142,
      "grad_norm": 0.8668589703050257,
      "learning_rate": 0.003,
      "loss": 4.1101,
      "step": 4142
    },
    {
      "epoch": 0.04143,
      "grad_norm": 1.0143988611429429,
      "learning_rate": 0.003,
      "loss": 4.1009,
      "step": 4143
    },
    {
      "epoch": 0.04144,
      "grad_norm": 1.1221724323882003,
      "learning_rate": 0.003,
      "loss": 4.105,
      "step": 4144
    },
    {
      "epoch": 0.04145,
      "grad_norm": 0.9750842855026217,
      "learning_rate": 0.003,
      "loss": 4.0885,
      "step": 4145
    },
    {
      "epoch": 0.04146,
      "grad_norm": 0.837063893029182,
      "learning_rate": 0.003,
      "loss": 4.1029,
      "step": 4146
    },
    {
      "epoch": 0.04147,
      "grad_norm": 0.817279014947959,
      "learning_rate": 0.003,
      "loss": 4.1136,
      "step": 4147
    },
    {
      "epoch": 0.04148,
      "grad_norm": 0.6652410842078591,
      "learning_rate": 0.003,
      "loss": 4.0677,
      "step": 4148
    },
    {
      "epoch": 0.04149,
      "grad_norm": 0.601931902805492,
      "learning_rate": 0.003,
      "loss": 4.0762,
      "step": 4149
    },
    {
      "epoch": 0.0415,
      "grad_norm": 0.5744893147283616,
      "learning_rate": 0.003,
      "loss": 4.0783,
      "step": 4150
    },
    {
      "epoch": 0.04151,
      "grad_norm": 0.5594756907939531,
      "learning_rate": 0.003,
      "loss": 4.0958,
      "step": 4151
    },
    {
      "epoch": 0.04152,
      "grad_norm": 0.6696918347600734,
      "learning_rate": 0.003,
      "loss": 4.097,
      "step": 4152
    },
    {
      "epoch": 0.04153,
      "grad_norm": 0.8337852170452664,
      "learning_rate": 0.003,
      "loss": 4.0646,
      "step": 4153
    },
    {
      "epoch": 0.04154,
      "grad_norm": 0.8486120723348882,
      "learning_rate": 0.003,
      "loss": 4.1015,
      "step": 4154
    },
    {
      "epoch": 0.04155,
      "grad_norm": 0.697828069478384,
      "learning_rate": 0.003,
      "loss": 4.0726,
      "step": 4155
    },
    {
      "epoch": 0.04156,
      "grad_norm": 0.4957422759820407,
      "learning_rate": 0.003,
      "loss": 4.0881,
      "step": 4156
    },
    {
      "epoch": 0.04157,
      "grad_norm": 0.5563098539794213,
      "learning_rate": 0.003,
      "loss": 4.1032,
      "step": 4157
    },
    {
      "epoch": 0.04158,
      "grad_norm": 0.6673553499632091,
      "learning_rate": 0.003,
      "loss": 4.1043,
      "step": 4158
    },
    {
      "epoch": 0.04159,
      "grad_norm": 0.8290986224854208,
      "learning_rate": 0.003,
      "loss": 4.0802,
      "step": 4159
    },
    {
      "epoch": 0.0416,
      "grad_norm": 0.9164477827441916,
      "learning_rate": 0.003,
      "loss": 4.0966,
      "step": 4160
    },
    {
      "epoch": 0.04161,
      "grad_norm": 0.8138741601967407,
      "learning_rate": 0.003,
      "loss": 4.0993,
      "step": 4161
    },
    {
      "epoch": 0.04162,
      "grad_norm": 0.7719431484259072,
      "learning_rate": 0.003,
      "loss": 4.0587,
      "step": 4162
    },
    {
      "epoch": 0.04163,
      "grad_norm": 0.7815652988597618,
      "learning_rate": 0.003,
      "loss": 4.0858,
      "step": 4163
    },
    {
      "epoch": 0.04164,
      "grad_norm": 0.9136897661923532,
      "learning_rate": 0.003,
      "loss": 4.1081,
      "step": 4164
    },
    {
      "epoch": 0.04165,
      "grad_norm": 1.0910864238229012,
      "learning_rate": 0.003,
      "loss": 4.089,
      "step": 4165
    },
    {
      "epoch": 0.04166,
      "grad_norm": 1.0985022482979871,
      "learning_rate": 0.003,
      "loss": 4.0829,
      "step": 4166
    },
    {
      "epoch": 0.04167,
      "grad_norm": 0.884758586994079,
      "learning_rate": 0.003,
      "loss": 4.1031,
      "step": 4167
    },
    {
      "epoch": 0.04168,
      "grad_norm": 0.8375929677822369,
      "learning_rate": 0.003,
      "loss": 4.1005,
      "step": 4168
    },
    {
      "epoch": 0.04169,
      "grad_norm": 0.8912187343234272,
      "learning_rate": 0.003,
      "loss": 4.1106,
      "step": 4169
    },
    {
      "epoch": 0.0417,
      "grad_norm": 1.09510323581272,
      "learning_rate": 0.003,
      "loss": 4.0782,
      "step": 4170
    },
    {
      "epoch": 0.04171,
      "grad_norm": 0.9235321645280937,
      "learning_rate": 0.003,
      "loss": 4.0951,
      "step": 4171
    },
    {
      "epoch": 0.04172,
      "grad_norm": 0.9054535762684148,
      "learning_rate": 0.003,
      "loss": 4.1142,
      "step": 4172
    },
    {
      "epoch": 0.04173,
      "grad_norm": 0.9533816623318108,
      "learning_rate": 0.003,
      "loss": 4.1107,
      "step": 4173
    },
    {
      "epoch": 0.04174,
      "grad_norm": 0.9693424734311672,
      "learning_rate": 0.003,
      "loss": 4.0809,
      "step": 4174
    },
    {
      "epoch": 0.04175,
      "grad_norm": 0.9476037015111809,
      "learning_rate": 0.003,
      "loss": 4.1271,
      "step": 4175
    },
    {
      "epoch": 0.04176,
      "grad_norm": 0.9584295419287631,
      "learning_rate": 0.003,
      "loss": 4.0947,
      "step": 4176
    },
    {
      "epoch": 0.04177,
      "grad_norm": 0.9248605600298978,
      "learning_rate": 0.003,
      "loss": 4.072,
      "step": 4177
    },
    {
      "epoch": 0.04178,
      "grad_norm": 1.0049448032304569,
      "learning_rate": 0.003,
      "loss": 4.1083,
      "step": 4178
    },
    {
      "epoch": 0.04179,
      "grad_norm": 1.0827168186886107,
      "learning_rate": 0.003,
      "loss": 4.0888,
      "step": 4179
    },
    {
      "epoch": 0.0418,
      "grad_norm": 0.8883360779651741,
      "learning_rate": 0.003,
      "loss": 4.098,
      "step": 4180
    },
    {
      "epoch": 0.04181,
      "grad_norm": 1.0344215717913283,
      "learning_rate": 0.003,
      "loss": 4.1003,
      "step": 4181
    },
    {
      "epoch": 0.04182,
      "grad_norm": 0.9869075813400712,
      "learning_rate": 0.003,
      "loss": 4.0833,
      "step": 4182
    },
    {
      "epoch": 0.04183,
      "grad_norm": 0.8294921509305445,
      "learning_rate": 0.003,
      "loss": 4.0878,
      "step": 4183
    },
    {
      "epoch": 0.04184,
      "grad_norm": 0.869514009963607,
      "learning_rate": 0.003,
      "loss": 4.1424,
      "step": 4184
    },
    {
      "epoch": 0.04185,
      "grad_norm": 1.0166270234192272,
      "learning_rate": 0.003,
      "loss": 4.1279,
      "step": 4185
    },
    {
      "epoch": 0.04186,
      "grad_norm": 0.9675533699418017,
      "learning_rate": 0.003,
      "loss": 4.0714,
      "step": 4186
    },
    {
      "epoch": 0.04187,
      "grad_norm": 1.118617978668176,
      "learning_rate": 0.003,
      "loss": 4.1282,
      "step": 4187
    },
    {
      "epoch": 0.04188,
      "grad_norm": 0.9597380404807206,
      "learning_rate": 0.003,
      "loss": 4.0878,
      "step": 4188
    },
    {
      "epoch": 0.04189,
      "grad_norm": 0.8821463731911939,
      "learning_rate": 0.003,
      "loss": 4.1,
      "step": 4189
    },
    {
      "epoch": 0.0419,
      "grad_norm": 0.8335574771570168,
      "learning_rate": 0.003,
      "loss": 4.097,
      "step": 4190
    },
    {
      "epoch": 0.04191,
      "grad_norm": 0.960590386364999,
      "learning_rate": 0.003,
      "loss": 4.1012,
      "step": 4191
    },
    {
      "epoch": 0.04192,
      "grad_norm": 1.1082087641443477,
      "learning_rate": 0.003,
      "loss": 4.0884,
      "step": 4192
    },
    {
      "epoch": 0.04193,
      "grad_norm": 1.1014870968077684,
      "learning_rate": 0.003,
      "loss": 4.0724,
      "step": 4193
    },
    {
      "epoch": 0.04194,
      "grad_norm": 1.004137077094656,
      "learning_rate": 0.003,
      "loss": 4.0669,
      "step": 4194
    },
    {
      "epoch": 0.04195,
      "grad_norm": 1.0060456499295114,
      "learning_rate": 0.003,
      "loss": 4.1185,
      "step": 4195
    },
    {
      "epoch": 0.04196,
      "grad_norm": 0.9781412326562324,
      "learning_rate": 0.003,
      "loss": 4.0768,
      "step": 4196
    },
    {
      "epoch": 0.04197,
      "grad_norm": 0.966982025696319,
      "learning_rate": 0.003,
      "loss": 4.0865,
      "step": 4197
    },
    {
      "epoch": 0.04198,
      "grad_norm": 1.0321323425834754,
      "learning_rate": 0.003,
      "loss": 4.0848,
      "step": 4198
    },
    {
      "epoch": 0.04199,
      "grad_norm": 0.9725763984900901,
      "learning_rate": 0.003,
      "loss": 4.1218,
      "step": 4199
    },
    {
      "epoch": 0.042,
      "grad_norm": 0.9044032352315242,
      "learning_rate": 0.003,
      "loss": 4.0991,
      "step": 4200
    },
    {
      "epoch": 0.04201,
      "grad_norm": 0.876685657514267,
      "learning_rate": 0.003,
      "loss": 4.08,
      "step": 4201
    },
    {
      "epoch": 0.04202,
      "grad_norm": 1.0379948329277224,
      "learning_rate": 0.003,
      "loss": 4.1002,
      "step": 4202
    },
    {
      "epoch": 0.04203,
      "grad_norm": 0.9407666140032972,
      "learning_rate": 0.003,
      "loss": 4.1169,
      "step": 4203
    },
    {
      "epoch": 0.04204,
      "grad_norm": 0.9627655702680004,
      "learning_rate": 0.003,
      "loss": 4.085,
      "step": 4204
    },
    {
      "epoch": 0.04205,
      "grad_norm": 0.879727149930489,
      "learning_rate": 0.003,
      "loss": 4.1074,
      "step": 4205
    },
    {
      "epoch": 0.04206,
      "grad_norm": 0.7201424170354667,
      "learning_rate": 0.003,
      "loss": 4.098,
      "step": 4206
    },
    {
      "epoch": 0.04207,
      "grad_norm": 0.8722237657873413,
      "learning_rate": 0.003,
      "loss": 4.0915,
      "step": 4207
    },
    {
      "epoch": 0.04208,
      "grad_norm": 1.057153458391563,
      "learning_rate": 0.003,
      "loss": 4.1051,
      "step": 4208
    },
    {
      "epoch": 0.04209,
      "grad_norm": 1.0856375627812824,
      "learning_rate": 0.003,
      "loss": 4.1123,
      "step": 4209
    },
    {
      "epoch": 0.0421,
      "grad_norm": 0.8686311196866661,
      "learning_rate": 0.003,
      "loss": 4.0617,
      "step": 4210
    },
    {
      "epoch": 0.04211,
      "grad_norm": 0.772423867231878,
      "learning_rate": 0.003,
      "loss": 4.0983,
      "step": 4211
    },
    {
      "epoch": 0.04212,
      "grad_norm": 0.9206795604812895,
      "learning_rate": 0.003,
      "loss": 4.0918,
      "step": 4212
    },
    {
      "epoch": 0.04213,
      "grad_norm": 1.024206681316176,
      "learning_rate": 0.003,
      "loss": 4.0789,
      "step": 4213
    },
    {
      "epoch": 0.04214,
      "grad_norm": 0.9670322877487537,
      "learning_rate": 0.003,
      "loss": 4.0856,
      "step": 4214
    },
    {
      "epoch": 0.04215,
      "grad_norm": 0.7494961355008928,
      "learning_rate": 0.003,
      "loss": 4.1001,
      "step": 4215
    },
    {
      "epoch": 0.04216,
      "grad_norm": 0.765486635210703,
      "learning_rate": 0.003,
      "loss": 4.0645,
      "step": 4216
    },
    {
      "epoch": 0.04217,
      "grad_norm": 0.7160617513742286,
      "learning_rate": 0.003,
      "loss": 4.0848,
      "step": 4217
    },
    {
      "epoch": 0.04218,
      "grad_norm": 0.8033027325871259,
      "learning_rate": 0.003,
      "loss": 4.1174,
      "step": 4218
    },
    {
      "epoch": 0.04219,
      "grad_norm": 0.8144707096133446,
      "learning_rate": 0.003,
      "loss": 4.0403,
      "step": 4219
    },
    {
      "epoch": 0.0422,
      "grad_norm": 0.8870053013997257,
      "learning_rate": 0.003,
      "loss": 4.1062,
      "step": 4220
    },
    {
      "epoch": 0.04221,
      "grad_norm": 1.0572558853767193,
      "learning_rate": 0.003,
      "loss": 4.0827,
      "step": 4221
    },
    {
      "epoch": 0.04222,
      "grad_norm": 0.8601884630133385,
      "learning_rate": 0.003,
      "loss": 4.0479,
      "step": 4222
    },
    {
      "epoch": 0.04223,
      "grad_norm": 0.7192916764502633,
      "learning_rate": 0.003,
      "loss": 4.1063,
      "step": 4223
    },
    {
      "epoch": 0.04224,
      "grad_norm": 0.7121512553768459,
      "learning_rate": 0.003,
      "loss": 4.1112,
      "step": 4224
    },
    {
      "epoch": 0.04225,
      "grad_norm": 0.7021515831824932,
      "learning_rate": 0.003,
      "loss": 4.0944,
      "step": 4225
    },
    {
      "epoch": 0.04226,
      "grad_norm": 0.8314637642734086,
      "learning_rate": 0.003,
      "loss": 4.0828,
      "step": 4226
    },
    {
      "epoch": 0.04227,
      "grad_norm": 0.9670578496344213,
      "learning_rate": 0.003,
      "loss": 4.0877,
      "step": 4227
    },
    {
      "epoch": 0.04228,
      "grad_norm": 1.049287879558258,
      "learning_rate": 0.003,
      "loss": 4.0772,
      "step": 4228
    },
    {
      "epoch": 0.04229,
      "grad_norm": 1.0992300949270042,
      "learning_rate": 0.003,
      "loss": 4.1078,
      "step": 4229
    },
    {
      "epoch": 0.0423,
      "grad_norm": 0.900897411198522,
      "learning_rate": 0.003,
      "loss": 4.0917,
      "step": 4230
    },
    {
      "epoch": 0.04231,
      "grad_norm": 0.9451671959275939,
      "learning_rate": 0.003,
      "loss": 4.0886,
      "step": 4231
    },
    {
      "epoch": 0.04232,
      "grad_norm": 0.7956005679951864,
      "learning_rate": 0.003,
      "loss": 4.0993,
      "step": 4232
    },
    {
      "epoch": 0.04233,
      "grad_norm": 0.8292582126895408,
      "learning_rate": 0.003,
      "loss": 4.0505,
      "step": 4233
    },
    {
      "epoch": 0.04234,
      "grad_norm": 0.8871903907197981,
      "learning_rate": 0.003,
      "loss": 4.0874,
      "step": 4234
    },
    {
      "epoch": 0.04235,
      "grad_norm": 0.9300140410326806,
      "learning_rate": 0.003,
      "loss": 4.0712,
      "step": 4235
    },
    {
      "epoch": 0.04236,
      "grad_norm": 1.0741902913167412,
      "learning_rate": 0.003,
      "loss": 4.0895,
      "step": 4236
    },
    {
      "epoch": 0.04237,
      "grad_norm": 0.9580888994428801,
      "learning_rate": 0.003,
      "loss": 4.0592,
      "step": 4237
    },
    {
      "epoch": 0.04238,
      "grad_norm": 0.9482815531910007,
      "learning_rate": 0.003,
      "loss": 4.1176,
      "step": 4238
    },
    {
      "epoch": 0.04239,
      "grad_norm": 0.9555806662772239,
      "learning_rate": 0.003,
      "loss": 4.1016,
      "step": 4239
    },
    {
      "epoch": 0.0424,
      "grad_norm": 0.8664629875137623,
      "learning_rate": 0.003,
      "loss": 4.1005,
      "step": 4240
    },
    {
      "epoch": 0.04241,
      "grad_norm": 0.9630766502725931,
      "learning_rate": 0.003,
      "loss": 4.1171,
      "step": 4241
    },
    {
      "epoch": 0.04242,
      "grad_norm": 1.0804387798244068,
      "learning_rate": 0.003,
      "loss": 4.1085,
      "step": 4242
    },
    {
      "epoch": 0.04243,
      "grad_norm": 0.9274350779718051,
      "learning_rate": 0.003,
      "loss": 4.0861,
      "step": 4243
    },
    {
      "epoch": 0.04244,
      "grad_norm": 0.913816427911704,
      "learning_rate": 0.003,
      "loss": 4.0946,
      "step": 4244
    },
    {
      "epoch": 0.04245,
      "grad_norm": 1.106413525319224,
      "learning_rate": 0.003,
      "loss": 4.0955,
      "step": 4245
    },
    {
      "epoch": 0.04246,
      "grad_norm": 0.9030634078700811,
      "learning_rate": 0.003,
      "loss": 4.1158,
      "step": 4246
    },
    {
      "epoch": 0.04247,
      "grad_norm": 0.8416832028937715,
      "learning_rate": 0.003,
      "loss": 4.0884,
      "step": 4247
    },
    {
      "epoch": 0.04248,
      "grad_norm": 0.8169991638726913,
      "learning_rate": 0.003,
      "loss": 4.081,
      "step": 4248
    },
    {
      "epoch": 0.04249,
      "grad_norm": 0.8044824810175584,
      "learning_rate": 0.003,
      "loss": 4.0797,
      "step": 4249
    },
    {
      "epoch": 0.0425,
      "grad_norm": 0.8896696091741753,
      "learning_rate": 0.003,
      "loss": 4.0983,
      "step": 4250
    },
    {
      "epoch": 0.04251,
      "grad_norm": 0.9502948772765745,
      "learning_rate": 0.003,
      "loss": 4.0614,
      "step": 4251
    },
    {
      "epoch": 0.04252,
      "grad_norm": 1.0326556881542461,
      "learning_rate": 0.003,
      "loss": 4.1092,
      "step": 4252
    },
    {
      "epoch": 0.04253,
      "grad_norm": 1.0173408114295495,
      "learning_rate": 0.003,
      "loss": 4.0741,
      "step": 4253
    },
    {
      "epoch": 0.04254,
      "grad_norm": 1.0646057446624475,
      "learning_rate": 0.003,
      "loss": 4.1019,
      "step": 4254
    },
    {
      "epoch": 0.04255,
      "grad_norm": 0.8420645317602736,
      "learning_rate": 0.003,
      "loss": 4.0732,
      "step": 4255
    },
    {
      "epoch": 0.04256,
      "grad_norm": 0.7201557468187585,
      "learning_rate": 0.003,
      "loss": 4.078,
      "step": 4256
    },
    {
      "epoch": 0.04257,
      "grad_norm": 0.7996495415748023,
      "learning_rate": 0.003,
      "loss": 4.1028,
      "step": 4257
    },
    {
      "epoch": 0.04258,
      "grad_norm": 0.8858738615733014,
      "learning_rate": 0.003,
      "loss": 4.1038,
      "step": 4258
    },
    {
      "epoch": 0.04259,
      "grad_norm": 0.9662802527822355,
      "learning_rate": 0.003,
      "loss": 4.0711,
      "step": 4259
    },
    {
      "epoch": 0.0426,
      "grad_norm": 0.9169898773347952,
      "learning_rate": 0.003,
      "loss": 4.0869,
      "step": 4260
    },
    {
      "epoch": 0.04261,
      "grad_norm": 0.9208708917372306,
      "learning_rate": 0.003,
      "loss": 4.0993,
      "step": 4261
    },
    {
      "epoch": 0.04262,
      "grad_norm": 0.8866710041215375,
      "learning_rate": 0.003,
      "loss": 4.1145,
      "step": 4262
    },
    {
      "epoch": 0.04263,
      "grad_norm": 0.8426381040253105,
      "learning_rate": 0.003,
      "loss": 4.0973,
      "step": 4263
    },
    {
      "epoch": 0.04264,
      "grad_norm": 0.743799590275064,
      "learning_rate": 0.003,
      "loss": 4.1055,
      "step": 4264
    },
    {
      "epoch": 0.04265,
      "grad_norm": 0.8815249453115196,
      "learning_rate": 0.003,
      "loss": 4.1041,
      "step": 4265
    },
    {
      "epoch": 0.04266,
      "grad_norm": 0.8452841678134956,
      "learning_rate": 0.003,
      "loss": 4.0856,
      "step": 4266
    },
    {
      "epoch": 0.04267,
      "grad_norm": 0.7585523353787604,
      "learning_rate": 0.003,
      "loss": 4.0593,
      "step": 4267
    },
    {
      "epoch": 0.04268,
      "grad_norm": 0.7042766430430472,
      "learning_rate": 0.003,
      "loss": 4.0972,
      "step": 4268
    },
    {
      "epoch": 0.04269,
      "grad_norm": 0.6767603788338118,
      "learning_rate": 0.003,
      "loss": 4.0585,
      "step": 4269
    },
    {
      "epoch": 0.0427,
      "grad_norm": 0.6619260992554636,
      "learning_rate": 0.003,
      "loss": 4.0769,
      "step": 4270
    },
    {
      "epoch": 0.04271,
      "grad_norm": 0.716784572802792,
      "learning_rate": 0.003,
      "loss": 4.0901,
      "step": 4271
    },
    {
      "epoch": 0.04272,
      "grad_norm": 0.9018222570354946,
      "learning_rate": 0.003,
      "loss": 4.0805,
      "step": 4272
    },
    {
      "epoch": 0.04273,
      "grad_norm": 1.0946540429372817,
      "learning_rate": 0.003,
      "loss": 4.0688,
      "step": 4273
    },
    {
      "epoch": 0.04274,
      "grad_norm": 0.8458329250448817,
      "learning_rate": 0.003,
      "loss": 4.086,
      "step": 4274
    },
    {
      "epoch": 0.04275,
      "grad_norm": 0.6435172962120423,
      "learning_rate": 0.003,
      "loss": 4.0865,
      "step": 4275
    },
    {
      "epoch": 0.04276,
      "grad_norm": 0.6643590249615142,
      "learning_rate": 0.003,
      "loss": 4.1023,
      "step": 4276
    },
    {
      "epoch": 0.04277,
      "grad_norm": 0.79792656427568,
      "learning_rate": 0.003,
      "loss": 4.0928,
      "step": 4277
    },
    {
      "epoch": 0.04278,
      "grad_norm": 0.740840553173491,
      "learning_rate": 0.003,
      "loss": 4.0939,
      "step": 4278
    },
    {
      "epoch": 0.04279,
      "grad_norm": 0.7711154804154717,
      "learning_rate": 0.003,
      "loss": 4.071,
      "step": 4279
    },
    {
      "epoch": 0.0428,
      "grad_norm": 0.7250008586500746,
      "learning_rate": 0.003,
      "loss": 4.0495,
      "step": 4280
    },
    {
      "epoch": 0.04281,
      "grad_norm": 0.7499387037166975,
      "learning_rate": 0.003,
      "loss": 4.1047,
      "step": 4281
    },
    {
      "epoch": 0.04282,
      "grad_norm": 0.7545960253753359,
      "learning_rate": 0.003,
      "loss": 4.0814,
      "step": 4282
    },
    {
      "epoch": 0.04283,
      "grad_norm": 0.839588483108746,
      "learning_rate": 0.003,
      "loss": 4.12,
      "step": 4283
    },
    {
      "epoch": 0.04284,
      "grad_norm": 0.86181150410575,
      "learning_rate": 0.003,
      "loss": 4.074,
      "step": 4284
    },
    {
      "epoch": 0.04285,
      "grad_norm": 0.8662575012952002,
      "learning_rate": 0.003,
      "loss": 4.09,
      "step": 4285
    },
    {
      "epoch": 0.04286,
      "grad_norm": 1.0398949684975605,
      "learning_rate": 0.003,
      "loss": 4.1182,
      "step": 4286
    },
    {
      "epoch": 0.04287,
      "grad_norm": 1.1167633903119278,
      "learning_rate": 0.003,
      "loss": 4.1002,
      "step": 4287
    },
    {
      "epoch": 0.04288,
      "grad_norm": 0.9953157675421085,
      "learning_rate": 0.003,
      "loss": 4.0784,
      "step": 4288
    },
    {
      "epoch": 0.04289,
      "grad_norm": 0.9963288991872585,
      "learning_rate": 0.003,
      "loss": 4.087,
      "step": 4289
    },
    {
      "epoch": 0.0429,
      "grad_norm": 1.0134088286440928,
      "learning_rate": 0.003,
      "loss": 4.0732,
      "step": 4290
    },
    {
      "epoch": 0.04291,
      "grad_norm": 1.0632191551515977,
      "learning_rate": 0.003,
      "loss": 4.08,
      "step": 4291
    },
    {
      "epoch": 0.04292,
      "grad_norm": 1.1744325304960588,
      "learning_rate": 0.003,
      "loss": 4.0864,
      "step": 4292
    },
    {
      "epoch": 0.04293,
      "grad_norm": 0.8830847280464275,
      "learning_rate": 0.003,
      "loss": 4.1184,
      "step": 4293
    },
    {
      "epoch": 0.04294,
      "grad_norm": 0.8997282546954833,
      "learning_rate": 0.003,
      "loss": 4.0968,
      "step": 4294
    },
    {
      "epoch": 0.04295,
      "grad_norm": 1.0130781104561735,
      "learning_rate": 0.003,
      "loss": 4.0742,
      "step": 4295
    },
    {
      "epoch": 0.04296,
      "grad_norm": 0.7688499577075354,
      "learning_rate": 0.003,
      "loss": 4.0677,
      "step": 4296
    },
    {
      "epoch": 0.04297,
      "grad_norm": 0.8069754114362383,
      "learning_rate": 0.003,
      "loss": 4.0556,
      "step": 4297
    },
    {
      "epoch": 0.04298,
      "grad_norm": 0.7696772164144003,
      "learning_rate": 0.003,
      "loss": 4.1021,
      "step": 4298
    },
    {
      "epoch": 0.04299,
      "grad_norm": 0.731529824570407,
      "learning_rate": 0.003,
      "loss": 4.0886,
      "step": 4299
    },
    {
      "epoch": 0.043,
      "grad_norm": 0.7813911597495976,
      "learning_rate": 0.003,
      "loss": 4.0702,
      "step": 4300
    },
    {
      "epoch": 0.04301,
      "grad_norm": 0.9515070484621972,
      "learning_rate": 0.003,
      "loss": 4.1246,
      "step": 4301
    },
    {
      "epoch": 0.04302,
      "grad_norm": 1.1524985858220247,
      "learning_rate": 0.003,
      "loss": 4.1198,
      "step": 4302
    },
    {
      "epoch": 0.04303,
      "grad_norm": 0.9661803127063716,
      "learning_rate": 0.003,
      "loss": 4.1038,
      "step": 4303
    },
    {
      "epoch": 0.04304,
      "grad_norm": 0.8958980804239224,
      "learning_rate": 0.003,
      "loss": 4.1004,
      "step": 4304
    },
    {
      "epoch": 0.04305,
      "grad_norm": 0.9457226757083944,
      "learning_rate": 0.003,
      "loss": 4.0986,
      "step": 4305
    },
    {
      "epoch": 0.04306,
      "grad_norm": 0.866320067732626,
      "learning_rate": 0.003,
      "loss": 4.0701,
      "step": 4306
    },
    {
      "epoch": 0.04307,
      "grad_norm": 0.9815566634548711,
      "learning_rate": 0.003,
      "loss": 4.0798,
      "step": 4307
    },
    {
      "epoch": 0.04308,
      "grad_norm": 1.1045714176495842,
      "learning_rate": 0.003,
      "loss": 4.1016,
      "step": 4308
    },
    {
      "epoch": 0.04309,
      "grad_norm": 0.9997081776371058,
      "learning_rate": 0.003,
      "loss": 4.0875,
      "step": 4309
    },
    {
      "epoch": 0.0431,
      "grad_norm": 1.014433219426494,
      "learning_rate": 0.003,
      "loss": 4.1076,
      "step": 4310
    },
    {
      "epoch": 0.04311,
      "grad_norm": 1.0841431057954531,
      "learning_rate": 0.003,
      "loss": 4.1103,
      "step": 4311
    },
    {
      "epoch": 0.04312,
      "grad_norm": 0.8706786032806877,
      "learning_rate": 0.003,
      "loss": 4.0822,
      "step": 4312
    },
    {
      "epoch": 0.04313,
      "grad_norm": 1.0443926449585144,
      "learning_rate": 0.003,
      "loss": 4.1006,
      "step": 4313
    },
    {
      "epoch": 0.04314,
      "grad_norm": 1.0606632584516302,
      "learning_rate": 0.003,
      "loss": 4.0894,
      "step": 4314
    },
    {
      "epoch": 0.04315,
      "grad_norm": 0.8710054154104404,
      "learning_rate": 0.003,
      "loss": 4.1084,
      "step": 4315
    },
    {
      "epoch": 0.04316,
      "grad_norm": 0.7770801944004984,
      "learning_rate": 0.003,
      "loss": 4.101,
      "step": 4316
    },
    {
      "epoch": 0.04317,
      "grad_norm": 0.808693446756647,
      "learning_rate": 0.003,
      "loss": 4.1163,
      "step": 4317
    },
    {
      "epoch": 0.04318,
      "grad_norm": 0.783023904953245,
      "learning_rate": 0.003,
      "loss": 4.059,
      "step": 4318
    },
    {
      "epoch": 0.04319,
      "grad_norm": 0.7045592780420906,
      "learning_rate": 0.003,
      "loss": 4.0509,
      "step": 4319
    },
    {
      "epoch": 0.0432,
      "grad_norm": 0.6211212627554941,
      "learning_rate": 0.003,
      "loss": 4.0712,
      "step": 4320
    },
    {
      "epoch": 0.04321,
      "grad_norm": 0.7218417386943828,
      "learning_rate": 0.003,
      "loss": 4.0759,
      "step": 4321
    },
    {
      "epoch": 0.04322,
      "grad_norm": 0.7864622459064595,
      "learning_rate": 0.003,
      "loss": 4.1099,
      "step": 4322
    },
    {
      "epoch": 0.04323,
      "grad_norm": 1.0759947667136016,
      "learning_rate": 0.003,
      "loss": 4.0829,
      "step": 4323
    },
    {
      "epoch": 0.04324,
      "grad_norm": 1.3618558609167744,
      "learning_rate": 0.003,
      "loss": 4.0891,
      "step": 4324
    },
    {
      "epoch": 0.04325,
      "grad_norm": 0.8537661961267661,
      "learning_rate": 0.003,
      "loss": 4.108,
      "step": 4325
    },
    {
      "epoch": 0.04326,
      "grad_norm": 0.8673429528992446,
      "learning_rate": 0.003,
      "loss": 4.0867,
      "step": 4326
    },
    {
      "epoch": 0.04327,
      "grad_norm": 0.8784142198291053,
      "learning_rate": 0.003,
      "loss": 4.071,
      "step": 4327
    },
    {
      "epoch": 0.04328,
      "grad_norm": 0.964766867252245,
      "learning_rate": 0.003,
      "loss": 4.088,
      "step": 4328
    },
    {
      "epoch": 0.04329,
      "grad_norm": 1.159486755106838,
      "learning_rate": 0.003,
      "loss": 4.1146,
      "step": 4329
    },
    {
      "epoch": 0.0433,
      "grad_norm": 0.9798503929358533,
      "learning_rate": 0.003,
      "loss": 4.0823,
      "step": 4330
    },
    {
      "epoch": 0.04331,
      "grad_norm": 0.8036875374792932,
      "learning_rate": 0.003,
      "loss": 4.0724,
      "step": 4331
    },
    {
      "epoch": 0.04332,
      "grad_norm": 0.8974020360029874,
      "learning_rate": 0.003,
      "loss": 4.1218,
      "step": 4332
    },
    {
      "epoch": 0.04333,
      "grad_norm": 0.952759627899482,
      "learning_rate": 0.003,
      "loss": 4.0939,
      "step": 4333
    },
    {
      "epoch": 0.04334,
      "grad_norm": 0.9486168347147088,
      "learning_rate": 0.003,
      "loss": 4.0848,
      "step": 4334
    },
    {
      "epoch": 0.04335,
      "grad_norm": 0.9411584678122592,
      "learning_rate": 0.003,
      "loss": 4.073,
      "step": 4335
    },
    {
      "epoch": 0.04336,
      "grad_norm": 0.8380130707060495,
      "learning_rate": 0.003,
      "loss": 4.1043,
      "step": 4336
    },
    {
      "epoch": 0.04337,
      "grad_norm": 0.8435104385458285,
      "learning_rate": 0.003,
      "loss": 4.0897,
      "step": 4337
    },
    {
      "epoch": 0.04338,
      "grad_norm": 0.8283613873296221,
      "learning_rate": 0.003,
      "loss": 4.0901,
      "step": 4338
    },
    {
      "epoch": 0.04339,
      "grad_norm": 0.7575807206093103,
      "learning_rate": 0.003,
      "loss": 4.0957,
      "step": 4339
    },
    {
      "epoch": 0.0434,
      "grad_norm": 0.7052060232541129,
      "learning_rate": 0.003,
      "loss": 4.0745,
      "step": 4340
    },
    {
      "epoch": 0.04341,
      "grad_norm": 0.7393447733605004,
      "learning_rate": 0.003,
      "loss": 4.0863,
      "step": 4341
    },
    {
      "epoch": 0.04342,
      "grad_norm": 0.6868972479892425,
      "learning_rate": 0.003,
      "loss": 4.0719,
      "step": 4342
    },
    {
      "epoch": 0.04343,
      "grad_norm": 0.642426794922686,
      "learning_rate": 0.003,
      "loss": 4.1075,
      "step": 4343
    },
    {
      "epoch": 0.04344,
      "grad_norm": 0.6793615281465072,
      "learning_rate": 0.003,
      "loss": 4.0521,
      "step": 4344
    },
    {
      "epoch": 0.04345,
      "grad_norm": 0.6547984850581937,
      "learning_rate": 0.003,
      "loss": 4.0924,
      "step": 4345
    },
    {
      "epoch": 0.04346,
      "grad_norm": 0.7129305907572272,
      "learning_rate": 0.003,
      "loss": 4.0874,
      "step": 4346
    },
    {
      "epoch": 0.04347,
      "grad_norm": 0.7392919981263668,
      "learning_rate": 0.003,
      "loss": 4.0892,
      "step": 4347
    },
    {
      "epoch": 0.04348,
      "grad_norm": 0.7595005587543163,
      "learning_rate": 0.003,
      "loss": 4.0718,
      "step": 4348
    },
    {
      "epoch": 0.04349,
      "grad_norm": 0.8185974571279835,
      "learning_rate": 0.003,
      "loss": 4.087,
      "step": 4349
    },
    {
      "epoch": 0.0435,
      "grad_norm": 0.9514565408114718,
      "learning_rate": 0.003,
      "loss": 4.0754,
      "step": 4350
    },
    {
      "epoch": 0.04351,
      "grad_norm": 1.2109347120622453,
      "learning_rate": 0.003,
      "loss": 4.0959,
      "step": 4351
    },
    {
      "epoch": 0.04352,
      "grad_norm": 0.9471998868314153,
      "learning_rate": 0.003,
      "loss": 4.1051,
      "step": 4352
    },
    {
      "epoch": 0.04353,
      "grad_norm": 0.9889455977002414,
      "learning_rate": 0.003,
      "loss": 4.0786,
      "step": 4353
    },
    {
      "epoch": 0.04354,
      "grad_norm": 1.0731295960755523,
      "learning_rate": 0.003,
      "loss": 4.088,
      "step": 4354
    },
    {
      "epoch": 0.04355,
      "grad_norm": 0.9572032046556027,
      "learning_rate": 0.003,
      "loss": 4.0916,
      "step": 4355
    },
    {
      "epoch": 0.04356,
      "grad_norm": 0.9581656158071228,
      "learning_rate": 0.003,
      "loss": 4.0979,
      "step": 4356
    },
    {
      "epoch": 0.04357,
      "grad_norm": 0.9195125428358254,
      "learning_rate": 0.003,
      "loss": 4.0862,
      "step": 4357
    },
    {
      "epoch": 0.04358,
      "grad_norm": 0.9636453763618577,
      "learning_rate": 0.003,
      "loss": 4.0757,
      "step": 4358
    },
    {
      "epoch": 0.04359,
      "grad_norm": 1.0660232103277558,
      "learning_rate": 0.003,
      "loss": 4.1052,
      "step": 4359
    },
    {
      "epoch": 0.0436,
      "grad_norm": 0.8061306785792844,
      "learning_rate": 0.003,
      "loss": 4.0996,
      "step": 4360
    },
    {
      "epoch": 0.04361,
      "grad_norm": 0.8093145893752948,
      "learning_rate": 0.003,
      "loss": 4.0629,
      "step": 4361
    },
    {
      "epoch": 0.04362,
      "grad_norm": 0.8171945284904929,
      "learning_rate": 0.003,
      "loss": 4.0733,
      "step": 4362
    },
    {
      "epoch": 0.04363,
      "grad_norm": 0.9868190592217813,
      "learning_rate": 0.003,
      "loss": 4.0814,
      "step": 4363
    },
    {
      "epoch": 0.04364,
      "grad_norm": 1.0426300462439566,
      "learning_rate": 0.003,
      "loss": 4.0994,
      "step": 4364
    },
    {
      "epoch": 0.04365,
      "grad_norm": 0.8588093044292321,
      "learning_rate": 0.003,
      "loss": 4.0957,
      "step": 4365
    },
    {
      "epoch": 0.04366,
      "grad_norm": 0.8909026739864028,
      "learning_rate": 0.003,
      "loss": 4.0533,
      "step": 4366
    },
    {
      "epoch": 0.04367,
      "grad_norm": 0.873925255651534,
      "learning_rate": 0.003,
      "loss": 4.1008,
      "step": 4367
    },
    {
      "epoch": 0.04368,
      "grad_norm": 0.779588637997916,
      "learning_rate": 0.003,
      "loss": 4.0846,
      "step": 4368
    },
    {
      "epoch": 0.04369,
      "grad_norm": 0.7966324124265414,
      "learning_rate": 0.003,
      "loss": 4.0772,
      "step": 4369
    },
    {
      "epoch": 0.0437,
      "grad_norm": 0.669780598735826,
      "learning_rate": 0.003,
      "loss": 4.1015,
      "step": 4370
    },
    {
      "epoch": 0.04371,
      "grad_norm": 0.7760483978802495,
      "learning_rate": 0.003,
      "loss": 4.0884,
      "step": 4371
    },
    {
      "epoch": 0.04372,
      "grad_norm": 0.9716643560234968,
      "learning_rate": 0.003,
      "loss": 4.0775,
      "step": 4372
    },
    {
      "epoch": 0.04373,
      "grad_norm": 1.2693018325935452,
      "learning_rate": 0.003,
      "loss": 4.1111,
      "step": 4373
    },
    {
      "epoch": 0.04374,
      "grad_norm": 1.0224726158531943,
      "learning_rate": 0.003,
      "loss": 4.1489,
      "step": 4374
    },
    {
      "epoch": 0.04375,
      "grad_norm": 0.8263535068880948,
      "learning_rate": 0.003,
      "loss": 4.0555,
      "step": 4375
    },
    {
      "epoch": 0.04376,
      "grad_norm": 0.7522103171040029,
      "learning_rate": 0.003,
      "loss": 4.0945,
      "step": 4376
    },
    {
      "epoch": 0.04377,
      "grad_norm": 0.8208201264915056,
      "learning_rate": 0.003,
      "loss": 4.1211,
      "step": 4377
    },
    {
      "epoch": 0.04378,
      "grad_norm": 1.005702386332168,
      "learning_rate": 0.003,
      "loss": 4.109,
      "step": 4378
    },
    {
      "epoch": 0.04379,
      "grad_norm": 1.1786773838771147,
      "learning_rate": 0.003,
      "loss": 4.1324,
      "step": 4379
    },
    {
      "epoch": 0.0438,
      "grad_norm": 0.7871810906501763,
      "learning_rate": 0.003,
      "loss": 4.092,
      "step": 4380
    },
    {
      "epoch": 0.04381,
      "grad_norm": 0.7577866085120181,
      "learning_rate": 0.003,
      "loss": 4.0802,
      "step": 4381
    },
    {
      "epoch": 0.04382,
      "grad_norm": 0.830788547447954,
      "learning_rate": 0.003,
      "loss": 4.096,
      "step": 4382
    },
    {
      "epoch": 0.04383,
      "grad_norm": 0.9439340501432005,
      "learning_rate": 0.003,
      "loss": 4.076,
      "step": 4383
    },
    {
      "epoch": 0.04384,
      "grad_norm": 0.9569811292204884,
      "learning_rate": 0.003,
      "loss": 4.0812,
      "step": 4384
    },
    {
      "epoch": 0.04385,
      "grad_norm": 0.9621273589739346,
      "learning_rate": 0.003,
      "loss": 4.1186,
      "step": 4385
    },
    {
      "epoch": 0.04386,
      "grad_norm": 1.0027765639035047,
      "learning_rate": 0.003,
      "loss": 4.0978,
      "step": 4386
    },
    {
      "epoch": 0.04387,
      "grad_norm": 0.8877446565419372,
      "learning_rate": 0.003,
      "loss": 4.1159,
      "step": 4387
    },
    {
      "epoch": 0.04388,
      "grad_norm": 1.004365074328248,
      "learning_rate": 0.003,
      "loss": 4.0785,
      "step": 4388
    },
    {
      "epoch": 0.04389,
      "grad_norm": 1.167012365586639,
      "learning_rate": 0.003,
      "loss": 4.1193,
      "step": 4389
    },
    {
      "epoch": 0.0439,
      "grad_norm": 1.126759575747849,
      "learning_rate": 0.003,
      "loss": 4.1115,
      "step": 4390
    },
    {
      "epoch": 0.04391,
      "grad_norm": 0.8766190371623821,
      "learning_rate": 0.003,
      "loss": 4.0806,
      "step": 4391
    },
    {
      "epoch": 0.04392,
      "grad_norm": 0.9422353250001032,
      "learning_rate": 0.003,
      "loss": 4.0901,
      "step": 4392
    },
    {
      "epoch": 0.04393,
      "grad_norm": 0.9388161319056185,
      "learning_rate": 0.003,
      "loss": 4.0779,
      "step": 4393
    },
    {
      "epoch": 0.04394,
      "grad_norm": 0.9030510084400544,
      "learning_rate": 0.003,
      "loss": 4.0976,
      "step": 4394
    },
    {
      "epoch": 0.04395,
      "grad_norm": 0.8932781141162982,
      "learning_rate": 0.003,
      "loss": 4.0795,
      "step": 4395
    },
    {
      "epoch": 0.04396,
      "grad_norm": 0.8796817611377403,
      "learning_rate": 0.003,
      "loss": 4.0526,
      "step": 4396
    },
    {
      "epoch": 0.04397,
      "grad_norm": 0.844265233977852,
      "learning_rate": 0.003,
      "loss": 4.0674,
      "step": 4397
    },
    {
      "epoch": 0.04398,
      "grad_norm": 0.9152454694688225,
      "learning_rate": 0.003,
      "loss": 4.0846,
      "step": 4398
    },
    {
      "epoch": 0.04399,
      "grad_norm": 0.9294951448787108,
      "learning_rate": 0.003,
      "loss": 4.0919,
      "step": 4399
    },
    {
      "epoch": 0.044,
      "grad_norm": 1.0106829137077165,
      "learning_rate": 0.003,
      "loss": 4.0903,
      "step": 4400
    },
    {
      "epoch": 0.04401,
      "grad_norm": 0.9222214369606679,
      "learning_rate": 0.003,
      "loss": 4.1049,
      "step": 4401
    },
    {
      "epoch": 0.04402,
      "grad_norm": 0.8861574452686165,
      "learning_rate": 0.003,
      "loss": 4.1072,
      "step": 4402
    },
    {
      "epoch": 0.04403,
      "grad_norm": 0.9244720061111934,
      "learning_rate": 0.003,
      "loss": 4.0741,
      "step": 4403
    },
    {
      "epoch": 0.04404,
      "grad_norm": 1.0163229703427172,
      "learning_rate": 0.003,
      "loss": 4.1046,
      "step": 4404
    },
    {
      "epoch": 0.04405,
      "grad_norm": 1.112663606677766,
      "learning_rate": 0.003,
      "loss": 4.103,
      "step": 4405
    },
    {
      "epoch": 0.04406,
      "grad_norm": 0.7836665936004462,
      "learning_rate": 0.003,
      "loss": 4.0944,
      "step": 4406
    },
    {
      "epoch": 0.04407,
      "grad_norm": 0.6834208693181686,
      "learning_rate": 0.003,
      "loss": 4.0614,
      "step": 4407
    },
    {
      "epoch": 0.04408,
      "grad_norm": 0.7215319828238491,
      "learning_rate": 0.003,
      "loss": 4.0692,
      "step": 4408
    },
    {
      "epoch": 0.04409,
      "grad_norm": 0.7547734730978946,
      "learning_rate": 0.003,
      "loss": 4.0991,
      "step": 4409
    },
    {
      "epoch": 0.0441,
      "grad_norm": 0.9855505388299907,
      "learning_rate": 0.003,
      "loss": 4.0924,
      "step": 4410
    },
    {
      "epoch": 0.04411,
      "grad_norm": 1.3745879066451838,
      "learning_rate": 0.003,
      "loss": 4.0889,
      "step": 4411
    },
    {
      "epoch": 0.04412,
      "grad_norm": 0.6697260575539041,
      "learning_rate": 0.003,
      "loss": 4.0609,
      "step": 4412
    },
    {
      "epoch": 0.04413,
      "grad_norm": 0.7289649196117617,
      "learning_rate": 0.003,
      "loss": 4.0687,
      "step": 4413
    },
    {
      "epoch": 0.04414,
      "grad_norm": 0.8710542961593792,
      "learning_rate": 0.003,
      "loss": 4.121,
      "step": 4414
    },
    {
      "epoch": 0.04415,
      "grad_norm": 1.006707768649903,
      "learning_rate": 0.003,
      "loss": 4.089,
      "step": 4415
    },
    {
      "epoch": 0.04416,
      "grad_norm": 0.9528175763218202,
      "learning_rate": 0.003,
      "loss": 4.0762,
      "step": 4416
    },
    {
      "epoch": 0.04417,
      "grad_norm": 0.8424334779705969,
      "learning_rate": 0.003,
      "loss": 4.0503,
      "step": 4417
    },
    {
      "epoch": 0.04418,
      "grad_norm": 0.97351224925848,
      "learning_rate": 0.003,
      "loss": 4.0966,
      "step": 4418
    },
    {
      "epoch": 0.04419,
      "grad_norm": 1.0808697355237324,
      "learning_rate": 0.003,
      "loss": 4.1194,
      "step": 4419
    },
    {
      "epoch": 0.0442,
      "grad_norm": 1.167054123021739,
      "learning_rate": 0.003,
      "loss": 4.0947,
      "step": 4420
    },
    {
      "epoch": 0.04421,
      "grad_norm": 0.8176085170190497,
      "learning_rate": 0.003,
      "loss": 4.1169,
      "step": 4421
    },
    {
      "epoch": 0.04422,
      "grad_norm": 0.7560953731068145,
      "learning_rate": 0.003,
      "loss": 4.0803,
      "step": 4422
    },
    {
      "epoch": 0.04423,
      "grad_norm": 0.8430900886347851,
      "learning_rate": 0.003,
      "loss": 4.0997,
      "step": 4423
    },
    {
      "epoch": 0.04424,
      "grad_norm": 0.7698126783686144,
      "learning_rate": 0.003,
      "loss": 4.0479,
      "step": 4424
    },
    {
      "epoch": 0.04425,
      "grad_norm": 0.7635205731836768,
      "learning_rate": 0.003,
      "loss": 4.0713,
      "step": 4425
    },
    {
      "epoch": 0.04426,
      "grad_norm": 0.7393576832666733,
      "learning_rate": 0.003,
      "loss": 4.0931,
      "step": 4426
    },
    {
      "epoch": 0.04427,
      "grad_norm": 0.7066164659139067,
      "learning_rate": 0.003,
      "loss": 4.0786,
      "step": 4427
    },
    {
      "epoch": 0.04428,
      "grad_norm": 0.584092377564762,
      "learning_rate": 0.003,
      "loss": 4.0711,
      "step": 4428
    },
    {
      "epoch": 0.04429,
      "grad_norm": 0.5955745561562374,
      "learning_rate": 0.003,
      "loss": 4.1113,
      "step": 4429
    },
    {
      "epoch": 0.0443,
      "grad_norm": 0.7642349609553867,
      "learning_rate": 0.003,
      "loss": 4.0711,
      "step": 4430
    },
    {
      "epoch": 0.04431,
      "grad_norm": 0.9897440622578424,
      "learning_rate": 0.003,
      "loss": 4.0996,
      "step": 4431
    },
    {
      "epoch": 0.04432,
      "grad_norm": 1.3796673035154565,
      "learning_rate": 0.003,
      "loss": 4.1162,
      "step": 4432
    },
    {
      "epoch": 0.04433,
      "grad_norm": 0.7338783584401088,
      "learning_rate": 0.003,
      "loss": 4.079,
      "step": 4433
    },
    {
      "epoch": 0.04434,
      "grad_norm": 0.9478779466631931,
      "learning_rate": 0.003,
      "loss": 4.0833,
      "step": 4434
    },
    {
      "epoch": 0.04435,
      "grad_norm": 1.0373098684540567,
      "learning_rate": 0.003,
      "loss": 4.1009,
      "step": 4435
    },
    {
      "epoch": 0.04436,
      "grad_norm": 0.9002494417955366,
      "learning_rate": 0.003,
      "loss": 4.0695,
      "step": 4436
    },
    {
      "epoch": 0.04437,
      "grad_norm": 0.9423712815921459,
      "learning_rate": 0.003,
      "loss": 4.0946,
      "step": 4437
    },
    {
      "epoch": 0.04438,
      "grad_norm": 0.9881181207836567,
      "learning_rate": 0.003,
      "loss": 4.1344,
      "step": 4438
    },
    {
      "epoch": 0.04439,
      "grad_norm": 1.140055930440485,
      "learning_rate": 0.003,
      "loss": 4.1062,
      "step": 4439
    },
    {
      "epoch": 0.0444,
      "grad_norm": 0.8857720718409576,
      "learning_rate": 0.003,
      "loss": 4.0592,
      "step": 4440
    },
    {
      "epoch": 0.04441,
      "grad_norm": 0.9085661359962524,
      "learning_rate": 0.003,
      "loss": 4.0916,
      "step": 4441
    },
    {
      "epoch": 0.04442,
      "grad_norm": 0.8204868786177558,
      "learning_rate": 0.003,
      "loss": 4.0858,
      "step": 4442
    },
    {
      "epoch": 0.04443,
      "grad_norm": 0.6457268453463154,
      "learning_rate": 0.003,
      "loss": 4.0546,
      "step": 4443
    },
    {
      "epoch": 0.04444,
      "grad_norm": 0.6492396831004539,
      "learning_rate": 0.003,
      "loss": 4.0556,
      "step": 4444
    },
    {
      "epoch": 0.04445,
      "grad_norm": 0.6928510619497573,
      "learning_rate": 0.003,
      "loss": 4.0868,
      "step": 4445
    },
    {
      "epoch": 0.04446,
      "grad_norm": 0.851264848376963,
      "learning_rate": 0.003,
      "loss": 4.0572,
      "step": 4446
    },
    {
      "epoch": 0.04447,
      "grad_norm": 0.9403414167914219,
      "learning_rate": 0.003,
      "loss": 4.0663,
      "step": 4447
    },
    {
      "epoch": 0.04448,
      "grad_norm": 1.0475208155252718,
      "learning_rate": 0.003,
      "loss": 4.0656,
      "step": 4448
    },
    {
      "epoch": 0.04449,
      "grad_norm": 0.8839010679595062,
      "learning_rate": 0.003,
      "loss": 4.0779,
      "step": 4449
    },
    {
      "epoch": 0.0445,
      "grad_norm": 0.9663307778578972,
      "learning_rate": 0.003,
      "loss": 4.0855,
      "step": 4450
    },
    {
      "epoch": 0.04451,
      "grad_norm": 1.0371788520078717,
      "learning_rate": 0.003,
      "loss": 4.0875,
      "step": 4451
    },
    {
      "epoch": 0.04452,
      "grad_norm": 1.2078981755286253,
      "learning_rate": 0.003,
      "loss": 4.1113,
      "step": 4452
    },
    {
      "epoch": 0.04453,
      "grad_norm": 0.8296222855961709,
      "learning_rate": 0.003,
      "loss": 4.1097,
      "step": 4453
    },
    {
      "epoch": 0.04454,
      "grad_norm": 0.8721163575738796,
      "learning_rate": 0.003,
      "loss": 4.0902,
      "step": 4454
    },
    {
      "epoch": 0.04455,
      "grad_norm": 0.9017669849588411,
      "learning_rate": 0.003,
      "loss": 4.1017,
      "step": 4455
    },
    {
      "epoch": 0.04456,
      "grad_norm": 1.0543326711835521,
      "learning_rate": 0.003,
      "loss": 4.0725,
      "step": 4456
    },
    {
      "epoch": 0.04457,
      "grad_norm": 1.1758149593074942,
      "learning_rate": 0.003,
      "loss": 4.0972,
      "step": 4457
    },
    {
      "epoch": 0.04458,
      "grad_norm": 0.795675945576436,
      "learning_rate": 0.003,
      "loss": 4.1065,
      "step": 4458
    },
    {
      "epoch": 0.04459,
      "grad_norm": 0.8944676165743426,
      "learning_rate": 0.003,
      "loss": 4.1156,
      "step": 4459
    },
    {
      "epoch": 0.0446,
      "grad_norm": 1.0309911114547456,
      "learning_rate": 0.003,
      "loss": 4.1151,
      "step": 4460
    },
    {
      "epoch": 0.04461,
      "grad_norm": 1.1633164535503087,
      "learning_rate": 0.003,
      "loss": 4.1207,
      "step": 4461
    },
    {
      "epoch": 0.04462,
      "grad_norm": 1.167208135838096,
      "learning_rate": 0.003,
      "loss": 4.0744,
      "step": 4462
    },
    {
      "epoch": 0.04463,
      "grad_norm": 1.183108803762208,
      "learning_rate": 0.003,
      "loss": 4.0919,
      "step": 4463
    },
    {
      "epoch": 0.04464,
      "grad_norm": 0.7854170437651848,
      "learning_rate": 0.003,
      "loss": 4.0985,
      "step": 4464
    },
    {
      "epoch": 0.04465,
      "grad_norm": 0.8018127380897019,
      "learning_rate": 0.003,
      "loss": 4.0828,
      "step": 4465
    },
    {
      "epoch": 0.04466,
      "grad_norm": 0.7724293795857595,
      "learning_rate": 0.003,
      "loss": 4.0933,
      "step": 4466
    },
    {
      "epoch": 0.04467,
      "grad_norm": 0.8927290074378922,
      "learning_rate": 0.003,
      "loss": 4.089,
      "step": 4467
    },
    {
      "epoch": 0.04468,
      "grad_norm": 0.9529441187417813,
      "learning_rate": 0.003,
      "loss": 4.0693,
      "step": 4468
    },
    {
      "epoch": 0.04469,
      "grad_norm": 0.7635512052136587,
      "learning_rate": 0.003,
      "loss": 4.0938,
      "step": 4469
    },
    {
      "epoch": 0.0447,
      "grad_norm": 0.7111288881650668,
      "learning_rate": 0.003,
      "loss": 4.0964,
      "step": 4470
    },
    {
      "epoch": 0.04471,
      "grad_norm": 0.6842975948143225,
      "learning_rate": 0.003,
      "loss": 4.1143,
      "step": 4471
    },
    {
      "epoch": 0.04472,
      "grad_norm": 0.6721731379887363,
      "learning_rate": 0.003,
      "loss": 4.064,
      "step": 4472
    },
    {
      "epoch": 0.04473,
      "grad_norm": 0.8269116687874477,
      "learning_rate": 0.003,
      "loss": 4.069,
      "step": 4473
    },
    {
      "epoch": 0.04474,
      "grad_norm": 0.9372410627748204,
      "learning_rate": 0.003,
      "loss": 4.07,
      "step": 4474
    },
    {
      "epoch": 0.04475,
      "grad_norm": 1.1298342557384584,
      "learning_rate": 0.003,
      "loss": 4.0664,
      "step": 4475
    },
    {
      "epoch": 0.04476,
      "grad_norm": 0.8988046555647741,
      "learning_rate": 0.003,
      "loss": 4.0742,
      "step": 4476
    },
    {
      "epoch": 0.04477,
      "grad_norm": 0.795000264212953,
      "learning_rate": 0.003,
      "loss": 4.0865,
      "step": 4477
    },
    {
      "epoch": 0.04478,
      "grad_norm": 0.8406418984097118,
      "learning_rate": 0.003,
      "loss": 4.0847,
      "step": 4478
    },
    {
      "epoch": 0.04479,
      "grad_norm": 0.9513259886842036,
      "learning_rate": 0.003,
      "loss": 4.067,
      "step": 4479
    },
    {
      "epoch": 0.0448,
      "grad_norm": 1.0486642112843845,
      "learning_rate": 0.003,
      "loss": 4.0806,
      "step": 4480
    },
    {
      "epoch": 0.04481,
      "grad_norm": 1.0910923440009397,
      "learning_rate": 0.003,
      "loss": 4.0771,
      "step": 4481
    },
    {
      "epoch": 0.04482,
      "grad_norm": 0.9923003720594974,
      "learning_rate": 0.003,
      "loss": 4.102,
      "step": 4482
    },
    {
      "epoch": 0.04483,
      "grad_norm": 0.9838181891048627,
      "learning_rate": 0.003,
      "loss": 4.0915,
      "step": 4483
    },
    {
      "epoch": 0.04484,
      "grad_norm": 0.8441164141274783,
      "learning_rate": 0.003,
      "loss": 4.0995,
      "step": 4484
    },
    {
      "epoch": 0.04485,
      "grad_norm": 0.8006899110895409,
      "learning_rate": 0.003,
      "loss": 4.1063,
      "step": 4485
    },
    {
      "epoch": 0.04486,
      "grad_norm": 0.8707281199253866,
      "learning_rate": 0.003,
      "loss": 4.0701,
      "step": 4486
    },
    {
      "epoch": 0.04487,
      "grad_norm": 0.8680647327916373,
      "learning_rate": 0.003,
      "loss": 4.0813,
      "step": 4487
    },
    {
      "epoch": 0.04488,
      "grad_norm": 0.8736811662681273,
      "learning_rate": 0.003,
      "loss": 4.1028,
      "step": 4488
    },
    {
      "epoch": 0.04489,
      "grad_norm": 0.8803676696260273,
      "learning_rate": 0.003,
      "loss": 4.1047,
      "step": 4489
    },
    {
      "epoch": 0.0449,
      "grad_norm": 0.9871186527446516,
      "learning_rate": 0.003,
      "loss": 4.1047,
      "step": 4490
    },
    {
      "epoch": 0.04491,
      "grad_norm": 1.0838956165086664,
      "learning_rate": 0.003,
      "loss": 4.0947,
      "step": 4491
    },
    {
      "epoch": 0.04492,
      "grad_norm": 1.0246550076014433,
      "learning_rate": 0.003,
      "loss": 4.1021,
      "step": 4492
    },
    {
      "epoch": 0.04493,
      "grad_norm": 1.119573546351604,
      "learning_rate": 0.003,
      "loss": 4.1161,
      "step": 4493
    },
    {
      "epoch": 0.04494,
      "grad_norm": 0.9017080906502098,
      "learning_rate": 0.003,
      "loss": 4.0891,
      "step": 4494
    },
    {
      "epoch": 0.04495,
      "grad_norm": 0.8048737469068201,
      "learning_rate": 0.003,
      "loss": 4.0853,
      "step": 4495
    },
    {
      "epoch": 0.04496,
      "grad_norm": 0.8207659865646967,
      "learning_rate": 0.003,
      "loss": 4.1246,
      "step": 4496
    },
    {
      "epoch": 0.04497,
      "grad_norm": 1.1245269296552654,
      "learning_rate": 0.003,
      "loss": 4.1121,
      "step": 4497
    },
    {
      "epoch": 0.04498,
      "grad_norm": 1.0002119651690387,
      "learning_rate": 0.003,
      "loss": 4.0823,
      "step": 4498
    },
    {
      "epoch": 0.04499,
      "grad_norm": 0.9808448366317947,
      "learning_rate": 0.003,
      "loss": 4.0531,
      "step": 4499
    },
    {
      "epoch": 0.045,
      "grad_norm": 1.132093441653165,
      "learning_rate": 0.003,
      "loss": 4.0864,
      "step": 4500
    },
    {
      "epoch": 0.04501,
      "grad_norm": 0.8718373099791662,
      "learning_rate": 0.003,
      "loss": 4.0759,
      "step": 4501
    },
    {
      "epoch": 0.04502,
      "grad_norm": 0.9228626903708667,
      "learning_rate": 0.003,
      "loss": 4.116,
      "step": 4502
    },
    {
      "epoch": 0.04503,
      "grad_norm": 0.8263232740111328,
      "learning_rate": 0.003,
      "loss": 4.1153,
      "step": 4503
    },
    {
      "epoch": 0.04504,
      "grad_norm": 0.7734875104374885,
      "learning_rate": 0.003,
      "loss": 4.1093,
      "step": 4504
    },
    {
      "epoch": 0.04505,
      "grad_norm": 0.7885987258017754,
      "learning_rate": 0.003,
      "loss": 4.1044,
      "step": 4505
    },
    {
      "epoch": 0.04506,
      "grad_norm": 0.9407227338991642,
      "learning_rate": 0.003,
      "loss": 4.0866,
      "step": 4506
    },
    {
      "epoch": 0.04507,
      "grad_norm": 1.0832398695950372,
      "learning_rate": 0.003,
      "loss": 4.067,
      "step": 4507
    },
    {
      "epoch": 0.04508,
      "grad_norm": 0.9418456435744189,
      "learning_rate": 0.003,
      "loss": 4.0834,
      "step": 4508
    },
    {
      "epoch": 0.04509,
      "grad_norm": 0.9175750111895549,
      "learning_rate": 0.003,
      "loss": 4.1085,
      "step": 4509
    },
    {
      "epoch": 0.0451,
      "grad_norm": 0.9398070401461067,
      "learning_rate": 0.003,
      "loss": 4.1109,
      "step": 4510
    },
    {
      "epoch": 0.04511,
      "grad_norm": 0.9217534503191119,
      "learning_rate": 0.003,
      "loss": 4.0934,
      "step": 4511
    },
    {
      "epoch": 0.04512,
      "grad_norm": 1.0427993141187701,
      "learning_rate": 0.003,
      "loss": 4.0519,
      "step": 4512
    },
    {
      "epoch": 0.04513,
      "grad_norm": 0.8428325423508638,
      "learning_rate": 0.003,
      "loss": 4.0786,
      "step": 4513
    },
    {
      "epoch": 0.04514,
      "grad_norm": 1.0089526014399501,
      "learning_rate": 0.003,
      "loss": 4.0941,
      "step": 4514
    },
    {
      "epoch": 0.04515,
      "grad_norm": 0.9789233122430068,
      "learning_rate": 0.003,
      "loss": 4.0893,
      "step": 4515
    },
    {
      "epoch": 0.04516,
      "grad_norm": 0.9091489043065707,
      "learning_rate": 0.003,
      "loss": 4.0965,
      "step": 4516
    },
    {
      "epoch": 0.04517,
      "grad_norm": 0.9751493943869993,
      "learning_rate": 0.003,
      "loss": 4.1139,
      "step": 4517
    },
    {
      "epoch": 0.04518,
      "grad_norm": 0.8917249628104398,
      "learning_rate": 0.003,
      "loss": 4.0877,
      "step": 4518
    },
    {
      "epoch": 0.04519,
      "grad_norm": 1.004402772465955,
      "learning_rate": 0.003,
      "loss": 4.0835,
      "step": 4519
    },
    {
      "epoch": 0.0452,
      "grad_norm": 1.2586466593520984,
      "learning_rate": 0.003,
      "loss": 4.0953,
      "step": 4520
    },
    {
      "epoch": 0.04521,
      "grad_norm": 0.9330729244910515,
      "learning_rate": 0.003,
      "loss": 4.0815,
      "step": 4521
    },
    {
      "epoch": 0.04522,
      "grad_norm": 0.8641019927023269,
      "learning_rate": 0.003,
      "loss": 4.0767,
      "step": 4522
    },
    {
      "epoch": 0.04523,
      "grad_norm": 0.9140037813457487,
      "learning_rate": 0.003,
      "loss": 4.0813,
      "step": 4523
    },
    {
      "epoch": 0.04524,
      "grad_norm": 0.8919093436216252,
      "learning_rate": 0.003,
      "loss": 4.0586,
      "step": 4524
    },
    {
      "epoch": 0.04525,
      "grad_norm": 1.0121384333442254,
      "learning_rate": 0.003,
      "loss": 4.0716,
      "step": 4525
    },
    {
      "epoch": 0.04526,
      "grad_norm": 1.1210191689178208,
      "learning_rate": 0.003,
      "loss": 4.1035,
      "step": 4526
    },
    {
      "epoch": 0.04527,
      "grad_norm": 0.682641155436752,
      "learning_rate": 0.003,
      "loss": 4.0755,
      "step": 4527
    },
    {
      "epoch": 0.04528,
      "grad_norm": 0.6585540834263701,
      "learning_rate": 0.003,
      "loss": 4.0532,
      "step": 4528
    },
    {
      "epoch": 0.04529,
      "grad_norm": 0.7678794616279656,
      "learning_rate": 0.003,
      "loss": 4.061,
      "step": 4529
    },
    {
      "epoch": 0.0453,
      "grad_norm": 0.8419648665385003,
      "learning_rate": 0.003,
      "loss": 4.0545,
      "step": 4530
    },
    {
      "epoch": 0.04531,
      "grad_norm": 0.9205297479236283,
      "learning_rate": 0.003,
      "loss": 4.0614,
      "step": 4531
    },
    {
      "epoch": 0.04532,
      "grad_norm": 0.8853526961425466,
      "learning_rate": 0.003,
      "loss": 4.088,
      "step": 4532
    },
    {
      "epoch": 0.04533,
      "grad_norm": 0.8992375264745746,
      "learning_rate": 0.003,
      "loss": 4.0517,
      "step": 4533
    },
    {
      "epoch": 0.04534,
      "grad_norm": 0.9811346625863182,
      "learning_rate": 0.003,
      "loss": 4.0986,
      "step": 4534
    },
    {
      "epoch": 0.04535,
      "grad_norm": 1.037436568279496,
      "learning_rate": 0.003,
      "loss": 4.1191,
      "step": 4535
    },
    {
      "epoch": 0.04536,
      "grad_norm": 0.8288199794219244,
      "learning_rate": 0.003,
      "loss": 4.1116,
      "step": 4536
    },
    {
      "epoch": 0.04537,
      "grad_norm": 0.8088616928239108,
      "learning_rate": 0.003,
      "loss": 4.0889,
      "step": 4537
    },
    {
      "epoch": 0.04538,
      "grad_norm": 0.7318781047913877,
      "learning_rate": 0.003,
      "loss": 4.0888,
      "step": 4538
    },
    {
      "epoch": 0.04539,
      "grad_norm": 0.7179435097962413,
      "learning_rate": 0.003,
      "loss": 4.0502,
      "step": 4539
    },
    {
      "epoch": 0.0454,
      "grad_norm": 0.8058129172445117,
      "learning_rate": 0.003,
      "loss": 4.0624,
      "step": 4540
    },
    {
      "epoch": 0.04541,
      "grad_norm": 0.9730325188123338,
      "learning_rate": 0.003,
      "loss": 4.1112,
      "step": 4541
    },
    {
      "epoch": 0.04542,
      "grad_norm": 1.1629088900069842,
      "learning_rate": 0.003,
      "loss": 4.0661,
      "step": 4542
    },
    {
      "epoch": 0.04543,
      "grad_norm": 1.0223952095742048,
      "learning_rate": 0.003,
      "loss": 4.0942,
      "step": 4543
    },
    {
      "epoch": 0.04544,
      "grad_norm": 0.9007892884805351,
      "learning_rate": 0.003,
      "loss": 4.0719,
      "step": 4544
    },
    {
      "epoch": 0.04545,
      "grad_norm": 0.8810595549441049,
      "learning_rate": 0.003,
      "loss": 4.0834,
      "step": 4545
    },
    {
      "epoch": 0.04546,
      "grad_norm": 1.0706790887149014,
      "learning_rate": 0.003,
      "loss": 4.0718,
      "step": 4546
    },
    {
      "epoch": 0.04547,
      "grad_norm": 1.0783793172148153,
      "learning_rate": 0.003,
      "loss": 4.0792,
      "step": 4547
    },
    {
      "epoch": 0.04548,
      "grad_norm": 0.8754970851766884,
      "learning_rate": 0.003,
      "loss": 4.0879,
      "step": 4548
    },
    {
      "epoch": 0.04549,
      "grad_norm": 1.006047833218977,
      "learning_rate": 0.003,
      "loss": 4.0827,
      "step": 4549
    },
    {
      "epoch": 0.0455,
      "grad_norm": 1.0770476162131184,
      "learning_rate": 0.003,
      "loss": 4.1161,
      "step": 4550
    },
    {
      "epoch": 0.04551,
      "grad_norm": 0.9601215888010274,
      "learning_rate": 0.003,
      "loss": 4.1242,
      "step": 4551
    },
    {
      "epoch": 0.04552,
      "grad_norm": 0.9774413017455548,
      "learning_rate": 0.003,
      "loss": 4.111,
      "step": 4552
    },
    {
      "epoch": 0.04553,
      "grad_norm": 1.0213364834049192,
      "learning_rate": 0.003,
      "loss": 4.0959,
      "step": 4553
    },
    {
      "epoch": 0.04554,
      "grad_norm": 0.9568143904158197,
      "learning_rate": 0.003,
      "loss": 4.0819,
      "step": 4554
    },
    {
      "epoch": 0.04555,
      "grad_norm": 0.7973060747821642,
      "learning_rate": 0.003,
      "loss": 4.0881,
      "step": 4555
    },
    {
      "epoch": 0.04556,
      "grad_norm": 0.9299995488489511,
      "learning_rate": 0.003,
      "loss": 4.0943,
      "step": 4556
    },
    {
      "epoch": 0.04557,
      "grad_norm": 1.1293974930486246,
      "learning_rate": 0.003,
      "loss": 4.0845,
      "step": 4557
    },
    {
      "epoch": 0.04558,
      "grad_norm": 1.026675541394777,
      "learning_rate": 0.003,
      "loss": 4.1108,
      "step": 4558
    },
    {
      "epoch": 0.04559,
      "grad_norm": 1.0345981426986643,
      "learning_rate": 0.003,
      "loss": 4.1249,
      "step": 4559
    },
    {
      "epoch": 0.0456,
      "grad_norm": 1.1305484499753016,
      "learning_rate": 0.003,
      "loss": 4.0854,
      "step": 4560
    },
    {
      "epoch": 0.04561,
      "grad_norm": 0.925988269592052,
      "learning_rate": 0.003,
      "loss": 4.0913,
      "step": 4561
    },
    {
      "epoch": 0.04562,
      "grad_norm": 0.8510922703082149,
      "learning_rate": 0.003,
      "loss": 4.1115,
      "step": 4562
    },
    {
      "epoch": 0.04563,
      "grad_norm": 0.9226408925609122,
      "learning_rate": 0.003,
      "loss": 4.0786,
      "step": 4563
    },
    {
      "epoch": 0.04564,
      "grad_norm": 1.0130794105331695,
      "learning_rate": 0.003,
      "loss": 4.0952,
      "step": 4564
    },
    {
      "epoch": 0.04565,
      "grad_norm": 1.1969435548126093,
      "learning_rate": 0.003,
      "loss": 4.0725,
      "step": 4565
    },
    {
      "epoch": 0.04566,
      "grad_norm": 0.9275288015749137,
      "learning_rate": 0.003,
      "loss": 4.1017,
      "step": 4566
    },
    {
      "epoch": 0.04567,
      "grad_norm": 0.8651130584077846,
      "learning_rate": 0.003,
      "loss": 4.0909,
      "step": 4567
    },
    {
      "epoch": 0.04568,
      "grad_norm": 0.8986618248546338,
      "learning_rate": 0.003,
      "loss": 4.0685,
      "step": 4568
    },
    {
      "epoch": 0.04569,
      "grad_norm": 0.6837244864462902,
      "learning_rate": 0.003,
      "loss": 4.0887,
      "step": 4569
    },
    {
      "epoch": 0.0457,
      "grad_norm": 0.6768551414670718,
      "learning_rate": 0.003,
      "loss": 4.0781,
      "step": 4570
    },
    {
      "epoch": 0.04571,
      "grad_norm": 0.7428397016306537,
      "learning_rate": 0.003,
      "loss": 4.107,
      "step": 4571
    },
    {
      "epoch": 0.04572,
      "grad_norm": 0.8995755995048413,
      "learning_rate": 0.003,
      "loss": 4.0813,
      "step": 4572
    },
    {
      "epoch": 0.04573,
      "grad_norm": 0.9009642352606402,
      "learning_rate": 0.003,
      "loss": 4.058,
      "step": 4573
    },
    {
      "epoch": 0.04574,
      "grad_norm": 0.7114211134538346,
      "learning_rate": 0.003,
      "loss": 4.0634,
      "step": 4574
    },
    {
      "epoch": 0.04575,
      "grad_norm": 0.6599729401731194,
      "learning_rate": 0.003,
      "loss": 4.0868,
      "step": 4575
    },
    {
      "epoch": 0.04576,
      "grad_norm": 0.7597125240743299,
      "learning_rate": 0.003,
      "loss": 4.0805,
      "step": 4576
    },
    {
      "epoch": 0.04577,
      "grad_norm": 0.8421667189567135,
      "learning_rate": 0.003,
      "loss": 4.1001,
      "step": 4577
    },
    {
      "epoch": 0.04578,
      "grad_norm": 0.8104388984795634,
      "learning_rate": 0.003,
      "loss": 4.066,
      "step": 4578
    },
    {
      "epoch": 0.04579,
      "grad_norm": 0.8976119076785662,
      "learning_rate": 0.003,
      "loss": 4.0531,
      "step": 4579
    },
    {
      "epoch": 0.0458,
      "grad_norm": 0.9887238152154557,
      "learning_rate": 0.003,
      "loss": 4.0887,
      "step": 4580
    },
    {
      "epoch": 0.04581,
      "grad_norm": 1.0944325064747162,
      "learning_rate": 0.003,
      "loss": 4.1047,
      "step": 4581
    },
    {
      "epoch": 0.04582,
      "grad_norm": 1.1228836648844638,
      "learning_rate": 0.003,
      "loss": 4.1083,
      "step": 4582
    },
    {
      "epoch": 0.04583,
      "grad_norm": 1.037982851445284,
      "learning_rate": 0.003,
      "loss": 4.0738,
      "step": 4583
    },
    {
      "epoch": 0.04584,
      "grad_norm": 1.1309942180537078,
      "learning_rate": 0.003,
      "loss": 4.0889,
      "step": 4584
    },
    {
      "epoch": 0.04585,
      "grad_norm": 0.8719784933921241,
      "learning_rate": 0.003,
      "loss": 4.0785,
      "step": 4585
    },
    {
      "epoch": 0.04586,
      "grad_norm": 0.9988395861483548,
      "learning_rate": 0.003,
      "loss": 4.0882,
      "step": 4586
    },
    {
      "epoch": 0.04587,
      "grad_norm": 1.1757119262154812,
      "learning_rate": 0.003,
      "loss": 4.1107,
      "step": 4587
    },
    {
      "epoch": 0.04588,
      "grad_norm": 0.9935517321787154,
      "learning_rate": 0.003,
      "loss": 4.106,
      "step": 4588
    },
    {
      "epoch": 0.04589,
      "grad_norm": 0.9699416826899608,
      "learning_rate": 0.003,
      "loss": 4.073,
      "step": 4589
    },
    {
      "epoch": 0.0459,
      "grad_norm": 0.8608811328558719,
      "learning_rate": 0.003,
      "loss": 4.0949,
      "step": 4590
    },
    {
      "epoch": 0.04591,
      "grad_norm": 0.8189922265739307,
      "learning_rate": 0.003,
      "loss": 4.0583,
      "step": 4591
    },
    {
      "epoch": 0.04592,
      "grad_norm": 0.7122732264854349,
      "learning_rate": 0.003,
      "loss": 4.0811,
      "step": 4592
    },
    {
      "epoch": 0.04593,
      "grad_norm": 0.6916533373498136,
      "learning_rate": 0.003,
      "loss": 4.1044,
      "step": 4593
    },
    {
      "epoch": 0.04594,
      "grad_norm": 0.7141399590985198,
      "learning_rate": 0.003,
      "loss": 4.0964,
      "step": 4594
    },
    {
      "epoch": 0.04595,
      "grad_norm": 0.5874937539781525,
      "learning_rate": 0.003,
      "loss": 4.0797,
      "step": 4595
    },
    {
      "epoch": 0.04596,
      "grad_norm": 0.5901031684112441,
      "learning_rate": 0.003,
      "loss": 4.0822,
      "step": 4596
    },
    {
      "epoch": 0.04597,
      "grad_norm": 0.6145935292761844,
      "learning_rate": 0.003,
      "loss": 4.0849,
      "step": 4597
    },
    {
      "epoch": 0.04598,
      "grad_norm": 0.6900732502328394,
      "learning_rate": 0.003,
      "loss": 4.1197,
      "step": 4598
    },
    {
      "epoch": 0.04599,
      "grad_norm": 0.7532465802179442,
      "learning_rate": 0.003,
      "loss": 4.0664,
      "step": 4599
    },
    {
      "epoch": 0.046,
      "grad_norm": 0.7204851942827531,
      "learning_rate": 0.003,
      "loss": 4.0796,
      "step": 4600
    },
    {
      "epoch": 0.04601,
      "grad_norm": 0.8000251382960614,
      "learning_rate": 0.003,
      "loss": 4.0849,
      "step": 4601
    },
    {
      "epoch": 0.04602,
      "grad_norm": 0.9181816267947781,
      "learning_rate": 0.003,
      "loss": 4.0829,
      "step": 4602
    },
    {
      "epoch": 0.04603,
      "grad_norm": 1.1329954986924105,
      "learning_rate": 0.003,
      "loss": 4.1,
      "step": 4603
    },
    {
      "epoch": 0.04604,
      "grad_norm": 0.9229244630258843,
      "learning_rate": 0.003,
      "loss": 4.0923,
      "step": 4604
    },
    {
      "epoch": 0.04605,
      "grad_norm": 0.9397750901932713,
      "learning_rate": 0.003,
      "loss": 4.1063,
      "step": 4605
    },
    {
      "epoch": 0.04606,
      "grad_norm": 0.9807853977592482,
      "learning_rate": 0.003,
      "loss": 4.071,
      "step": 4606
    },
    {
      "epoch": 0.04607,
      "grad_norm": 1.0004066586342284,
      "learning_rate": 0.003,
      "loss": 4.0801,
      "step": 4607
    },
    {
      "epoch": 0.04608,
      "grad_norm": 0.8365719223227974,
      "learning_rate": 0.003,
      "loss": 4.0608,
      "step": 4608
    },
    {
      "epoch": 0.04609,
      "grad_norm": 0.8004776907383365,
      "learning_rate": 0.003,
      "loss": 4.0825,
      "step": 4609
    },
    {
      "epoch": 0.0461,
      "grad_norm": 1.0497099318779148,
      "learning_rate": 0.003,
      "loss": 4.0677,
      "step": 4610
    },
    {
      "epoch": 0.04611,
      "grad_norm": 1.2600264680618676,
      "learning_rate": 0.003,
      "loss": 4.1259,
      "step": 4611
    },
    {
      "epoch": 0.04612,
      "grad_norm": 0.7894125524130393,
      "learning_rate": 0.003,
      "loss": 4.0945,
      "step": 4612
    },
    {
      "epoch": 0.04613,
      "grad_norm": 0.8137383483366754,
      "learning_rate": 0.003,
      "loss": 4.0774,
      "step": 4613
    },
    {
      "epoch": 0.04614,
      "grad_norm": 1.0780550037519088,
      "learning_rate": 0.003,
      "loss": 4.0491,
      "step": 4614
    },
    {
      "epoch": 0.04615,
      "grad_norm": 1.146445498524777,
      "learning_rate": 0.003,
      "loss": 4.0926,
      "step": 4615
    },
    {
      "epoch": 0.04616,
      "grad_norm": 1.0658805778199005,
      "learning_rate": 0.003,
      "loss": 4.0745,
      "step": 4616
    },
    {
      "epoch": 0.04617,
      "grad_norm": 1.0441620047388833,
      "learning_rate": 0.003,
      "loss": 4.0986,
      "step": 4617
    },
    {
      "epoch": 0.04618,
      "grad_norm": 0.8556036446407603,
      "learning_rate": 0.003,
      "loss": 4.0758,
      "step": 4618
    },
    {
      "epoch": 0.04619,
      "grad_norm": 0.8711936990908583,
      "learning_rate": 0.003,
      "loss": 4.0747,
      "step": 4619
    },
    {
      "epoch": 0.0462,
      "grad_norm": 0.9909063580253566,
      "learning_rate": 0.003,
      "loss": 4.1061,
      "step": 4620
    },
    {
      "epoch": 0.04621,
      "grad_norm": 1.0674313514316285,
      "learning_rate": 0.003,
      "loss": 4.118,
      "step": 4621
    },
    {
      "epoch": 0.04622,
      "grad_norm": 0.9135451914376392,
      "learning_rate": 0.003,
      "loss": 4.1124,
      "step": 4622
    },
    {
      "epoch": 0.04623,
      "grad_norm": 0.9086448162432442,
      "learning_rate": 0.003,
      "loss": 4.0951,
      "step": 4623
    },
    {
      "epoch": 0.04624,
      "grad_norm": 0.929118113001063,
      "learning_rate": 0.003,
      "loss": 4.0728,
      "step": 4624
    },
    {
      "epoch": 0.04625,
      "grad_norm": 0.8923285451003576,
      "learning_rate": 0.003,
      "loss": 4.0925,
      "step": 4625
    },
    {
      "epoch": 0.04626,
      "grad_norm": 0.8550528854318953,
      "learning_rate": 0.003,
      "loss": 4.0966,
      "step": 4626
    },
    {
      "epoch": 0.04627,
      "grad_norm": 0.8738286232684426,
      "learning_rate": 0.003,
      "loss": 4.0834,
      "step": 4627
    },
    {
      "epoch": 0.04628,
      "grad_norm": 1.040219821170605,
      "learning_rate": 0.003,
      "loss": 4.1134,
      "step": 4628
    },
    {
      "epoch": 0.04629,
      "grad_norm": 1.0457108285440184,
      "learning_rate": 0.003,
      "loss": 4.0893,
      "step": 4629
    },
    {
      "epoch": 0.0463,
      "grad_norm": 1.0237213701996026,
      "learning_rate": 0.003,
      "loss": 4.1058,
      "step": 4630
    },
    {
      "epoch": 0.04631,
      "grad_norm": 0.903960943241525,
      "learning_rate": 0.003,
      "loss": 4.084,
      "step": 4631
    },
    {
      "epoch": 0.04632,
      "grad_norm": 0.848296335406008,
      "learning_rate": 0.003,
      "loss": 4.0734,
      "step": 4632
    },
    {
      "epoch": 0.04633,
      "grad_norm": 0.9208369707573915,
      "learning_rate": 0.003,
      "loss": 4.0785,
      "step": 4633
    },
    {
      "epoch": 0.04634,
      "grad_norm": 0.9324467970591866,
      "learning_rate": 0.003,
      "loss": 4.0824,
      "step": 4634
    },
    {
      "epoch": 0.04635,
      "grad_norm": 1.1007094021276311,
      "learning_rate": 0.003,
      "loss": 4.0932,
      "step": 4635
    },
    {
      "epoch": 0.04636,
      "grad_norm": 1.1761724983902668,
      "learning_rate": 0.003,
      "loss": 4.1293,
      "step": 4636
    },
    {
      "epoch": 0.04637,
      "grad_norm": 0.8928241988529365,
      "learning_rate": 0.003,
      "loss": 4.0681,
      "step": 4637
    },
    {
      "epoch": 0.04638,
      "grad_norm": 0.7909525696684022,
      "learning_rate": 0.003,
      "loss": 4.1013,
      "step": 4638
    },
    {
      "epoch": 0.04639,
      "grad_norm": 0.8316115510623557,
      "learning_rate": 0.003,
      "loss": 4.0717,
      "step": 4639
    },
    {
      "epoch": 0.0464,
      "grad_norm": 0.8890413282477022,
      "learning_rate": 0.003,
      "loss": 4.0938,
      "step": 4640
    },
    {
      "epoch": 0.04641,
      "grad_norm": 0.9442456572250497,
      "learning_rate": 0.003,
      "loss": 4.1097,
      "step": 4641
    },
    {
      "epoch": 0.04642,
      "grad_norm": 1.1131605186896782,
      "learning_rate": 0.003,
      "loss": 4.0593,
      "step": 4642
    },
    {
      "epoch": 0.04643,
      "grad_norm": 1.151049967466462,
      "learning_rate": 0.003,
      "loss": 4.0865,
      "step": 4643
    },
    {
      "epoch": 0.04644,
      "grad_norm": 1.0077381059472987,
      "learning_rate": 0.003,
      "loss": 4.0905,
      "step": 4644
    },
    {
      "epoch": 0.04645,
      "grad_norm": 0.8760793972978849,
      "learning_rate": 0.003,
      "loss": 4.0792,
      "step": 4645
    },
    {
      "epoch": 0.04646,
      "grad_norm": 0.8512107506066473,
      "learning_rate": 0.003,
      "loss": 4.0771,
      "step": 4646
    },
    {
      "epoch": 0.04647,
      "grad_norm": 0.8732029277276215,
      "learning_rate": 0.003,
      "loss": 4.0748,
      "step": 4647
    },
    {
      "epoch": 0.04648,
      "grad_norm": 0.9302078959957535,
      "learning_rate": 0.003,
      "loss": 4.0974,
      "step": 4648
    },
    {
      "epoch": 0.04649,
      "grad_norm": 0.9169568810760613,
      "learning_rate": 0.003,
      "loss": 4.0818,
      "step": 4649
    },
    {
      "epoch": 0.0465,
      "grad_norm": 0.9303804967387479,
      "learning_rate": 0.003,
      "loss": 4.053,
      "step": 4650
    },
    {
      "epoch": 0.04651,
      "grad_norm": 0.872676445058754,
      "learning_rate": 0.003,
      "loss": 4.0853,
      "step": 4651
    },
    {
      "epoch": 0.04652,
      "grad_norm": 0.8018180143670698,
      "learning_rate": 0.003,
      "loss": 4.09,
      "step": 4652
    },
    {
      "epoch": 0.04653,
      "grad_norm": 0.8064464814963113,
      "learning_rate": 0.003,
      "loss": 4.0896,
      "step": 4653
    },
    {
      "epoch": 0.04654,
      "grad_norm": 0.8944906023744553,
      "learning_rate": 0.003,
      "loss": 4.0846,
      "step": 4654
    },
    {
      "epoch": 0.04655,
      "grad_norm": 1.1219617714586472,
      "learning_rate": 0.003,
      "loss": 4.0955,
      "step": 4655
    },
    {
      "epoch": 0.04656,
      "grad_norm": 1.0550396369369868,
      "learning_rate": 0.003,
      "loss": 4.0687,
      "step": 4656
    },
    {
      "epoch": 0.04657,
      "grad_norm": 1.0337811454324184,
      "learning_rate": 0.003,
      "loss": 4.1054,
      "step": 4657
    },
    {
      "epoch": 0.04658,
      "grad_norm": 0.9390842279569726,
      "learning_rate": 0.003,
      "loss": 4.083,
      "step": 4658
    },
    {
      "epoch": 0.04659,
      "grad_norm": 0.7870888779187648,
      "learning_rate": 0.003,
      "loss": 4.0752,
      "step": 4659
    },
    {
      "epoch": 0.0466,
      "grad_norm": 0.7813047740300597,
      "learning_rate": 0.003,
      "loss": 4.0937,
      "step": 4660
    },
    {
      "epoch": 0.04661,
      "grad_norm": 0.8185250491523086,
      "learning_rate": 0.003,
      "loss": 4.0796,
      "step": 4661
    },
    {
      "epoch": 0.04662,
      "grad_norm": 0.7657744046442502,
      "learning_rate": 0.003,
      "loss": 4.0644,
      "step": 4662
    },
    {
      "epoch": 0.04663,
      "grad_norm": 0.9612044009806899,
      "learning_rate": 0.003,
      "loss": 4.0885,
      "step": 4663
    },
    {
      "epoch": 0.04664,
      "grad_norm": 1.091240235491955,
      "learning_rate": 0.003,
      "loss": 4.0688,
      "step": 4664
    },
    {
      "epoch": 0.04665,
      "grad_norm": 0.9496659607676349,
      "learning_rate": 0.003,
      "loss": 4.0652,
      "step": 4665
    },
    {
      "epoch": 0.04666,
      "grad_norm": 0.8885966480506947,
      "learning_rate": 0.003,
      "loss": 4.0611,
      "step": 4666
    },
    {
      "epoch": 0.04667,
      "grad_norm": 0.8146510839872884,
      "learning_rate": 0.003,
      "loss": 4.0644,
      "step": 4667
    },
    {
      "epoch": 0.04668,
      "grad_norm": 0.7003726363991867,
      "learning_rate": 0.003,
      "loss": 4.0341,
      "step": 4668
    },
    {
      "epoch": 0.04669,
      "grad_norm": 0.6213740348621104,
      "learning_rate": 0.003,
      "loss": 4.0899,
      "step": 4669
    },
    {
      "epoch": 0.0467,
      "grad_norm": 0.7033914078971897,
      "learning_rate": 0.003,
      "loss": 4.04,
      "step": 4670
    },
    {
      "epoch": 0.04671,
      "grad_norm": 0.6956570433878502,
      "learning_rate": 0.003,
      "loss": 4.0646,
      "step": 4671
    },
    {
      "epoch": 0.04672,
      "grad_norm": 0.7790564265361141,
      "learning_rate": 0.003,
      "loss": 4.0834,
      "step": 4672
    },
    {
      "epoch": 0.04673,
      "grad_norm": 0.8582519469717382,
      "learning_rate": 0.003,
      "loss": 4.0867,
      "step": 4673
    },
    {
      "epoch": 0.04674,
      "grad_norm": 1.0257732379563222,
      "learning_rate": 0.003,
      "loss": 4.0598,
      "step": 4674
    },
    {
      "epoch": 0.04675,
      "grad_norm": 1.0228753347895319,
      "learning_rate": 0.003,
      "loss": 4.0889,
      "step": 4675
    },
    {
      "epoch": 0.04676,
      "grad_norm": 1.091796013099298,
      "learning_rate": 0.003,
      "loss": 4.096,
      "step": 4676
    },
    {
      "epoch": 0.04677,
      "grad_norm": 0.9874564467619078,
      "learning_rate": 0.003,
      "loss": 4.1002,
      "step": 4677
    },
    {
      "epoch": 0.04678,
      "grad_norm": 0.9545956541830211,
      "learning_rate": 0.003,
      "loss": 4.055,
      "step": 4678
    },
    {
      "epoch": 0.04679,
      "grad_norm": 0.9188392135996144,
      "learning_rate": 0.003,
      "loss": 4.0875,
      "step": 4679
    },
    {
      "epoch": 0.0468,
      "grad_norm": 0.9976774725576849,
      "learning_rate": 0.003,
      "loss": 4.0607,
      "step": 4680
    },
    {
      "epoch": 0.04681,
      "grad_norm": 1.2926044572885032,
      "learning_rate": 0.003,
      "loss": 4.1224,
      "step": 4681
    },
    {
      "epoch": 0.04682,
      "grad_norm": 0.8936432704892819,
      "learning_rate": 0.003,
      "loss": 4.0795,
      "step": 4682
    },
    {
      "epoch": 0.04683,
      "grad_norm": 0.9387765394338359,
      "learning_rate": 0.003,
      "loss": 4.0738,
      "step": 4683
    },
    {
      "epoch": 0.04684,
      "grad_norm": 1.0010607126157993,
      "learning_rate": 0.003,
      "loss": 4.0972,
      "step": 4684
    },
    {
      "epoch": 0.04685,
      "grad_norm": 1.1642588881453941,
      "learning_rate": 0.003,
      "loss": 4.0844,
      "step": 4685
    },
    {
      "epoch": 0.04686,
      "grad_norm": 0.9177452996715223,
      "learning_rate": 0.003,
      "loss": 4.0887,
      "step": 4686
    },
    {
      "epoch": 0.04687,
      "grad_norm": 1.0845507224363737,
      "learning_rate": 0.003,
      "loss": 4.1267,
      "step": 4687
    },
    {
      "epoch": 0.04688,
      "grad_norm": 1.0851979161146954,
      "learning_rate": 0.003,
      "loss": 4.1109,
      "step": 4688
    },
    {
      "epoch": 0.04689,
      "grad_norm": 1.0323716074650593,
      "learning_rate": 0.003,
      "loss": 4.1251,
      "step": 4689
    },
    {
      "epoch": 0.0469,
      "grad_norm": 0.8881158922609561,
      "learning_rate": 0.003,
      "loss": 4.0659,
      "step": 4690
    },
    {
      "epoch": 0.04691,
      "grad_norm": 0.7530974336635942,
      "learning_rate": 0.003,
      "loss": 4.0791,
      "step": 4691
    },
    {
      "epoch": 0.04692,
      "grad_norm": 0.8251300897824291,
      "learning_rate": 0.003,
      "loss": 4.0807,
      "step": 4692
    },
    {
      "epoch": 0.04693,
      "grad_norm": 1.1080787687675981,
      "learning_rate": 0.003,
      "loss": 4.1121,
      "step": 4693
    },
    {
      "epoch": 0.04694,
      "grad_norm": 1.2008844146719992,
      "learning_rate": 0.003,
      "loss": 4.0831,
      "step": 4694
    },
    {
      "epoch": 0.04695,
      "grad_norm": 0.8692807098125874,
      "learning_rate": 0.003,
      "loss": 4.0656,
      "step": 4695
    },
    {
      "epoch": 0.04696,
      "grad_norm": 0.7273592465913997,
      "learning_rate": 0.003,
      "loss": 4.1023,
      "step": 4696
    },
    {
      "epoch": 0.04697,
      "grad_norm": 0.791219049234071,
      "learning_rate": 0.003,
      "loss": 4.0875,
      "step": 4697
    },
    {
      "epoch": 0.04698,
      "grad_norm": 0.8259619113948167,
      "learning_rate": 0.003,
      "loss": 4.0897,
      "step": 4698
    },
    {
      "epoch": 0.04699,
      "grad_norm": 0.986873640288411,
      "learning_rate": 0.003,
      "loss": 4.0838,
      "step": 4699
    },
    {
      "epoch": 0.047,
      "grad_norm": 1.124829270421113,
      "learning_rate": 0.003,
      "loss": 4.103,
      "step": 4700
    },
    {
      "epoch": 0.04701,
      "grad_norm": 0.8041528371793156,
      "learning_rate": 0.003,
      "loss": 4.1021,
      "step": 4701
    },
    {
      "epoch": 0.04702,
      "grad_norm": 0.9002565040000517,
      "learning_rate": 0.003,
      "loss": 4.0976,
      "step": 4702
    },
    {
      "epoch": 0.04703,
      "grad_norm": 0.928884750922695,
      "learning_rate": 0.003,
      "loss": 4.0859,
      "step": 4703
    },
    {
      "epoch": 0.04704,
      "grad_norm": 0.9517910705080795,
      "learning_rate": 0.003,
      "loss": 4.0778,
      "step": 4704
    },
    {
      "epoch": 0.04705,
      "grad_norm": 1.0793630137006167,
      "learning_rate": 0.003,
      "loss": 4.1135,
      "step": 4705
    },
    {
      "epoch": 0.04706,
      "grad_norm": 0.8932410896556339,
      "learning_rate": 0.003,
      "loss": 4.0518,
      "step": 4706
    },
    {
      "epoch": 0.04707,
      "grad_norm": 0.8273663973299694,
      "learning_rate": 0.003,
      "loss": 4.0908,
      "step": 4707
    },
    {
      "epoch": 0.04708,
      "grad_norm": 0.7707848347191034,
      "learning_rate": 0.003,
      "loss": 4.0434,
      "step": 4708
    },
    {
      "epoch": 0.04709,
      "grad_norm": 0.6881441521153875,
      "learning_rate": 0.003,
      "loss": 4.0731,
      "step": 4709
    },
    {
      "epoch": 0.0471,
      "grad_norm": 0.714094436830912,
      "learning_rate": 0.003,
      "loss": 4.0848,
      "step": 4710
    },
    {
      "epoch": 0.04711,
      "grad_norm": 0.8507678879908461,
      "learning_rate": 0.003,
      "loss": 4.0801,
      "step": 4711
    },
    {
      "epoch": 0.04712,
      "grad_norm": 0.8931516416205477,
      "learning_rate": 0.003,
      "loss": 4.0711,
      "step": 4712
    },
    {
      "epoch": 0.04713,
      "grad_norm": 0.9279444773212077,
      "learning_rate": 0.003,
      "loss": 4.07,
      "step": 4713
    },
    {
      "epoch": 0.04714,
      "grad_norm": 0.7844617012393187,
      "learning_rate": 0.003,
      "loss": 4.0749,
      "step": 4714
    },
    {
      "epoch": 0.04715,
      "grad_norm": 0.7834150113825615,
      "learning_rate": 0.003,
      "loss": 4.1002,
      "step": 4715
    },
    {
      "epoch": 0.04716,
      "grad_norm": 0.7743590099580365,
      "learning_rate": 0.003,
      "loss": 4.0714,
      "step": 4716
    },
    {
      "epoch": 0.04717,
      "grad_norm": 0.7709401253853543,
      "learning_rate": 0.003,
      "loss": 4.0883,
      "step": 4717
    },
    {
      "epoch": 0.04718,
      "grad_norm": 0.8235615104288091,
      "learning_rate": 0.003,
      "loss": 4.0511,
      "step": 4718
    },
    {
      "epoch": 0.04719,
      "grad_norm": 0.8708271313882437,
      "learning_rate": 0.003,
      "loss": 4.0615,
      "step": 4719
    },
    {
      "epoch": 0.0472,
      "grad_norm": 0.8040720392649453,
      "learning_rate": 0.003,
      "loss": 4.0944,
      "step": 4720
    },
    {
      "epoch": 0.04721,
      "grad_norm": 0.7637937594189287,
      "learning_rate": 0.003,
      "loss": 4.054,
      "step": 4721
    },
    {
      "epoch": 0.04722,
      "grad_norm": 0.7154347513877589,
      "learning_rate": 0.003,
      "loss": 4.069,
      "step": 4722
    },
    {
      "epoch": 0.04723,
      "grad_norm": 0.8612068842939984,
      "learning_rate": 0.003,
      "loss": 4.0811,
      "step": 4723
    },
    {
      "epoch": 0.04724,
      "grad_norm": 0.9614536713142897,
      "learning_rate": 0.003,
      "loss": 4.0598,
      "step": 4724
    },
    {
      "epoch": 0.04725,
      "grad_norm": 1.0189566497638871,
      "learning_rate": 0.003,
      "loss": 4.0874,
      "step": 4725
    },
    {
      "epoch": 0.04726,
      "grad_norm": 1.0839491352411363,
      "learning_rate": 0.003,
      "loss": 4.0786,
      "step": 4726
    },
    {
      "epoch": 0.04727,
      "grad_norm": 1.0649373566906404,
      "learning_rate": 0.003,
      "loss": 4.064,
      "step": 4727
    },
    {
      "epoch": 0.04728,
      "grad_norm": 0.867382918842643,
      "learning_rate": 0.003,
      "loss": 4.0677,
      "step": 4728
    },
    {
      "epoch": 0.04729,
      "grad_norm": 0.8648790898500417,
      "learning_rate": 0.003,
      "loss": 4.0871,
      "step": 4729
    },
    {
      "epoch": 0.0473,
      "grad_norm": 0.8269978275529186,
      "learning_rate": 0.003,
      "loss": 4.0814,
      "step": 4730
    },
    {
      "epoch": 0.04731,
      "grad_norm": 0.8941716511884031,
      "learning_rate": 0.003,
      "loss": 4.0772,
      "step": 4731
    },
    {
      "epoch": 0.04732,
      "grad_norm": 0.9280984555504275,
      "learning_rate": 0.003,
      "loss": 4.0911,
      "step": 4732
    },
    {
      "epoch": 0.04733,
      "grad_norm": 1.147397771954512,
      "learning_rate": 0.003,
      "loss": 4.0877,
      "step": 4733
    },
    {
      "epoch": 0.04734,
      "grad_norm": 0.9874050482010743,
      "learning_rate": 0.003,
      "loss": 4.0883,
      "step": 4734
    },
    {
      "epoch": 0.04735,
      "grad_norm": 1.2239662812431078,
      "learning_rate": 0.003,
      "loss": 4.114,
      "step": 4735
    },
    {
      "epoch": 0.04736,
      "grad_norm": 0.9512556578156451,
      "learning_rate": 0.003,
      "loss": 4.0925,
      "step": 4736
    },
    {
      "epoch": 0.04737,
      "grad_norm": 1.1383559521224216,
      "learning_rate": 0.003,
      "loss": 4.0647,
      "step": 4737
    },
    {
      "epoch": 0.04738,
      "grad_norm": 0.9981707221550351,
      "learning_rate": 0.003,
      "loss": 4.1033,
      "step": 4738
    },
    {
      "epoch": 0.04739,
      "grad_norm": 0.9631032735800266,
      "learning_rate": 0.003,
      "loss": 4.0359,
      "step": 4739
    },
    {
      "epoch": 0.0474,
      "grad_norm": 0.8943309500832445,
      "learning_rate": 0.003,
      "loss": 4.1061,
      "step": 4740
    },
    {
      "epoch": 0.04741,
      "grad_norm": 0.9142912447416025,
      "learning_rate": 0.003,
      "loss": 4.0967,
      "step": 4741
    },
    {
      "epoch": 0.04742,
      "grad_norm": 0.9095935688180709,
      "learning_rate": 0.003,
      "loss": 4.0768,
      "step": 4742
    },
    {
      "epoch": 0.04743,
      "grad_norm": 0.9621787693657161,
      "learning_rate": 0.003,
      "loss": 4.1005,
      "step": 4743
    },
    {
      "epoch": 0.04744,
      "grad_norm": 0.983730502100782,
      "learning_rate": 0.003,
      "loss": 4.0628,
      "step": 4744
    },
    {
      "epoch": 0.04745,
      "grad_norm": 1.0519698186730737,
      "learning_rate": 0.003,
      "loss": 4.0701,
      "step": 4745
    },
    {
      "epoch": 0.04746,
      "grad_norm": 1.0692770481585765,
      "learning_rate": 0.003,
      "loss": 4.0901,
      "step": 4746
    },
    {
      "epoch": 0.04747,
      "grad_norm": 0.9800763430595273,
      "learning_rate": 0.003,
      "loss": 4.0605,
      "step": 4747
    },
    {
      "epoch": 0.04748,
      "grad_norm": 1.0447027226711574,
      "learning_rate": 0.003,
      "loss": 4.1131,
      "step": 4748
    },
    {
      "epoch": 0.04749,
      "grad_norm": 0.9780745361519866,
      "learning_rate": 0.003,
      "loss": 4.1178,
      "step": 4749
    },
    {
      "epoch": 0.0475,
      "grad_norm": 1.1823864036419054,
      "learning_rate": 0.003,
      "loss": 4.0892,
      "step": 4750
    },
    {
      "epoch": 0.04751,
      "grad_norm": 1.0317082296257587,
      "learning_rate": 0.003,
      "loss": 4.0512,
      "step": 4751
    },
    {
      "epoch": 0.04752,
      "grad_norm": 1.0191266508391366,
      "learning_rate": 0.003,
      "loss": 4.0841,
      "step": 4752
    },
    {
      "epoch": 0.04753,
      "grad_norm": 0.9650995183204055,
      "learning_rate": 0.003,
      "loss": 4.0888,
      "step": 4753
    },
    {
      "epoch": 0.04754,
      "grad_norm": 0.9626529774574477,
      "learning_rate": 0.003,
      "loss": 4.1079,
      "step": 4754
    },
    {
      "epoch": 0.04755,
      "grad_norm": 1.011836432397673,
      "learning_rate": 0.003,
      "loss": 4.0896,
      "step": 4755
    },
    {
      "epoch": 0.04756,
      "grad_norm": 0.9061088923081047,
      "learning_rate": 0.003,
      "loss": 4.0887,
      "step": 4756
    },
    {
      "epoch": 0.04757,
      "grad_norm": 0.8653623623227834,
      "learning_rate": 0.003,
      "loss": 4.0833,
      "step": 4757
    },
    {
      "epoch": 0.04758,
      "grad_norm": 0.8875207606926206,
      "learning_rate": 0.003,
      "loss": 4.0762,
      "step": 4758
    },
    {
      "epoch": 0.04759,
      "grad_norm": 0.8817482562664115,
      "learning_rate": 0.003,
      "loss": 4.0888,
      "step": 4759
    },
    {
      "epoch": 0.0476,
      "grad_norm": 0.7350149992314764,
      "learning_rate": 0.003,
      "loss": 4.0637,
      "step": 4760
    },
    {
      "epoch": 0.04761,
      "grad_norm": 0.6886778817907339,
      "learning_rate": 0.003,
      "loss": 4.0696,
      "step": 4761
    },
    {
      "epoch": 0.04762,
      "grad_norm": 0.6706585457203895,
      "learning_rate": 0.003,
      "loss": 4.0639,
      "step": 4762
    },
    {
      "epoch": 0.04763,
      "grad_norm": 0.7887867887553318,
      "learning_rate": 0.003,
      "loss": 4.0669,
      "step": 4763
    },
    {
      "epoch": 0.04764,
      "grad_norm": 0.97596832551055,
      "learning_rate": 0.003,
      "loss": 4.0587,
      "step": 4764
    },
    {
      "epoch": 0.04765,
      "grad_norm": 1.2783386527166918,
      "learning_rate": 0.003,
      "loss": 4.1179,
      "step": 4765
    },
    {
      "epoch": 0.04766,
      "grad_norm": 0.87011816316084,
      "learning_rate": 0.003,
      "loss": 4.0816,
      "step": 4766
    },
    {
      "epoch": 0.04767,
      "grad_norm": 0.995725590420577,
      "learning_rate": 0.003,
      "loss": 4.0836,
      "step": 4767
    },
    {
      "epoch": 0.04768,
      "grad_norm": 0.9157007548076511,
      "learning_rate": 0.003,
      "loss": 4.0762,
      "step": 4768
    },
    {
      "epoch": 0.04769,
      "grad_norm": 0.7763424443251495,
      "learning_rate": 0.003,
      "loss": 4.0373,
      "step": 4769
    },
    {
      "epoch": 0.0477,
      "grad_norm": 0.9132161182733682,
      "learning_rate": 0.003,
      "loss": 4.0549,
      "step": 4770
    },
    {
      "epoch": 0.04771,
      "grad_norm": 1.0690816936012133,
      "learning_rate": 0.003,
      "loss": 4.081,
      "step": 4771
    },
    {
      "epoch": 0.04772,
      "grad_norm": 1.0183011941792959,
      "learning_rate": 0.003,
      "loss": 4.0843,
      "step": 4772
    },
    {
      "epoch": 0.04773,
      "grad_norm": 0.9142174256967822,
      "learning_rate": 0.003,
      "loss": 4.1024,
      "step": 4773
    },
    {
      "epoch": 0.04774,
      "grad_norm": 0.7603001306934604,
      "learning_rate": 0.003,
      "loss": 4.0826,
      "step": 4774
    },
    {
      "epoch": 0.04775,
      "grad_norm": 0.8346482465320779,
      "learning_rate": 0.003,
      "loss": 4.0835,
      "step": 4775
    },
    {
      "epoch": 0.04776,
      "grad_norm": 0.9773364899564101,
      "learning_rate": 0.003,
      "loss": 4.0729,
      "step": 4776
    },
    {
      "epoch": 0.04777,
      "grad_norm": 1.0784655700916257,
      "learning_rate": 0.003,
      "loss": 4.0746,
      "step": 4777
    },
    {
      "epoch": 0.04778,
      "grad_norm": 0.9412592454712865,
      "learning_rate": 0.003,
      "loss": 4.0479,
      "step": 4778
    },
    {
      "epoch": 0.04779,
      "grad_norm": 1.0145035098418551,
      "learning_rate": 0.003,
      "loss": 4.0918,
      "step": 4779
    },
    {
      "epoch": 0.0478,
      "grad_norm": 1.0762920782151484,
      "learning_rate": 0.003,
      "loss": 4.0954,
      "step": 4780
    },
    {
      "epoch": 0.04781,
      "grad_norm": 0.8651372390591274,
      "learning_rate": 0.003,
      "loss": 4.0699,
      "step": 4781
    },
    {
      "epoch": 0.04782,
      "grad_norm": 0.8769680695281393,
      "learning_rate": 0.003,
      "loss": 4.0713,
      "step": 4782
    },
    {
      "epoch": 0.04783,
      "grad_norm": 0.9787359970897872,
      "learning_rate": 0.003,
      "loss": 4.0678,
      "step": 4783
    },
    {
      "epoch": 0.04784,
      "grad_norm": 0.9721858465952792,
      "learning_rate": 0.003,
      "loss": 4.0917,
      "step": 4784
    },
    {
      "epoch": 0.04785,
      "grad_norm": 1.0635527582181876,
      "learning_rate": 0.003,
      "loss": 4.0951,
      "step": 4785
    },
    {
      "epoch": 0.04786,
      "grad_norm": 0.9396709467834784,
      "learning_rate": 0.003,
      "loss": 4.0965,
      "step": 4786
    },
    {
      "epoch": 0.04787,
      "grad_norm": 1.00148310702931,
      "learning_rate": 0.003,
      "loss": 4.0914,
      "step": 4787
    },
    {
      "epoch": 0.04788,
      "grad_norm": 0.8743917541049051,
      "learning_rate": 0.003,
      "loss": 4.0745,
      "step": 4788
    },
    {
      "epoch": 0.04789,
      "grad_norm": 0.7884230141024988,
      "learning_rate": 0.003,
      "loss": 4.0576,
      "step": 4789
    },
    {
      "epoch": 0.0479,
      "grad_norm": 0.8498046311242973,
      "learning_rate": 0.003,
      "loss": 4.0738,
      "step": 4790
    },
    {
      "epoch": 0.04791,
      "grad_norm": 0.956319683384673,
      "learning_rate": 0.003,
      "loss": 4.095,
      "step": 4791
    },
    {
      "epoch": 0.04792,
      "grad_norm": 1.228255623374456,
      "learning_rate": 0.003,
      "loss": 4.1159,
      "step": 4792
    },
    {
      "epoch": 0.04793,
      "grad_norm": 0.9294780478699636,
      "learning_rate": 0.003,
      "loss": 4.0688,
      "step": 4793
    },
    {
      "epoch": 0.04794,
      "grad_norm": 0.9672358732016223,
      "learning_rate": 0.003,
      "loss": 4.0907,
      "step": 4794
    },
    {
      "epoch": 0.04795,
      "grad_norm": 1.0525050179084454,
      "learning_rate": 0.003,
      "loss": 4.0955,
      "step": 4795
    },
    {
      "epoch": 0.04796,
      "grad_norm": 1.1544771283976443,
      "learning_rate": 0.003,
      "loss": 4.0651,
      "step": 4796
    },
    {
      "epoch": 0.04797,
      "grad_norm": 0.915501476651925,
      "learning_rate": 0.003,
      "loss": 4.0978,
      "step": 4797
    },
    {
      "epoch": 0.04798,
      "grad_norm": 0.8655114100315004,
      "learning_rate": 0.003,
      "loss": 4.0542,
      "step": 4798
    },
    {
      "epoch": 0.04799,
      "grad_norm": 0.9840921488311046,
      "learning_rate": 0.003,
      "loss": 4.0695,
      "step": 4799
    },
    {
      "epoch": 0.048,
      "grad_norm": 0.9567911011323519,
      "learning_rate": 0.003,
      "loss": 4.0744,
      "step": 4800
    },
    {
      "epoch": 0.04801,
      "grad_norm": 1.0260566498791253,
      "learning_rate": 0.003,
      "loss": 4.0849,
      "step": 4801
    },
    {
      "epoch": 0.04802,
      "grad_norm": 0.9765671371087857,
      "learning_rate": 0.003,
      "loss": 4.0589,
      "step": 4802
    },
    {
      "epoch": 0.04803,
      "grad_norm": 0.8954590032392303,
      "learning_rate": 0.003,
      "loss": 4.067,
      "step": 4803
    },
    {
      "epoch": 0.04804,
      "grad_norm": 0.9397394535107887,
      "learning_rate": 0.003,
      "loss": 4.1015,
      "step": 4804
    },
    {
      "epoch": 0.04805,
      "grad_norm": 0.8330259017268677,
      "learning_rate": 0.003,
      "loss": 4.0977,
      "step": 4805
    },
    {
      "epoch": 0.04806,
      "grad_norm": 0.8897845335382429,
      "learning_rate": 0.003,
      "loss": 4.0469,
      "step": 4806
    },
    {
      "epoch": 0.04807,
      "grad_norm": 0.8951815464379674,
      "learning_rate": 0.003,
      "loss": 4.0989,
      "step": 4807
    },
    {
      "epoch": 0.04808,
      "grad_norm": 0.8091055618521581,
      "learning_rate": 0.003,
      "loss": 4.0753,
      "step": 4808
    },
    {
      "epoch": 0.04809,
      "grad_norm": 0.90919679772142,
      "learning_rate": 0.003,
      "loss": 4.0697,
      "step": 4809
    },
    {
      "epoch": 0.0481,
      "grad_norm": 1.0315510358211395,
      "learning_rate": 0.003,
      "loss": 4.0975,
      "step": 4810
    },
    {
      "epoch": 0.04811,
      "grad_norm": 1.0721233459919157,
      "learning_rate": 0.003,
      "loss": 4.0916,
      "step": 4811
    },
    {
      "epoch": 0.04812,
      "grad_norm": 1.0618859760248345,
      "learning_rate": 0.003,
      "loss": 4.0969,
      "step": 4812
    },
    {
      "epoch": 0.04813,
      "grad_norm": 0.9313083894483718,
      "learning_rate": 0.003,
      "loss": 4.08,
      "step": 4813
    },
    {
      "epoch": 0.04814,
      "grad_norm": 0.8584027305263373,
      "learning_rate": 0.003,
      "loss": 4.0354,
      "step": 4814
    },
    {
      "epoch": 0.04815,
      "grad_norm": 0.8633671828950363,
      "learning_rate": 0.003,
      "loss": 4.0765,
      "step": 4815
    },
    {
      "epoch": 0.04816,
      "grad_norm": 1.0445614739609703,
      "learning_rate": 0.003,
      "loss": 4.0889,
      "step": 4816
    },
    {
      "epoch": 0.04817,
      "grad_norm": 1.156557836130519,
      "learning_rate": 0.003,
      "loss": 4.0682,
      "step": 4817
    },
    {
      "epoch": 0.04818,
      "grad_norm": 0.9056636167430384,
      "learning_rate": 0.003,
      "loss": 4.0728,
      "step": 4818
    },
    {
      "epoch": 0.04819,
      "grad_norm": 0.9644804670241944,
      "learning_rate": 0.003,
      "loss": 4.0807,
      "step": 4819
    },
    {
      "epoch": 0.0482,
      "grad_norm": 0.8579108360881226,
      "learning_rate": 0.003,
      "loss": 4.0677,
      "step": 4820
    },
    {
      "epoch": 0.04821,
      "grad_norm": 0.738347738618665,
      "learning_rate": 0.003,
      "loss": 4.0927,
      "step": 4821
    },
    {
      "epoch": 0.04822,
      "grad_norm": 0.77578338268237,
      "learning_rate": 0.003,
      "loss": 4.0683,
      "step": 4822
    },
    {
      "epoch": 0.04823,
      "grad_norm": 0.7660224855432475,
      "learning_rate": 0.003,
      "loss": 4.0661,
      "step": 4823
    },
    {
      "epoch": 0.04824,
      "grad_norm": 0.7699061557992386,
      "learning_rate": 0.003,
      "loss": 4.0523,
      "step": 4824
    },
    {
      "epoch": 0.04825,
      "grad_norm": 0.9058101965573263,
      "learning_rate": 0.003,
      "loss": 4.072,
      "step": 4825
    },
    {
      "epoch": 0.04826,
      "grad_norm": 1.1063436218286011,
      "learning_rate": 0.003,
      "loss": 4.083,
      "step": 4826
    },
    {
      "epoch": 0.04827,
      "grad_norm": 0.8962605854230049,
      "learning_rate": 0.003,
      "loss": 4.0839,
      "step": 4827
    },
    {
      "epoch": 0.04828,
      "grad_norm": 0.9401425903734211,
      "learning_rate": 0.003,
      "loss": 4.0922,
      "step": 4828
    },
    {
      "epoch": 0.04829,
      "grad_norm": 1.008827045223568,
      "learning_rate": 0.003,
      "loss": 4.0779,
      "step": 4829
    },
    {
      "epoch": 0.0483,
      "grad_norm": 1.1759354719434816,
      "learning_rate": 0.003,
      "loss": 4.094,
      "step": 4830
    },
    {
      "epoch": 0.04831,
      "grad_norm": 0.9149475371771189,
      "learning_rate": 0.003,
      "loss": 4.0539,
      "step": 4831
    },
    {
      "epoch": 0.04832,
      "grad_norm": 0.9417806047622538,
      "learning_rate": 0.003,
      "loss": 4.0804,
      "step": 4832
    },
    {
      "epoch": 0.04833,
      "grad_norm": 0.8568983398054711,
      "learning_rate": 0.003,
      "loss": 4.0802,
      "step": 4833
    },
    {
      "epoch": 0.04834,
      "grad_norm": 0.9118080863225161,
      "learning_rate": 0.003,
      "loss": 4.0515,
      "step": 4834
    },
    {
      "epoch": 0.04835,
      "grad_norm": 0.8727113022874836,
      "learning_rate": 0.003,
      "loss": 4.091,
      "step": 4835
    },
    {
      "epoch": 0.04836,
      "grad_norm": 0.7282176510987858,
      "learning_rate": 0.003,
      "loss": 4.0541,
      "step": 4836
    },
    {
      "epoch": 0.04837,
      "grad_norm": 0.8743286431817264,
      "learning_rate": 0.003,
      "loss": 4.0812,
      "step": 4837
    },
    {
      "epoch": 0.04838,
      "grad_norm": 1.0886280382022706,
      "learning_rate": 0.003,
      "loss": 4.0982,
      "step": 4838
    },
    {
      "epoch": 0.04839,
      "grad_norm": 0.9289173758415538,
      "learning_rate": 0.003,
      "loss": 4.0784,
      "step": 4839
    },
    {
      "epoch": 0.0484,
      "grad_norm": 0.9689477229941422,
      "learning_rate": 0.003,
      "loss": 4.0838,
      "step": 4840
    },
    {
      "epoch": 0.04841,
      "grad_norm": 0.8968968888481653,
      "learning_rate": 0.003,
      "loss": 4.0773,
      "step": 4841
    },
    {
      "epoch": 0.04842,
      "grad_norm": 0.7785478768547701,
      "learning_rate": 0.003,
      "loss": 4.0916,
      "step": 4842
    },
    {
      "epoch": 0.04843,
      "grad_norm": 0.7603045256063694,
      "learning_rate": 0.003,
      "loss": 4.048,
      "step": 4843
    },
    {
      "epoch": 0.04844,
      "grad_norm": 0.8780025270605276,
      "learning_rate": 0.003,
      "loss": 4.0815,
      "step": 4844
    },
    {
      "epoch": 0.04845,
      "grad_norm": 0.9254852484657723,
      "learning_rate": 0.003,
      "loss": 4.0677,
      "step": 4845
    },
    {
      "epoch": 0.04846,
      "grad_norm": 0.8987509251388802,
      "learning_rate": 0.003,
      "loss": 4.0889,
      "step": 4846
    },
    {
      "epoch": 0.04847,
      "grad_norm": 0.8893955297373247,
      "learning_rate": 0.003,
      "loss": 4.0535,
      "step": 4847
    },
    {
      "epoch": 0.04848,
      "grad_norm": 0.8912302394312529,
      "learning_rate": 0.003,
      "loss": 4.0973,
      "step": 4848
    },
    {
      "epoch": 0.04849,
      "grad_norm": 0.8726877737099056,
      "learning_rate": 0.003,
      "loss": 4.0889,
      "step": 4849
    },
    {
      "epoch": 0.0485,
      "grad_norm": 0.8314222830339668,
      "learning_rate": 0.003,
      "loss": 4.0765,
      "step": 4850
    },
    {
      "epoch": 0.04851,
      "grad_norm": 0.7822732329238694,
      "learning_rate": 0.003,
      "loss": 4.0876,
      "step": 4851
    },
    {
      "epoch": 0.04852,
      "grad_norm": 0.9092871934087126,
      "learning_rate": 0.003,
      "loss": 4.0282,
      "step": 4852
    },
    {
      "epoch": 0.04853,
      "grad_norm": 1.2348482197912367,
      "learning_rate": 0.003,
      "loss": 4.0946,
      "step": 4853
    },
    {
      "epoch": 0.04854,
      "grad_norm": 0.8979894181749922,
      "learning_rate": 0.003,
      "loss": 4.0863,
      "step": 4854
    },
    {
      "epoch": 0.04855,
      "grad_norm": 0.8934339928496606,
      "learning_rate": 0.003,
      "loss": 4.0953,
      "step": 4855
    },
    {
      "epoch": 0.04856,
      "grad_norm": 1.0136848244881436,
      "learning_rate": 0.003,
      "loss": 4.095,
      "step": 4856
    },
    {
      "epoch": 0.04857,
      "grad_norm": 1.2509395241639578,
      "learning_rate": 0.003,
      "loss": 4.1218,
      "step": 4857
    },
    {
      "epoch": 0.04858,
      "grad_norm": 0.9372409373437549,
      "learning_rate": 0.003,
      "loss": 4.0756,
      "step": 4858
    },
    {
      "epoch": 0.04859,
      "grad_norm": 0.9546537840113466,
      "learning_rate": 0.003,
      "loss": 4.1102,
      "step": 4859
    },
    {
      "epoch": 0.0486,
      "grad_norm": 1.082853408289471,
      "learning_rate": 0.003,
      "loss": 4.093,
      "step": 4860
    },
    {
      "epoch": 0.04861,
      "grad_norm": 1.01675025574686,
      "learning_rate": 0.003,
      "loss": 4.086,
      "step": 4861
    },
    {
      "epoch": 0.04862,
      "grad_norm": 0.9176376091053398,
      "learning_rate": 0.003,
      "loss": 4.0838,
      "step": 4862
    },
    {
      "epoch": 0.04863,
      "grad_norm": 0.8568974528178754,
      "learning_rate": 0.003,
      "loss": 4.1013,
      "step": 4863
    },
    {
      "epoch": 0.04864,
      "grad_norm": 0.8166420481645057,
      "learning_rate": 0.003,
      "loss": 4.0783,
      "step": 4864
    },
    {
      "epoch": 0.04865,
      "grad_norm": 0.9234428400906274,
      "learning_rate": 0.003,
      "loss": 4.098,
      "step": 4865
    },
    {
      "epoch": 0.04866,
      "grad_norm": 0.8738302868938767,
      "learning_rate": 0.003,
      "loss": 4.0999,
      "step": 4866
    },
    {
      "epoch": 0.04867,
      "grad_norm": 0.85532860997535,
      "learning_rate": 0.003,
      "loss": 4.0715,
      "step": 4867
    },
    {
      "epoch": 0.04868,
      "grad_norm": 0.7330290515083526,
      "learning_rate": 0.003,
      "loss": 4.0776,
      "step": 4868
    },
    {
      "epoch": 0.04869,
      "grad_norm": 0.7237705668329806,
      "learning_rate": 0.003,
      "loss": 4.0879,
      "step": 4869
    },
    {
      "epoch": 0.0487,
      "grad_norm": 0.7073551331896761,
      "learning_rate": 0.003,
      "loss": 4.0749,
      "step": 4870
    },
    {
      "epoch": 0.04871,
      "grad_norm": 0.7482228718063552,
      "learning_rate": 0.003,
      "loss": 4.094,
      "step": 4871
    },
    {
      "epoch": 0.04872,
      "grad_norm": 0.7934599742401708,
      "learning_rate": 0.003,
      "loss": 4.06,
      "step": 4872
    },
    {
      "epoch": 0.04873,
      "grad_norm": 0.8520994037973396,
      "learning_rate": 0.003,
      "loss": 4.0851,
      "step": 4873
    },
    {
      "epoch": 0.04874,
      "grad_norm": 0.9436390490861155,
      "learning_rate": 0.003,
      "loss": 4.0796,
      "step": 4874
    },
    {
      "epoch": 0.04875,
      "grad_norm": 1.3353781337660462,
      "learning_rate": 0.003,
      "loss": 4.0849,
      "step": 4875
    },
    {
      "epoch": 0.04876,
      "grad_norm": 0.8304614123526641,
      "learning_rate": 0.003,
      "loss": 4.0763,
      "step": 4876
    },
    {
      "epoch": 0.04877,
      "grad_norm": 0.7707719582887104,
      "learning_rate": 0.003,
      "loss": 4.0776,
      "step": 4877
    },
    {
      "epoch": 0.04878,
      "grad_norm": 0.9308445091065936,
      "learning_rate": 0.003,
      "loss": 4.0616,
      "step": 4878
    },
    {
      "epoch": 0.04879,
      "grad_norm": 0.9815137055251556,
      "learning_rate": 0.003,
      "loss": 4.057,
      "step": 4879
    },
    {
      "epoch": 0.0488,
      "grad_norm": 1.3666093708712401,
      "learning_rate": 0.003,
      "loss": 4.0795,
      "step": 4880
    },
    {
      "epoch": 0.04881,
      "grad_norm": 0.8702473732106857,
      "learning_rate": 0.003,
      "loss": 4.0669,
      "step": 4881
    },
    {
      "epoch": 0.04882,
      "grad_norm": 0.879457851478898,
      "learning_rate": 0.003,
      "loss": 4.0728,
      "step": 4882
    },
    {
      "epoch": 0.04883,
      "grad_norm": 0.8303309342477992,
      "learning_rate": 0.003,
      "loss": 4.0649,
      "step": 4883
    },
    {
      "epoch": 0.04884,
      "grad_norm": 0.8052075077532388,
      "learning_rate": 0.003,
      "loss": 4.0678,
      "step": 4884
    },
    {
      "epoch": 0.04885,
      "grad_norm": 0.8973915391486328,
      "learning_rate": 0.003,
      "loss": 4.0818,
      "step": 4885
    },
    {
      "epoch": 0.04886,
      "grad_norm": 1.0019930120717506,
      "learning_rate": 0.003,
      "loss": 4.078,
      "step": 4886
    },
    {
      "epoch": 0.04887,
      "grad_norm": 1.347620612956241,
      "learning_rate": 0.003,
      "loss": 4.1003,
      "step": 4887
    },
    {
      "epoch": 0.04888,
      "grad_norm": 0.8778759238009314,
      "learning_rate": 0.003,
      "loss": 4.0927,
      "step": 4888
    },
    {
      "epoch": 0.04889,
      "grad_norm": 1.0906727199530957,
      "learning_rate": 0.003,
      "loss": 4.0783,
      "step": 4889
    },
    {
      "epoch": 0.0489,
      "grad_norm": 0.9502197503744106,
      "learning_rate": 0.003,
      "loss": 4.0826,
      "step": 4890
    },
    {
      "epoch": 0.04891,
      "grad_norm": 0.9462880674223806,
      "learning_rate": 0.003,
      "loss": 4.0929,
      "step": 4891
    },
    {
      "epoch": 0.04892,
      "grad_norm": 0.9960546118887255,
      "learning_rate": 0.003,
      "loss": 4.0933,
      "step": 4892
    },
    {
      "epoch": 0.04893,
      "grad_norm": 0.9133067753769846,
      "learning_rate": 0.003,
      "loss": 4.0906,
      "step": 4893
    },
    {
      "epoch": 0.04894,
      "grad_norm": 0.910365622657426,
      "learning_rate": 0.003,
      "loss": 4.1016,
      "step": 4894
    },
    {
      "epoch": 0.04895,
      "grad_norm": 1.0152626137013607,
      "learning_rate": 0.003,
      "loss": 4.0797,
      "step": 4895
    },
    {
      "epoch": 0.04896,
      "grad_norm": 0.9625608021661926,
      "learning_rate": 0.003,
      "loss": 4.096,
      "step": 4896
    },
    {
      "epoch": 0.04897,
      "grad_norm": 0.9603064678490546,
      "learning_rate": 0.003,
      "loss": 4.1006,
      "step": 4897
    },
    {
      "epoch": 0.04898,
      "grad_norm": 1.0125115002322624,
      "learning_rate": 0.003,
      "loss": 4.0908,
      "step": 4898
    },
    {
      "epoch": 0.04899,
      "grad_norm": 1.0091142549096461,
      "learning_rate": 0.003,
      "loss": 4.079,
      "step": 4899
    },
    {
      "epoch": 0.049,
      "grad_norm": 1.1619884083457441,
      "learning_rate": 0.003,
      "loss": 4.1115,
      "step": 4900
    },
    {
      "epoch": 0.04901,
      "grad_norm": 0.8775379815040303,
      "learning_rate": 0.003,
      "loss": 4.0646,
      "step": 4901
    },
    {
      "epoch": 0.04902,
      "grad_norm": 0.793220630533508,
      "learning_rate": 0.003,
      "loss": 4.095,
      "step": 4902
    },
    {
      "epoch": 0.04903,
      "grad_norm": 0.8017930562442611,
      "learning_rate": 0.003,
      "loss": 4.0951,
      "step": 4903
    },
    {
      "epoch": 0.04904,
      "grad_norm": 0.8507583142575733,
      "learning_rate": 0.003,
      "loss": 4.0616,
      "step": 4904
    },
    {
      "epoch": 0.04905,
      "grad_norm": 0.9366175720443591,
      "learning_rate": 0.003,
      "loss": 4.0647,
      "step": 4905
    },
    {
      "epoch": 0.04906,
      "grad_norm": 1.0894519512603444,
      "learning_rate": 0.003,
      "loss": 4.0797,
      "step": 4906
    },
    {
      "epoch": 0.04907,
      "grad_norm": 0.9566147882681758,
      "learning_rate": 0.003,
      "loss": 4.0613,
      "step": 4907
    },
    {
      "epoch": 0.04908,
      "grad_norm": 0.8813724661117732,
      "learning_rate": 0.003,
      "loss": 4.0732,
      "step": 4908
    },
    {
      "epoch": 0.04909,
      "grad_norm": 0.7697604768117718,
      "learning_rate": 0.003,
      "loss": 4.0773,
      "step": 4909
    },
    {
      "epoch": 0.0491,
      "grad_norm": 0.9551094520562223,
      "learning_rate": 0.003,
      "loss": 4.071,
      "step": 4910
    },
    {
      "epoch": 0.04911,
      "grad_norm": 1.5155833386194069,
      "learning_rate": 0.003,
      "loss": 4.0824,
      "step": 4911
    },
    {
      "epoch": 0.04912,
      "grad_norm": 0.9025182185743093,
      "learning_rate": 0.003,
      "loss": 4.0507,
      "step": 4912
    },
    {
      "epoch": 0.04913,
      "grad_norm": 0.7963791253987615,
      "learning_rate": 0.003,
      "loss": 4.0699,
      "step": 4913
    },
    {
      "epoch": 0.04914,
      "grad_norm": 0.7975682531864906,
      "learning_rate": 0.003,
      "loss": 4.074,
      "step": 4914
    },
    {
      "epoch": 0.04915,
      "grad_norm": 0.7680989535713021,
      "learning_rate": 0.003,
      "loss": 4.0798,
      "step": 4915
    },
    {
      "epoch": 0.04916,
      "grad_norm": 0.8541651719195734,
      "learning_rate": 0.003,
      "loss": 4.0782,
      "step": 4916
    },
    {
      "epoch": 0.04917,
      "grad_norm": 0.9306809194401052,
      "learning_rate": 0.003,
      "loss": 4.0395,
      "step": 4917
    },
    {
      "epoch": 0.04918,
      "grad_norm": 1.0201811580852054,
      "learning_rate": 0.003,
      "loss": 4.0805,
      "step": 4918
    },
    {
      "epoch": 0.04919,
      "grad_norm": 0.9164268845754078,
      "learning_rate": 0.003,
      "loss": 4.0783,
      "step": 4919
    },
    {
      "epoch": 0.0492,
      "grad_norm": 0.8305372570501433,
      "learning_rate": 0.003,
      "loss": 4.0765,
      "step": 4920
    },
    {
      "epoch": 0.04921,
      "grad_norm": 0.7707198235784101,
      "learning_rate": 0.003,
      "loss": 4.0926,
      "step": 4921
    },
    {
      "epoch": 0.04922,
      "grad_norm": 0.7928692496541254,
      "learning_rate": 0.003,
      "loss": 4.0671,
      "step": 4922
    },
    {
      "epoch": 0.04923,
      "grad_norm": 0.7521164440028039,
      "learning_rate": 0.003,
      "loss": 4.045,
      "step": 4923
    },
    {
      "epoch": 0.04924,
      "grad_norm": 0.8188114735033863,
      "learning_rate": 0.003,
      "loss": 4.0991,
      "step": 4924
    },
    {
      "epoch": 0.04925,
      "grad_norm": 0.8564277609742044,
      "learning_rate": 0.003,
      "loss": 4.051,
      "step": 4925
    },
    {
      "epoch": 0.04926,
      "grad_norm": 0.8486097281410112,
      "learning_rate": 0.003,
      "loss": 4.0563,
      "step": 4926
    },
    {
      "epoch": 0.04927,
      "grad_norm": 0.889369882812886,
      "learning_rate": 0.003,
      "loss": 4.0896,
      "step": 4927
    },
    {
      "epoch": 0.04928,
      "grad_norm": 1.040367513666876,
      "learning_rate": 0.003,
      "loss": 4.0578,
      "step": 4928
    },
    {
      "epoch": 0.04929,
      "grad_norm": 1.1046422380445349,
      "learning_rate": 0.003,
      "loss": 4.0775,
      "step": 4929
    },
    {
      "epoch": 0.0493,
      "grad_norm": 0.8664686841477672,
      "learning_rate": 0.003,
      "loss": 4.0646,
      "step": 4930
    },
    {
      "epoch": 0.04931,
      "grad_norm": 0.9761798460720659,
      "learning_rate": 0.003,
      "loss": 4.0523,
      "step": 4931
    },
    {
      "epoch": 0.04932,
      "grad_norm": 1.0969681843858505,
      "learning_rate": 0.003,
      "loss": 4.1055,
      "step": 4932
    },
    {
      "epoch": 0.04933,
      "grad_norm": 0.8634620800542036,
      "learning_rate": 0.003,
      "loss": 4.0746,
      "step": 4933
    },
    {
      "epoch": 0.04934,
      "grad_norm": 1.0167878960199042,
      "learning_rate": 0.003,
      "loss": 4.1067,
      "step": 4934
    },
    {
      "epoch": 0.04935,
      "grad_norm": 1.2105132259455145,
      "learning_rate": 0.003,
      "loss": 4.1103,
      "step": 4935
    },
    {
      "epoch": 0.04936,
      "grad_norm": 0.8823692534511746,
      "learning_rate": 0.003,
      "loss": 4.0749,
      "step": 4936
    },
    {
      "epoch": 0.04937,
      "grad_norm": 1.0035117633460957,
      "learning_rate": 0.003,
      "loss": 4.0734,
      "step": 4937
    },
    {
      "epoch": 0.04938,
      "grad_norm": 1.21119716146334,
      "learning_rate": 0.003,
      "loss": 4.0877,
      "step": 4938
    },
    {
      "epoch": 0.04939,
      "grad_norm": 0.9542017309667814,
      "learning_rate": 0.003,
      "loss": 4.093,
      "step": 4939
    },
    {
      "epoch": 0.0494,
      "grad_norm": 0.9693778311510872,
      "learning_rate": 0.003,
      "loss": 4.0976,
      "step": 4940
    },
    {
      "epoch": 0.04941,
      "grad_norm": 1.1934773830149854,
      "learning_rate": 0.003,
      "loss": 4.0758,
      "step": 4941
    },
    {
      "epoch": 0.04942,
      "grad_norm": 0.9941479800765747,
      "learning_rate": 0.003,
      "loss": 4.0833,
      "step": 4942
    },
    {
      "epoch": 0.04943,
      "grad_norm": 1.2980963908573873,
      "learning_rate": 0.003,
      "loss": 4.1165,
      "step": 4943
    },
    {
      "epoch": 0.04944,
      "grad_norm": 0.7685905125841351,
      "learning_rate": 0.003,
      "loss": 4.1046,
      "step": 4944
    },
    {
      "epoch": 0.04945,
      "grad_norm": 0.77621337036946,
      "learning_rate": 0.003,
      "loss": 4.094,
      "step": 4945
    },
    {
      "epoch": 0.04946,
      "grad_norm": 0.7935358182030233,
      "learning_rate": 0.003,
      "loss": 4.1189,
      "step": 4946
    },
    {
      "epoch": 0.04947,
      "grad_norm": 0.95777398203718,
      "learning_rate": 0.003,
      "loss": 4.0637,
      "step": 4947
    },
    {
      "epoch": 0.04948,
      "grad_norm": 1.0505854626251887,
      "learning_rate": 0.003,
      "loss": 4.0838,
      "step": 4948
    },
    {
      "epoch": 0.04949,
      "grad_norm": 0.9843835143059065,
      "learning_rate": 0.003,
      "loss": 4.0889,
      "step": 4949
    },
    {
      "epoch": 0.0495,
      "grad_norm": 0.9820238950682217,
      "learning_rate": 0.003,
      "loss": 4.0503,
      "step": 4950
    },
    {
      "epoch": 0.04951,
      "grad_norm": 1.0176777494089932,
      "learning_rate": 0.003,
      "loss": 4.0613,
      "step": 4951
    },
    {
      "epoch": 0.04952,
      "grad_norm": 0.8631784047281243,
      "learning_rate": 0.003,
      "loss": 4.0956,
      "step": 4952
    },
    {
      "epoch": 0.04953,
      "grad_norm": 0.8078946550955691,
      "learning_rate": 0.003,
      "loss": 4.0754,
      "step": 4953
    },
    {
      "epoch": 0.04954,
      "grad_norm": 0.8107838454898298,
      "learning_rate": 0.003,
      "loss": 4.0998,
      "step": 4954
    },
    {
      "epoch": 0.04955,
      "grad_norm": 0.6872102056615723,
      "learning_rate": 0.003,
      "loss": 4.0695,
      "step": 4955
    },
    {
      "epoch": 0.04956,
      "grad_norm": 0.7000353354694138,
      "learning_rate": 0.003,
      "loss": 4.0861,
      "step": 4956
    },
    {
      "epoch": 0.04957,
      "grad_norm": 0.768512912596234,
      "learning_rate": 0.003,
      "loss": 4.0543,
      "step": 4957
    },
    {
      "epoch": 0.04958,
      "grad_norm": 0.773414971171959,
      "learning_rate": 0.003,
      "loss": 4.0715,
      "step": 4958
    },
    {
      "epoch": 0.04959,
      "grad_norm": 0.6897656034365274,
      "learning_rate": 0.003,
      "loss": 4.0752,
      "step": 4959
    },
    {
      "epoch": 0.0496,
      "grad_norm": 0.5963565715617187,
      "learning_rate": 0.003,
      "loss": 4.0435,
      "step": 4960
    },
    {
      "epoch": 0.04961,
      "grad_norm": 0.6834698784405567,
      "learning_rate": 0.003,
      "loss": 4.0489,
      "step": 4961
    },
    {
      "epoch": 0.04962,
      "grad_norm": 0.7972061518569966,
      "learning_rate": 0.003,
      "loss": 4.0654,
      "step": 4962
    },
    {
      "epoch": 0.04963,
      "grad_norm": 0.8471174104945988,
      "learning_rate": 0.003,
      "loss": 4.1044,
      "step": 4963
    },
    {
      "epoch": 0.04964,
      "grad_norm": 0.8890963190037607,
      "learning_rate": 0.003,
      "loss": 4.079,
      "step": 4964
    },
    {
      "epoch": 0.04965,
      "grad_norm": 0.9621708392835049,
      "learning_rate": 0.003,
      "loss": 4.0829,
      "step": 4965
    },
    {
      "epoch": 0.04966,
      "grad_norm": 1.2746050729963243,
      "learning_rate": 0.003,
      "loss": 4.0627,
      "step": 4966
    },
    {
      "epoch": 0.04967,
      "grad_norm": 0.9487241879206226,
      "learning_rate": 0.003,
      "loss": 4.0831,
      "step": 4967
    },
    {
      "epoch": 0.04968,
      "grad_norm": 1.0115561803434678,
      "learning_rate": 0.003,
      "loss": 4.1113,
      "step": 4968
    },
    {
      "epoch": 0.04969,
      "grad_norm": 1.0743944507868857,
      "learning_rate": 0.003,
      "loss": 4.0867,
      "step": 4969
    },
    {
      "epoch": 0.0497,
      "grad_norm": 0.9957497513808655,
      "learning_rate": 0.003,
      "loss": 4.0797,
      "step": 4970
    },
    {
      "epoch": 0.04971,
      "grad_norm": 0.9301130535679807,
      "learning_rate": 0.003,
      "loss": 4.0988,
      "step": 4971
    },
    {
      "epoch": 0.04972,
      "grad_norm": 1.0252366398011912,
      "learning_rate": 0.003,
      "loss": 4.0737,
      "step": 4972
    },
    {
      "epoch": 0.04973,
      "grad_norm": 1.0321786535910968,
      "learning_rate": 0.003,
      "loss": 4.1218,
      "step": 4973
    },
    {
      "epoch": 0.04974,
      "grad_norm": 1.3675268731180446,
      "learning_rate": 0.003,
      "loss": 4.0929,
      "step": 4974
    },
    {
      "epoch": 0.04975,
      "grad_norm": 0.9689102407574199,
      "learning_rate": 0.003,
      "loss": 4.0839,
      "step": 4975
    },
    {
      "epoch": 0.04976,
      "grad_norm": 1.034378936349661,
      "learning_rate": 0.003,
      "loss": 4.0842,
      "step": 4976
    },
    {
      "epoch": 0.04977,
      "grad_norm": 1.0659592144925003,
      "learning_rate": 0.003,
      "loss": 4.0971,
      "step": 4977
    },
    {
      "epoch": 0.04978,
      "grad_norm": 0.9463690418294757,
      "learning_rate": 0.003,
      "loss": 4.0943,
      "step": 4978
    },
    {
      "epoch": 0.04979,
      "grad_norm": 1.0778624556709335,
      "learning_rate": 0.003,
      "loss": 4.1048,
      "step": 4979
    },
    {
      "epoch": 0.0498,
      "grad_norm": 1.1912796103263936,
      "learning_rate": 0.003,
      "loss": 4.0978,
      "step": 4980
    },
    {
      "epoch": 0.04981,
      "grad_norm": 0.9428519138134729,
      "learning_rate": 0.003,
      "loss": 4.0631,
      "step": 4981
    },
    {
      "epoch": 0.04982,
      "grad_norm": 0.9114023013651604,
      "learning_rate": 0.003,
      "loss": 4.0867,
      "step": 4982
    },
    {
      "epoch": 0.04983,
      "grad_norm": 0.843362002569619,
      "learning_rate": 0.003,
      "loss": 4.0415,
      "step": 4983
    },
    {
      "epoch": 0.04984,
      "grad_norm": 0.7329533030111096,
      "learning_rate": 0.003,
      "loss": 4.0463,
      "step": 4984
    },
    {
      "epoch": 0.04985,
      "grad_norm": 0.8771158538637082,
      "learning_rate": 0.003,
      "loss": 4.0766,
      "step": 4985
    },
    {
      "epoch": 0.04986,
      "grad_norm": 0.9492314223910526,
      "learning_rate": 0.003,
      "loss": 4.0937,
      "step": 4986
    },
    {
      "epoch": 0.04987,
      "grad_norm": 0.9063125309075447,
      "learning_rate": 0.003,
      "loss": 4.0605,
      "step": 4987
    },
    {
      "epoch": 0.04988,
      "grad_norm": 0.8211391661395855,
      "learning_rate": 0.003,
      "loss": 4.09,
      "step": 4988
    },
    {
      "epoch": 0.04989,
      "grad_norm": 0.87309817323828,
      "learning_rate": 0.003,
      "loss": 4.0952,
      "step": 4989
    },
    {
      "epoch": 0.0499,
      "grad_norm": 0.8139798258933807,
      "learning_rate": 0.003,
      "loss": 4.0704,
      "step": 4990
    },
    {
      "epoch": 0.04991,
      "grad_norm": 0.8390355952116492,
      "learning_rate": 0.003,
      "loss": 4.0561,
      "step": 4991
    },
    {
      "epoch": 0.04992,
      "grad_norm": 0.9935950349965598,
      "learning_rate": 0.003,
      "loss": 4.0456,
      "step": 4992
    },
    {
      "epoch": 0.04993,
      "grad_norm": 1.0610402805979944,
      "learning_rate": 0.003,
      "loss": 4.0747,
      "step": 4993
    },
    {
      "epoch": 0.04994,
      "grad_norm": 0.94882186728126,
      "learning_rate": 0.003,
      "loss": 4.0607,
      "step": 4994
    },
    {
      "epoch": 0.04995,
      "grad_norm": 0.867388049906827,
      "learning_rate": 0.003,
      "loss": 4.0785,
      "step": 4995
    },
    {
      "epoch": 0.04996,
      "grad_norm": 0.9650777431351639,
      "learning_rate": 0.003,
      "loss": 4.0629,
      "step": 4996
    },
    {
      "epoch": 0.04997,
      "grad_norm": 0.9684613017855864,
      "learning_rate": 0.003,
      "loss": 4.0792,
      "step": 4997
    },
    {
      "epoch": 0.04998,
      "grad_norm": 0.9878003417576896,
      "learning_rate": 0.003,
      "loss": 4.1043,
      "step": 4998
    },
    {
      "epoch": 0.04999,
      "grad_norm": 0.953046742937088,
      "learning_rate": 0.003,
      "loss": 4.0636,
      "step": 4999
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.844866115668994,
      "learning_rate": 0.003,
      "loss": 4.1039,
      "step": 5000
    },
    {
      "epoch": 0.05001,
      "grad_norm": 0.8155144329490257,
      "learning_rate": 0.003,
      "loss": 4.098,
      "step": 5001
    },
    {
      "epoch": 0.05002,
      "grad_norm": 0.9071734968733114,
      "learning_rate": 0.003,
      "loss": 4.0731,
      "step": 5002
    },
    {
      "epoch": 0.05003,
      "grad_norm": 1.0523308479446838,
      "learning_rate": 0.003,
      "loss": 4.092,
      "step": 5003
    },
    {
      "epoch": 0.05004,
      "grad_norm": 1.0673372138869082,
      "learning_rate": 0.003,
      "loss": 4.0748,
      "step": 5004
    },
    {
      "epoch": 0.05005,
      "grad_norm": 1.0190354983748227,
      "learning_rate": 0.003,
      "loss": 4.0976,
      "step": 5005
    },
    {
      "epoch": 0.05006,
      "grad_norm": 1.0502501422500765,
      "learning_rate": 0.003,
      "loss": 4.0762,
      "step": 5006
    },
    {
      "epoch": 0.05007,
      "grad_norm": 1.0050657203422344,
      "learning_rate": 0.003,
      "loss": 4.0922,
      "step": 5007
    },
    {
      "epoch": 0.05008,
      "grad_norm": 1.0632875671321045,
      "learning_rate": 0.003,
      "loss": 4.063,
      "step": 5008
    },
    {
      "epoch": 0.05009,
      "grad_norm": 0.7483220321269478,
      "learning_rate": 0.003,
      "loss": 4.0633,
      "step": 5009
    },
    {
      "epoch": 0.0501,
      "grad_norm": 0.6604154006789746,
      "learning_rate": 0.003,
      "loss": 4.076,
      "step": 5010
    },
    {
      "epoch": 0.05011,
      "grad_norm": 0.7127635769855144,
      "learning_rate": 0.003,
      "loss": 4.0771,
      "step": 5011
    },
    {
      "epoch": 0.05012,
      "grad_norm": 0.8658367408694297,
      "learning_rate": 0.003,
      "loss": 4.0661,
      "step": 5012
    },
    {
      "epoch": 0.05013,
      "grad_norm": 1.0526587712962345,
      "learning_rate": 0.003,
      "loss": 4.0761,
      "step": 5013
    },
    {
      "epoch": 0.05014,
      "grad_norm": 1.2523503854735638,
      "learning_rate": 0.003,
      "loss": 4.0565,
      "step": 5014
    },
    {
      "epoch": 0.05015,
      "grad_norm": 0.8154569818223713,
      "learning_rate": 0.003,
      "loss": 4.0894,
      "step": 5015
    },
    {
      "epoch": 0.05016,
      "grad_norm": 0.8907021793572791,
      "learning_rate": 0.003,
      "loss": 4.0694,
      "step": 5016
    },
    {
      "epoch": 0.05017,
      "grad_norm": 0.9052920115396309,
      "learning_rate": 0.003,
      "loss": 4.0847,
      "step": 5017
    },
    {
      "epoch": 0.05018,
      "grad_norm": 0.8754711897047392,
      "learning_rate": 0.003,
      "loss": 4.0788,
      "step": 5018
    },
    {
      "epoch": 0.05019,
      "grad_norm": 0.8774784593403933,
      "learning_rate": 0.003,
      "loss": 4.0979,
      "step": 5019
    },
    {
      "epoch": 0.0502,
      "grad_norm": 0.9389866910530438,
      "learning_rate": 0.003,
      "loss": 4.0692,
      "step": 5020
    },
    {
      "epoch": 0.05021,
      "grad_norm": 0.9591253397524557,
      "learning_rate": 0.003,
      "loss": 4.0722,
      "step": 5021
    },
    {
      "epoch": 0.05022,
      "grad_norm": 1.1105484396556573,
      "learning_rate": 0.003,
      "loss": 4.0783,
      "step": 5022
    },
    {
      "epoch": 0.05023,
      "grad_norm": 1.3065590449165418,
      "learning_rate": 0.003,
      "loss": 4.1089,
      "step": 5023
    },
    {
      "epoch": 0.05024,
      "grad_norm": 0.9093611617258934,
      "learning_rate": 0.003,
      "loss": 4.0612,
      "step": 5024
    },
    {
      "epoch": 0.05025,
      "grad_norm": 0.8149464138741495,
      "learning_rate": 0.003,
      "loss": 4.0977,
      "step": 5025
    },
    {
      "epoch": 0.05026,
      "grad_norm": 0.8063387216832264,
      "learning_rate": 0.003,
      "loss": 4.0701,
      "step": 5026
    },
    {
      "epoch": 0.05027,
      "grad_norm": 0.9928443146318517,
      "learning_rate": 0.003,
      "loss": 4.0895,
      "step": 5027
    },
    {
      "epoch": 0.05028,
      "grad_norm": 1.1022004638491405,
      "learning_rate": 0.003,
      "loss": 4.0788,
      "step": 5028
    },
    {
      "epoch": 0.05029,
      "grad_norm": 1.0254112827493802,
      "learning_rate": 0.003,
      "loss": 4.0926,
      "step": 5029
    },
    {
      "epoch": 0.0503,
      "grad_norm": 0.9739559101423857,
      "learning_rate": 0.003,
      "loss": 4.0655,
      "step": 5030
    },
    {
      "epoch": 0.05031,
      "grad_norm": 1.195596806455749,
      "learning_rate": 0.003,
      "loss": 4.0903,
      "step": 5031
    },
    {
      "epoch": 0.05032,
      "grad_norm": 0.8132570593113878,
      "learning_rate": 0.003,
      "loss": 4.0818,
      "step": 5032
    },
    {
      "epoch": 0.05033,
      "grad_norm": 0.7335990765850869,
      "learning_rate": 0.003,
      "loss": 4.0563,
      "step": 5033
    },
    {
      "epoch": 0.05034,
      "grad_norm": 0.6554999770414173,
      "learning_rate": 0.003,
      "loss": 4.055,
      "step": 5034
    },
    {
      "epoch": 0.05035,
      "grad_norm": 0.7532736427609922,
      "learning_rate": 0.003,
      "loss": 4.0786,
      "step": 5035
    },
    {
      "epoch": 0.05036,
      "grad_norm": 0.8283547847263479,
      "learning_rate": 0.003,
      "loss": 4.0614,
      "step": 5036
    },
    {
      "epoch": 0.05037,
      "grad_norm": 0.9606336416914282,
      "learning_rate": 0.003,
      "loss": 4.0616,
      "step": 5037
    },
    {
      "epoch": 0.05038,
      "grad_norm": 1.1756105988578247,
      "learning_rate": 0.003,
      "loss": 4.0784,
      "step": 5038
    },
    {
      "epoch": 0.05039,
      "grad_norm": 0.7260800281879718,
      "learning_rate": 0.003,
      "loss": 4.0816,
      "step": 5039
    },
    {
      "epoch": 0.0504,
      "grad_norm": 0.6895678700118688,
      "learning_rate": 0.003,
      "loss": 4.0769,
      "step": 5040
    },
    {
      "epoch": 0.05041,
      "grad_norm": 0.9054512238934349,
      "learning_rate": 0.003,
      "loss": 4.0834,
      "step": 5041
    },
    {
      "epoch": 0.05042,
      "grad_norm": 1.0681246931889021,
      "learning_rate": 0.003,
      "loss": 4.0798,
      "step": 5042
    },
    {
      "epoch": 0.05043,
      "grad_norm": 1.0081790953059804,
      "learning_rate": 0.003,
      "loss": 4.0765,
      "step": 5043
    },
    {
      "epoch": 0.05044,
      "grad_norm": 1.0083278627521677,
      "learning_rate": 0.003,
      "loss": 4.095,
      "step": 5044
    },
    {
      "epoch": 0.05045,
      "grad_norm": 1.1323410375959377,
      "learning_rate": 0.003,
      "loss": 4.1136,
      "step": 5045
    },
    {
      "epoch": 0.05046,
      "grad_norm": 1.038046747857265,
      "learning_rate": 0.003,
      "loss": 4.0726,
      "step": 5046
    },
    {
      "epoch": 0.05047,
      "grad_norm": 0.8846272273108629,
      "learning_rate": 0.003,
      "loss": 4.0807,
      "step": 5047
    },
    {
      "epoch": 0.05048,
      "grad_norm": 1.039375733492065,
      "learning_rate": 0.003,
      "loss": 4.0824,
      "step": 5048
    },
    {
      "epoch": 0.05049,
      "grad_norm": 1.0198096987245278,
      "learning_rate": 0.003,
      "loss": 4.0447,
      "step": 5049
    },
    {
      "epoch": 0.0505,
      "grad_norm": 0.9914651248213096,
      "learning_rate": 0.003,
      "loss": 4.1331,
      "step": 5050
    },
    {
      "epoch": 0.05051,
      "grad_norm": 0.9522133302767952,
      "learning_rate": 0.003,
      "loss": 4.0903,
      "step": 5051
    },
    {
      "epoch": 0.05052,
      "grad_norm": 1.020178995716012,
      "learning_rate": 0.003,
      "loss": 4.0667,
      "step": 5052
    },
    {
      "epoch": 0.05053,
      "grad_norm": 0.9136513702774933,
      "learning_rate": 0.003,
      "loss": 4.0395,
      "step": 5053
    },
    {
      "epoch": 0.05054,
      "grad_norm": 0.8516625497650816,
      "learning_rate": 0.003,
      "loss": 4.0932,
      "step": 5054
    },
    {
      "epoch": 0.05055,
      "grad_norm": 0.8444636192801542,
      "learning_rate": 0.003,
      "loss": 4.0714,
      "step": 5055
    },
    {
      "epoch": 0.05056,
      "grad_norm": 0.8646124548092219,
      "learning_rate": 0.003,
      "loss": 4.0859,
      "step": 5056
    },
    {
      "epoch": 0.05057,
      "grad_norm": 0.954021030827355,
      "learning_rate": 0.003,
      "loss": 4.0913,
      "step": 5057
    },
    {
      "epoch": 0.05058,
      "grad_norm": 1.1354179546437237,
      "learning_rate": 0.003,
      "loss": 4.0972,
      "step": 5058
    },
    {
      "epoch": 0.05059,
      "grad_norm": 0.8412247009608202,
      "learning_rate": 0.003,
      "loss": 4.0589,
      "step": 5059
    },
    {
      "epoch": 0.0506,
      "grad_norm": 0.8258030312088399,
      "learning_rate": 0.003,
      "loss": 4.089,
      "step": 5060
    },
    {
      "epoch": 0.05061,
      "grad_norm": 0.8796678252375678,
      "learning_rate": 0.003,
      "loss": 4.0694,
      "step": 5061
    },
    {
      "epoch": 0.05062,
      "grad_norm": 0.9882911088154981,
      "learning_rate": 0.003,
      "loss": 4.055,
      "step": 5062
    },
    {
      "epoch": 0.05063,
      "grad_norm": 0.9432186009346578,
      "learning_rate": 0.003,
      "loss": 4.0424,
      "step": 5063
    },
    {
      "epoch": 0.05064,
      "grad_norm": 0.9485814708559243,
      "learning_rate": 0.003,
      "loss": 4.0823,
      "step": 5064
    },
    {
      "epoch": 0.05065,
      "grad_norm": 0.9973027340770267,
      "learning_rate": 0.003,
      "loss": 4.0716,
      "step": 5065
    },
    {
      "epoch": 0.05066,
      "grad_norm": 1.055670001942788,
      "learning_rate": 0.003,
      "loss": 4.0612,
      "step": 5066
    },
    {
      "epoch": 0.05067,
      "grad_norm": 0.9244772620512077,
      "learning_rate": 0.003,
      "loss": 4.0698,
      "step": 5067
    },
    {
      "epoch": 0.05068,
      "grad_norm": 0.958855347779479,
      "learning_rate": 0.003,
      "loss": 4.093,
      "step": 5068
    },
    {
      "epoch": 0.05069,
      "grad_norm": 0.9074306933330328,
      "learning_rate": 0.003,
      "loss": 4.0826,
      "step": 5069
    },
    {
      "epoch": 0.0507,
      "grad_norm": 0.7465736760883044,
      "learning_rate": 0.003,
      "loss": 4.0915,
      "step": 5070
    },
    {
      "epoch": 0.05071,
      "grad_norm": 0.7907220925527945,
      "learning_rate": 0.003,
      "loss": 4.0797,
      "step": 5071
    },
    {
      "epoch": 0.05072,
      "grad_norm": 0.8791832214881413,
      "learning_rate": 0.003,
      "loss": 4.0542,
      "step": 5072
    },
    {
      "epoch": 0.05073,
      "grad_norm": 1.1193497551932123,
      "learning_rate": 0.003,
      "loss": 4.0625,
      "step": 5073
    },
    {
      "epoch": 0.05074,
      "grad_norm": 1.007997199633059,
      "learning_rate": 0.003,
      "loss": 4.0568,
      "step": 5074
    },
    {
      "epoch": 0.05075,
      "grad_norm": 0.9210544932505602,
      "learning_rate": 0.003,
      "loss": 4.0754,
      "step": 5075
    },
    {
      "epoch": 0.05076,
      "grad_norm": 0.8971244157459003,
      "learning_rate": 0.003,
      "loss": 4.09,
      "step": 5076
    },
    {
      "epoch": 0.05077,
      "grad_norm": 0.8845891734232929,
      "learning_rate": 0.003,
      "loss": 4.0928,
      "step": 5077
    },
    {
      "epoch": 0.05078,
      "grad_norm": 0.9113721083676786,
      "learning_rate": 0.003,
      "loss": 4.0864,
      "step": 5078
    },
    {
      "epoch": 0.05079,
      "grad_norm": 0.9084231790144179,
      "learning_rate": 0.003,
      "loss": 4.0586,
      "step": 5079
    },
    {
      "epoch": 0.0508,
      "grad_norm": 1.0477669246554797,
      "learning_rate": 0.003,
      "loss": 4.0583,
      "step": 5080
    },
    {
      "epoch": 0.05081,
      "grad_norm": 1.0274679159676723,
      "learning_rate": 0.003,
      "loss": 4.0841,
      "step": 5081
    },
    {
      "epoch": 0.05082,
      "grad_norm": 0.9811435796751755,
      "learning_rate": 0.003,
      "loss": 4.0879,
      "step": 5082
    },
    {
      "epoch": 0.05083,
      "grad_norm": 0.9115900027432353,
      "learning_rate": 0.003,
      "loss": 4.0503,
      "step": 5083
    },
    {
      "epoch": 0.05084,
      "grad_norm": 0.9665650959530513,
      "learning_rate": 0.003,
      "loss": 4.0697,
      "step": 5084
    },
    {
      "epoch": 0.05085,
      "grad_norm": 1.094785200544881,
      "learning_rate": 0.003,
      "loss": 4.0406,
      "step": 5085
    },
    {
      "epoch": 0.05086,
      "grad_norm": 1.0395235168756072,
      "learning_rate": 0.003,
      "loss": 4.0495,
      "step": 5086
    },
    {
      "epoch": 0.05087,
      "grad_norm": 1.1914983032220043,
      "learning_rate": 0.003,
      "loss": 4.0776,
      "step": 5087
    },
    {
      "epoch": 0.05088,
      "grad_norm": 0.9164887728560919,
      "learning_rate": 0.003,
      "loss": 4.0725,
      "step": 5088
    },
    {
      "epoch": 0.05089,
      "grad_norm": 0.9984418259866299,
      "learning_rate": 0.003,
      "loss": 4.0696,
      "step": 5089
    },
    {
      "epoch": 0.0509,
      "grad_norm": 0.9643661674080093,
      "learning_rate": 0.003,
      "loss": 4.1039,
      "step": 5090
    },
    {
      "epoch": 0.05091,
      "grad_norm": 0.9837794177673876,
      "learning_rate": 0.003,
      "loss": 4.067,
      "step": 5091
    },
    {
      "epoch": 0.05092,
      "grad_norm": 0.8924921438341328,
      "learning_rate": 0.003,
      "loss": 4.0663,
      "step": 5092
    },
    {
      "epoch": 0.05093,
      "grad_norm": 0.8256552974904879,
      "learning_rate": 0.003,
      "loss": 4.0814,
      "step": 5093
    },
    {
      "epoch": 0.05094,
      "grad_norm": 0.9039163713516233,
      "learning_rate": 0.003,
      "loss": 4.0451,
      "step": 5094
    },
    {
      "epoch": 0.05095,
      "grad_norm": 1.2383238891133954,
      "learning_rate": 0.003,
      "loss": 4.0887,
      "step": 5095
    },
    {
      "epoch": 0.05096,
      "grad_norm": 0.9650892595888488,
      "learning_rate": 0.003,
      "loss": 4.074,
      "step": 5096
    },
    {
      "epoch": 0.05097,
      "grad_norm": 0.9342147612808765,
      "learning_rate": 0.003,
      "loss": 4.0678,
      "step": 5097
    },
    {
      "epoch": 0.05098,
      "grad_norm": 0.9150581045088569,
      "learning_rate": 0.003,
      "loss": 4.0596,
      "step": 5098
    },
    {
      "epoch": 0.05099,
      "grad_norm": 1.0196257914961222,
      "learning_rate": 0.003,
      "loss": 4.0429,
      "step": 5099
    },
    {
      "epoch": 0.051,
      "grad_norm": 1.0100480797466045,
      "learning_rate": 0.003,
      "loss": 4.0651,
      "step": 5100
    },
    {
      "epoch": 0.05101,
      "grad_norm": 1.076613085076168,
      "learning_rate": 0.003,
      "loss": 4.1062,
      "step": 5101
    },
    {
      "epoch": 0.05102,
      "grad_norm": 0.9177055767593738,
      "learning_rate": 0.003,
      "loss": 4.0748,
      "step": 5102
    },
    {
      "epoch": 0.05103,
      "grad_norm": 0.7867724366196349,
      "learning_rate": 0.003,
      "loss": 4.0841,
      "step": 5103
    },
    {
      "epoch": 0.05104,
      "grad_norm": 0.8457986934357559,
      "learning_rate": 0.003,
      "loss": 4.0646,
      "step": 5104
    },
    {
      "epoch": 0.05105,
      "grad_norm": 0.8367744837015725,
      "learning_rate": 0.003,
      "loss": 4.055,
      "step": 5105
    },
    {
      "epoch": 0.05106,
      "grad_norm": 0.8022124167980758,
      "learning_rate": 0.003,
      "loss": 4.0295,
      "step": 5106
    },
    {
      "epoch": 0.05107,
      "grad_norm": 0.6952811544982499,
      "learning_rate": 0.003,
      "loss": 4.0528,
      "step": 5107
    },
    {
      "epoch": 0.05108,
      "grad_norm": 0.7444855516396648,
      "learning_rate": 0.003,
      "loss": 4.1015,
      "step": 5108
    },
    {
      "epoch": 0.05109,
      "grad_norm": 0.878589896084288,
      "learning_rate": 0.003,
      "loss": 4.0436,
      "step": 5109
    },
    {
      "epoch": 0.0511,
      "grad_norm": 0.8745594675885457,
      "learning_rate": 0.003,
      "loss": 4.0782,
      "step": 5110
    },
    {
      "epoch": 0.05111,
      "grad_norm": 1.0139622357551754,
      "learning_rate": 0.003,
      "loss": 4.0972,
      "step": 5111
    },
    {
      "epoch": 0.05112,
      "grad_norm": 1.248129365292357,
      "learning_rate": 0.003,
      "loss": 4.0919,
      "step": 5112
    },
    {
      "epoch": 0.05113,
      "grad_norm": 0.9885210087032184,
      "learning_rate": 0.003,
      "loss": 4.0743,
      "step": 5113
    },
    {
      "epoch": 0.05114,
      "grad_norm": 0.9571971369820982,
      "learning_rate": 0.003,
      "loss": 4.0506,
      "step": 5114
    },
    {
      "epoch": 0.05115,
      "grad_norm": 0.9106967184247701,
      "learning_rate": 0.003,
      "loss": 4.0784,
      "step": 5115
    },
    {
      "epoch": 0.05116,
      "grad_norm": 1.053212984649482,
      "learning_rate": 0.003,
      "loss": 4.0904,
      "step": 5116
    },
    {
      "epoch": 0.05117,
      "grad_norm": 0.9762026130378781,
      "learning_rate": 0.003,
      "loss": 4.066,
      "step": 5117
    },
    {
      "epoch": 0.05118,
      "grad_norm": 0.9813459473673732,
      "learning_rate": 0.003,
      "loss": 4.0675,
      "step": 5118
    },
    {
      "epoch": 0.05119,
      "grad_norm": 1.0553578473596843,
      "learning_rate": 0.003,
      "loss": 4.0545,
      "step": 5119
    },
    {
      "epoch": 0.0512,
      "grad_norm": 1.070915563107028,
      "learning_rate": 0.003,
      "loss": 4.045,
      "step": 5120
    },
    {
      "epoch": 0.05121,
      "grad_norm": 0.8748371385007153,
      "learning_rate": 0.003,
      "loss": 4.0596,
      "step": 5121
    },
    {
      "epoch": 0.05122,
      "grad_norm": 0.960144764905716,
      "learning_rate": 0.003,
      "loss": 4.052,
      "step": 5122
    },
    {
      "epoch": 0.05123,
      "grad_norm": 1.210628825057938,
      "learning_rate": 0.003,
      "loss": 4.0783,
      "step": 5123
    },
    {
      "epoch": 0.05124,
      "grad_norm": 0.9135871799109425,
      "learning_rate": 0.003,
      "loss": 4.1068,
      "step": 5124
    },
    {
      "epoch": 0.05125,
      "grad_norm": 0.9676901109206436,
      "learning_rate": 0.003,
      "loss": 4.0741,
      "step": 5125
    },
    {
      "epoch": 0.05126,
      "grad_norm": 0.8403551330424058,
      "learning_rate": 0.003,
      "loss": 4.0649,
      "step": 5126
    },
    {
      "epoch": 0.05127,
      "grad_norm": 0.7466734661308947,
      "learning_rate": 0.003,
      "loss": 4.0648,
      "step": 5127
    },
    {
      "epoch": 0.05128,
      "grad_norm": 0.728346628542258,
      "learning_rate": 0.003,
      "loss": 4.0822,
      "step": 5128
    },
    {
      "epoch": 0.05129,
      "grad_norm": 0.7017928229749225,
      "learning_rate": 0.003,
      "loss": 4.0485,
      "step": 5129
    },
    {
      "epoch": 0.0513,
      "grad_norm": 0.7221803684673601,
      "learning_rate": 0.003,
      "loss": 4.0348,
      "step": 5130
    },
    {
      "epoch": 0.05131,
      "grad_norm": 0.7808750308328987,
      "learning_rate": 0.003,
      "loss": 4.0746,
      "step": 5131
    },
    {
      "epoch": 0.05132,
      "grad_norm": 0.8913684947688636,
      "learning_rate": 0.003,
      "loss": 4.0614,
      "step": 5132
    },
    {
      "epoch": 0.05133,
      "grad_norm": 1.1454285750946989,
      "learning_rate": 0.003,
      "loss": 4.0771,
      "step": 5133
    },
    {
      "epoch": 0.05134,
      "grad_norm": 0.9684078269337082,
      "learning_rate": 0.003,
      "loss": 4.0834,
      "step": 5134
    },
    {
      "epoch": 0.05135,
      "grad_norm": 1.1615129798672017,
      "learning_rate": 0.003,
      "loss": 4.0576,
      "step": 5135
    },
    {
      "epoch": 0.05136,
      "grad_norm": 0.8816470981524503,
      "learning_rate": 0.003,
      "loss": 4.0367,
      "step": 5136
    },
    {
      "epoch": 0.05137,
      "grad_norm": 0.8808116644656396,
      "learning_rate": 0.003,
      "loss": 4.0813,
      "step": 5137
    },
    {
      "epoch": 0.05138,
      "grad_norm": 1.030677698421762,
      "learning_rate": 0.003,
      "loss": 4.0754,
      "step": 5138
    },
    {
      "epoch": 0.05139,
      "grad_norm": 1.1279946491267732,
      "learning_rate": 0.003,
      "loss": 4.0747,
      "step": 5139
    },
    {
      "epoch": 0.0514,
      "grad_norm": 0.9370222106532408,
      "learning_rate": 0.003,
      "loss": 4.0676,
      "step": 5140
    },
    {
      "epoch": 0.05141,
      "grad_norm": 0.8884938807254016,
      "learning_rate": 0.003,
      "loss": 4.1182,
      "step": 5141
    },
    {
      "epoch": 0.05142,
      "grad_norm": 0.840850706102145,
      "learning_rate": 0.003,
      "loss": 4.0609,
      "step": 5142
    },
    {
      "epoch": 0.05143,
      "grad_norm": 0.880595971401337,
      "learning_rate": 0.003,
      "loss": 4.0693,
      "step": 5143
    },
    {
      "epoch": 0.05144,
      "grad_norm": 0.8704477269765287,
      "learning_rate": 0.003,
      "loss": 4.0567,
      "step": 5144
    },
    {
      "epoch": 0.05145,
      "grad_norm": 0.8916851706228778,
      "learning_rate": 0.003,
      "loss": 4.055,
      "step": 5145
    },
    {
      "epoch": 0.05146,
      "grad_norm": 0.9663208313115196,
      "learning_rate": 0.003,
      "loss": 4.0476,
      "step": 5146
    },
    {
      "epoch": 0.05147,
      "grad_norm": 1.1070241756865764,
      "learning_rate": 0.003,
      "loss": 4.0845,
      "step": 5147
    },
    {
      "epoch": 0.05148,
      "grad_norm": 0.9885155736285973,
      "learning_rate": 0.003,
      "loss": 4.0813,
      "step": 5148
    },
    {
      "epoch": 0.05149,
      "grad_norm": 0.9670853678419679,
      "learning_rate": 0.003,
      "loss": 4.0561,
      "step": 5149
    },
    {
      "epoch": 0.0515,
      "grad_norm": 0.9350123569517975,
      "learning_rate": 0.003,
      "loss": 4.0934,
      "step": 5150
    },
    {
      "epoch": 0.05151,
      "grad_norm": 0.9079989062981128,
      "learning_rate": 0.003,
      "loss": 4.0546,
      "step": 5151
    },
    {
      "epoch": 0.05152,
      "grad_norm": 0.9776926173785698,
      "learning_rate": 0.003,
      "loss": 4.0937,
      "step": 5152
    },
    {
      "epoch": 0.05153,
      "grad_norm": 0.9166249719052514,
      "learning_rate": 0.003,
      "loss": 4.07,
      "step": 5153
    },
    {
      "epoch": 0.05154,
      "grad_norm": 0.8423591519799284,
      "learning_rate": 0.003,
      "loss": 4.0811,
      "step": 5154
    },
    {
      "epoch": 0.05155,
      "grad_norm": 0.9495784465505834,
      "learning_rate": 0.003,
      "loss": 4.0838,
      "step": 5155
    },
    {
      "epoch": 0.05156,
      "grad_norm": 0.8711852658243976,
      "learning_rate": 0.003,
      "loss": 4.1039,
      "step": 5156
    },
    {
      "epoch": 0.05157,
      "grad_norm": 0.9761077463552009,
      "learning_rate": 0.003,
      "loss": 4.0795,
      "step": 5157
    },
    {
      "epoch": 0.05158,
      "grad_norm": 0.9931352807114164,
      "learning_rate": 0.003,
      "loss": 4.0791,
      "step": 5158
    },
    {
      "epoch": 0.05159,
      "grad_norm": 1.1193695002600963,
      "learning_rate": 0.003,
      "loss": 4.0619,
      "step": 5159
    },
    {
      "epoch": 0.0516,
      "grad_norm": 0.9267022082064607,
      "learning_rate": 0.003,
      "loss": 4.0636,
      "step": 5160
    },
    {
      "epoch": 0.05161,
      "grad_norm": 0.9665280786599717,
      "learning_rate": 0.003,
      "loss": 4.0389,
      "step": 5161
    },
    {
      "epoch": 0.05162,
      "grad_norm": 1.0869042755370812,
      "learning_rate": 0.003,
      "loss": 4.0735,
      "step": 5162
    },
    {
      "epoch": 0.05163,
      "grad_norm": 1.0568255095175747,
      "learning_rate": 0.003,
      "loss": 4.0885,
      "step": 5163
    },
    {
      "epoch": 0.05164,
      "grad_norm": 1.1739265193923354,
      "learning_rate": 0.003,
      "loss": 4.0693,
      "step": 5164
    },
    {
      "epoch": 0.05165,
      "grad_norm": 0.9577726183728671,
      "learning_rate": 0.003,
      "loss": 4.0856,
      "step": 5165
    },
    {
      "epoch": 0.05166,
      "grad_norm": 1.1352998589608443,
      "learning_rate": 0.003,
      "loss": 4.084,
      "step": 5166
    },
    {
      "epoch": 0.05167,
      "grad_norm": 0.8717433391514583,
      "learning_rate": 0.003,
      "loss": 4.0888,
      "step": 5167
    },
    {
      "epoch": 0.05168,
      "grad_norm": 0.870068697924481,
      "learning_rate": 0.003,
      "loss": 4.115,
      "step": 5168
    },
    {
      "epoch": 0.05169,
      "grad_norm": 1.0023358246624934,
      "learning_rate": 0.003,
      "loss": 4.0695,
      "step": 5169
    },
    {
      "epoch": 0.0517,
      "grad_norm": 1.2431734452049883,
      "learning_rate": 0.003,
      "loss": 4.0434,
      "step": 5170
    },
    {
      "epoch": 0.05171,
      "grad_norm": 0.9116624433354724,
      "learning_rate": 0.003,
      "loss": 4.068,
      "step": 5171
    },
    {
      "epoch": 0.05172,
      "grad_norm": 0.9323927967551716,
      "learning_rate": 0.003,
      "loss": 4.0832,
      "step": 5172
    },
    {
      "epoch": 0.05173,
      "grad_norm": 1.054805595580774,
      "learning_rate": 0.003,
      "loss": 4.0957,
      "step": 5173
    },
    {
      "epoch": 0.05174,
      "grad_norm": 1.122856751648051,
      "learning_rate": 0.003,
      "loss": 4.0906,
      "step": 5174
    },
    {
      "epoch": 0.05175,
      "grad_norm": 0.9937471915910451,
      "learning_rate": 0.003,
      "loss": 4.0927,
      "step": 5175
    },
    {
      "epoch": 0.05176,
      "grad_norm": 0.9840832744172103,
      "learning_rate": 0.003,
      "loss": 4.0595,
      "step": 5176
    },
    {
      "epoch": 0.05177,
      "grad_norm": 0.7800980213457716,
      "learning_rate": 0.003,
      "loss": 4.0512,
      "step": 5177
    },
    {
      "epoch": 0.05178,
      "grad_norm": 0.8673592583630511,
      "learning_rate": 0.003,
      "loss": 4.0513,
      "step": 5178
    },
    {
      "epoch": 0.05179,
      "grad_norm": 0.8980602578393677,
      "learning_rate": 0.003,
      "loss": 4.07,
      "step": 5179
    },
    {
      "epoch": 0.0518,
      "grad_norm": 1.0782172804196768,
      "learning_rate": 0.003,
      "loss": 4.0708,
      "step": 5180
    },
    {
      "epoch": 0.05181,
      "grad_norm": 1.3393343782861993,
      "learning_rate": 0.003,
      "loss": 4.0904,
      "step": 5181
    },
    {
      "epoch": 0.05182,
      "grad_norm": 0.6874879870433082,
      "learning_rate": 0.003,
      "loss": 4.0907,
      "step": 5182
    },
    {
      "epoch": 0.05183,
      "grad_norm": 0.8933939115278953,
      "learning_rate": 0.003,
      "loss": 4.0571,
      "step": 5183
    },
    {
      "epoch": 0.05184,
      "grad_norm": 1.0375617190613013,
      "learning_rate": 0.003,
      "loss": 4.0642,
      "step": 5184
    },
    {
      "epoch": 0.05185,
      "grad_norm": 0.978363863806633,
      "learning_rate": 0.003,
      "loss": 4.0629,
      "step": 5185
    },
    {
      "epoch": 0.05186,
      "grad_norm": 0.9787815400755827,
      "learning_rate": 0.003,
      "loss": 4.0721,
      "step": 5186
    },
    {
      "epoch": 0.05187,
      "grad_norm": 1.026722875042982,
      "learning_rate": 0.003,
      "loss": 4.1133,
      "step": 5187
    },
    {
      "epoch": 0.05188,
      "grad_norm": 1.0387976995995625,
      "learning_rate": 0.003,
      "loss": 4.0896,
      "step": 5188
    },
    {
      "epoch": 0.05189,
      "grad_norm": 0.9737693143733961,
      "learning_rate": 0.003,
      "loss": 4.0633,
      "step": 5189
    },
    {
      "epoch": 0.0519,
      "grad_norm": 1.1745663438513876,
      "learning_rate": 0.003,
      "loss": 4.0705,
      "step": 5190
    },
    {
      "epoch": 0.05191,
      "grad_norm": 0.9594735851770623,
      "learning_rate": 0.003,
      "loss": 4.0673,
      "step": 5191
    },
    {
      "epoch": 0.05192,
      "grad_norm": 0.9706881201053492,
      "learning_rate": 0.003,
      "loss": 4.0869,
      "step": 5192
    },
    {
      "epoch": 0.05193,
      "grad_norm": 0.927921231735236,
      "learning_rate": 0.003,
      "loss": 4.0815,
      "step": 5193
    },
    {
      "epoch": 0.05194,
      "grad_norm": 0.8511889522904593,
      "learning_rate": 0.003,
      "loss": 4.0442,
      "step": 5194
    },
    {
      "epoch": 0.05195,
      "grad_norm": 0.8688574305845654,
      "learning_rate": 0.003,
      "loss": 4.0833,
      "step": 5195
    },
    {
      "epoch": 0.05196,
      "grad_norm": 1.0602526179506908,
      "learning_rate": 0.003,
      "loss": 4.0662,
      "step": 5196
    },
    {
      "epoch": 0.05197,
      "grad_norm": 1.004033518921005,
      "learning_rate": 0.003,
      "loss": 4.0874,
      "step": 5197
    },
    {
      "epoch": 0.05198,
      "grad_norm": 0.9382654504233616,
      "learning_rate": 0.003,
      "loss": 4.0577,
      "step": 5198
    },
    {
      "epoch": 0.05199,
      "grad_norm": 0.8240832698733139,
      "learning_rate": 0.003,
      "loss": 4.0443,
      "step": 5199
    },
    {
      "epoch": 0.052,
      "grad_norm": 0.8010190091673817,
      "learning_rate": 0.003,
      "loss": 4.0748,
      "step": 5200
    },
    {
      "epoch": 0.05201,
      "grad_norm": 0.9345369763160736,
      "learning_rate": 0.003,
      "loss": 4.079,
      "step": 5201
    },
    {
      "epoch": 0.05202,
      "grad_norm": 1.1219968562809288,
      "learning_rate": 0.003,
      "loss": 4.0713,
      "step": 5202
    },
    {
      "epoch": 0.05203,
      "grad_norm": 1.165320649672993,
      "learning_rate": 0.003,
      "loss": 4.1032,
      "step": 5203
    },
    {
      "epoch": 0.05204,
      "grad_norm": 1.0069940016760848,
      "learning_rate": 0.003,
      "loss": 4.0912,
      "step": 5204
    },
    {
      "epoch": 0.05205,
      "grad_norm": 0.9073269196171544,
      "learning_rate": 0.003,
      "loss": 4.0947,
      "step": 5205
    },
    {
      "epoch": 0.05206,
      "grad_norm": 0.9676783509672855,
      "learning_rate": 0.003,
      "loss": 4.0719,
      "step": 5206
    },
    {
      "epoch": 0.05207,
      "grad_norm": 0.9614251201683252,
      "learning_rate": 0.003,
      "loss": 4.0559,
      "step": 5207
    },
    {
      "epoch": 0.05208,
      "grad_norm": 0.8233264549032968,
      "learning_rate": 0.003,
      "loss": 4.0703,
      "step": 5208
    },
    {
      "epoch": 0.05209,
      "grad_norm": 0.795950781260021,
      "learning_rate": 0.003,
      "loss": 4.0636,
      "step": 5209
    },
    {
      "epoch": 0.0521,
      "grad_norm": 0.850100905132024,
      "learning_rate": 0.003,
      "loss": 4.0376,
      "step": 5210
    },
    {
      "epoch": 0.05211,
      "grad_norm": 1.0102855739696595,
      "learning_rate": 0.003,
      "loss": 4.064,
      "step": 5211
    },
    {
      "epoch": 0.05212,
      "grad_norm": 1.3483238868956038,
      "learning_rate": 0.003,
      "loss": 4.1233,
      "step": 5212
    },
    {
      "epoch": 0.05213,
      "grad_norm": 0.771838626249321,
      "learning_rate": 0.003,
      "loss": 4.0664,
      "step": 5213
    },
    {
      "epoch": 0.05214,
      "grad_norm": 0.6993971591998677,
      "learning_rate": 0.003,
      "loss": 4.0572,
      "step": 5214
    },
    {
      "epoch": 0.05215,
      "grad_norm": 0.7581728248150723,
      "learning_rate": 0.003,
      "loss": 4.0501,
      "step": 5215
    },
    {
      "epoch": 0.05216,
      "grad_norm": 0.8519198915236995,
      "learning_rate": 0.003,
      "loss": 4.0619,
      "step": 5216
    },
    {
      "epoch": 0.05217,
      "grad_norm": 1.0745388491695431,
      "learning_rate": 0.003,
      "loss": 4.0712,
      "step": 5217
    },
    {
      "epoch": 0.05218,
      "grad_norm": 0.9935688948883226,
      "learning_rate": 0.003,
      "loss": 4.0616,
      "step": 5218
    },
    {
      "epoch": 0.05219,
      "grad_norm": 0.9084561291587792,
      "learning_rate": 0.003,
      "loss": 4.0856,
      "step": 5219
    },
    {
      "epoch": 0.0522,
      "grad_norm": 1.0337978058374222,
      "learning_rate": 0.003,
      "loss": 4.0525,
      "step": 5220
    },
    {
      "epoch": 0.05221,
      "grad_norm": 1.1422321793715118,
      "learning_rate": 0.003,
      "loss": 4.0672,
      "step": 5221
    },
    {
      "epoch": 0.05222,
      "grad_norm": 0.7554460463065314,
      "learning_rate": 0.003,
      "loss": 4.069,
      "step": 5222
    },
    {
      "epoch": 0.05223,
      "grad_norm": 0.8000343955599867,
      "learning_rate": 0.003,
      "loss": 4.0844,
      "step": 5223
    },
    {
      "epoch": 0.05224,
      "grad_norm": 0.9379433719212857,
      "learning_rate": 0.003,
      "loss": 4.0613,
      "step": 5224
    },
    {
      "epoch": 0.05225,
      "grad_norm": 1.2417850745548258,
      "learning_rate": 0.003,
      "loss": 4.1011,
      "step": 5225
    },
    {
      "epoch": 0.05226,
      "grad_norm": 0.9382790639135026,
      "learning_rate": 0.003,
      "loss": 4.0804,
      "step": 5226
    },
    {
      "epoch": 0.05227,
      "grad_norm": 0.8974481342791866,
      "learning_rate": 0.003,
      "loss": 4.1067,
      "step": 5227
    },
    {
      "epoch": 0.05228,
      "grad_norm": 0.8629619700937163,
      "learning_rate": 0.003,
      "loss": 4.0838,
      "step": 5228
    },
    {
      "epoch": 0.05229,
      "grad_norm": 1.019751882064597,
      "learning_rate": 0.003,
      "loss": 4.0498,
      "step": 5229
    },
    {
      "epoch": 0.0523,
      "grad_norm": 1.1377445229403527,
      "learning_rate": 0.003,
      "loss": 4.0445,
      "step": 5230
    },
    {
      "epoch": 0.05231,
      "grad_norm": 0.9424656899524195,
      "learning_rate": 0.003,
      "loss": 4.0795,
      "step": 5231
    },
    {
      "epoch": 0.05232,
      "grad_norm": 0.9575611751186932,
      "learning_rate": 0.003,
      "loss": 4.0678,
      "step": 5232
    },
    {
      "epoch": 0.05233,
      "grad_norm": 0.9293995877028672,
      "learning_rate": 0.003,
      "loss": 4.0546,
      "step": 5233
    },
    {
      "epoch": 0.05234,
      "grad_norm": 0.9615259872002241,
      "learning_rate": 0.003,
      "loss": 4.1018,
      "step": 5234
    },
    {
      "epoch": 0.05235,
      "grad_norm": 0.983028393165795,
      "learning_rate": 0.003,
      "loss": 4.0777,
      "step": 5235
    },
    {
      "epoch": 0.05236,
      "grad_norm": 1.0016310432668707,
      "learning_rate": 0.003,
      "loss": 4.0616,
      "step": 5236
    },
    {
      "epoch": 0.05237,
      "grad_norm": 1.015432602850747,
      "learning_rate": 0.003,
      "loss": 4.0653,
      "step": 5237
    },
    {
      "epoch": 0.05238,
      "grad_norm": 1.078845385798474,
      "learning_rate": 0.003,
      "loss": 4.058,
      "step": 5238
    },
    {
      "epoch": 0.05239,
      "grad_norm": 0.9307396208378222,
      "learning_rate": 0.003,
      "loss": 4.0772,
      "step": 5239
    },
    {
      "epoch": 0.0524,
      "grad_norm": 0.9571027247324106,
      "learning_rate": 0.003,
      "loss": 4.0895,
      "step": 5240
    },
    {
      "epoch": 0.05241,
      "grad_norm": 0.9792332364709184,
      "learning_rate": 0.003,
      "loss": 4.0927,
      "step": 5241
    },
    {
      "epoch": 0.05242,
      "grad_norm": 0.9182971281795369,
      "learning_rate": 0.003,
      "loss": 4.0894,
      "step": 5242
    },
    {
      "epoch": 0.05243,
      "grad_norm": 0.8942676834653476,
      "learning_rate": 0.003,
      "loss": 4.082,
      "step": 5243
    },
    {
      "epoch": 0.05244,
      "grad_norm": 0.89594845212589,
      "learning_rate": 0.003,
      "loss": 4.0581,
      "step": 5244
    },
    {
      "epoch": 0.05245,
      "grad_norm": 1.0089126308943768,
      "learning_rate": 0.003,
      "loss": 4.0668,
      "step": 5245
    },
    {
      "epoch": 0.05246,
      "grad_norm": 1.192205008646422,
      "learning_rate": 0.003,
      "loss": 4.0777,
      "step": 5246
    },
    {
      "epoch": 0.05247,
      "grad_norm": 1.0822271048921281,
      "learning_rate": 0.003,
      "loss": 4.0762,
      "step": 5247
    },
    {
      "epoch": 0.05248,
      "grad_norm": 0.9132052877500991,
      "learning_rate": 0.003,
      "loss": 4.0792,
      "step": 5248
    },
    {
      "epoch": 0.05249,
      "grad_norm": 1.0439345558274598,
      "learning_rate": 0.003,
      "loss": 4.0939,
      "step": 5249
    },
    {
      "epoch": 0.0525,
      "grad_norm": 1.1063976766968928,
      "learning_rate": 0.003,
      "loss": 4.0888,
      "step": 5250
    },
    {
      "epoch": 0.05251,
      "grad_norm": 0.8427299394254306,
      "learning_rate": 0.003,
      "loss": 4.0693,
      "step": 5251
    },
    {
      "epoch": 0.05252,
      "grad_norm": 0.8460576745435954,
      "learning_rate": 0.003,
      "loss": 4.0952,
      "step": 5252
    },
    {
      "epoch": 0.05253,
      "grad_norm": 0.7631623954154223,
      "learning_rate": 0.003,
      "loss": 4.0598,
      "step": 5253
    },
    {
      "epoch": 0.05254,
      "grad_norm": 0.7582085848780449,
      "learning_rate": 0.003,
      "loss": 4.0466,
      "step": 5254
    },
    {
      "epoch": 0.05255,
      "grad_norm": 0.871269617254244,
      "learning_rate": 0.003,
      "loss": 4.05,
      "step": 5255
    },
    {
      "epoch": 0.05256,
      "grad_norm": 1.1274865245345602,
      "learning_rate": 0.003,
      "loss": 4.0815,
      "step": 5256
    },
    {
      "epoch": 0.05257,
      "grad_norm": 0.9482986096135555,
      "learning_rate": 0.003,
      "loss": 4.07,
      "step": 5257
    },
    {
      "epoch": 0.05258,
      "grad_norm": 0.8120437150184056,
      "learning_rate": 0.003,
      "loss": 4.0675,
      "step": 5258
    },
    {
      "epoch": 0.05259,
      "grad_norm": 0.7370127557005074,
      "learning_rate": 0.003,
      "loss": 4.0499,
      "step": 5259
    },
    {
      "epoch": 0.0526,
      "grad_norm": 0.8055148797864925,
      "learning_rate": 0.003,
      "loss": 4.0816,
      "step": 5260
    },
    {
      "epoch": 0.05261,
      "grad_norm": 0.7414676121854172,
      "learning_rate": 0.003,
      "loss": 4.0469,
      "step": 5261
    },
    {
      "epoch": 0.05262,
      "grad_norm": 0.8431409042373192,
      "learning_rate": 0.003,
      "loss": 4.0417,
      "step": 5262
    },
    {
      "epoch": 0.05263,
      "grad_norm": 0.9936473719599972,
      "learning_rate": 0.003,
      "loss": 4.0923,
      "step": 5263
    },
    {
      "epoch": 0.05264,
      "grad_norm": 1.2454811247723998,
      "learning_rate": 0.003,
      "loss": 4.1006,
      "step": 5264
    },
    {
      "epoch": 0.05265,
      "grad_norm": 0.7140907414977667,
      "learning_rate": 0.003,
      "loss": 4.0501,
      "step": 5265
    },
    {
      "epoch": 0.05266,
      "grad_norm": 0.6844293037667735,
      "learning_rate": 0.003,
      "loss": 4.0361,
      "step": 5266
    },
    {
      "epoch": 0.05267,
      "grad_norm": 0.7232891737003544,
      "learning_rate": 0.003,
      "loss": 4.0891,
      "step": 5267
    },
    {
      "epoch": 0.05268,
      "grad_norm": 0.8650861484246375,
      "learning_rate": 0.003,
      "loss": 4.0554,
      "step": 5268
    },
    {
      "epoch": 0.05269,
      "grad_norm": 1.078417956716715,
      "learning_rate": 0.003,
      "loss": 4.04,
      "step": 5269
    },
    {
      "epoch": 0.0527,
      "grad_norm": 1.081957308191504,
      "learning_rate": 0.003,
      "loss": 4.0748,
      "step": 5270
    },
    {
      "epoch": 0.05271,
      "grad_norm": 1.0713979709889552,
      "learning_rate": 0.003,
      "loss": 4.0407,
      "step": 5271
    },
    {
      "epoch": 0.05272,
      "grad_norm": 0.9799954386145029,
      "learning_rate": 0.003,
      "loss": 4.0932,
      "step": 5272
    },
    {
      "epoch": 0.05273,
      "grad_norm": 1.1944522490266327,
      "learning_rate": 0.003,
      "loss": 4.0858,
      "step": 5273
    },
    {
      "epoch": 0.05274,
      "grad_norm": 0.9595893728165487,
      "learning_rate": 0.003,
      "loss": 4.0886,
      "step": 5274
    },
    {
      "epoch": 0.05275,
      "grad_norm": 1.0373746989136285,
      "learning_rate": 0.003,
      "loss": 4.0638,
      "step": 5275
    },
    {
      "epoch": 0.05276,
      "grad_norm": 0.9783693469288199,
      "learning_rate": 0.003,
      "loss": 4.055,
      "step": 5276
    },
    {
      "epoch": 0.05277,
      "grad_norm": 0.9308459462602469,
      "learning_rate": 0.003,
      "loss": 4.0437,
      "step": 5277
    },
    {
      "epoch": 0.05278,
      "grad_norm": 0.9127924883155014,
      "learning_rate": 0.003,
      "loss": 4.0563,
      "step": 5278
    },
    {
      "epoch": 0.05279,
      "grad_norm": 0.8905127840769295,
      "learning_rate": 0.003,
      "loss": 4.0569,
      "step": 5279
    },
    {
      "epoch": 0.0528,
      "grad_norm": 0.9160560894552265,
      "learning_rate": 0.003,
      "loss": 4.0569,
      "step": 5280
    },
    {
      "epoch": 0.05281,
      "grad_norm": 0.9898967226775388,
      "learning_rate": 0.003,
      "loss": 4.073,
      "step": 5281
    },
    {
      "epoch": 0.05282,
      "grad_norm": 1.0607654232659915,
      "learning_rate": 0.003,
      "loss": 4.0717,
      "step": 5282
    },
    {
      "epoch": 0.05283,
      "grad_norm": 0.7891736411399345,
      "learning_rate": 0.003,
      "loss": 4.0496,
      "step": 5283
    },
    {
      "epoch": 0.05284,
      "grad_norm": 0.7117985343229531,
      "learning_rate": 0.003,
      "loss": 4.0763,
      "step": 5284
    },
    {
      "epoch": 0.05285,
      "grad_norm": 0.7116143826691809,
      "learning_rate": 0.003,
      "loss": 4.0279,
      "step": 5285
    },
    {
      "epoch": 0.05286,
      "grad_norm": 0.7444977551621328,
      "learning_rate": 0.003,
      "loss": 4.0924,
      "step": 5286
    },
    {
      "epoch": 0.05287,
      "grad_norm": 0.7927132129772895,
      "learning_rate": 0.003,
      "loss": 4.0621,
      "step": 5287
    },
    {
      "epoch": 0.05288,
      "grad_norm": 0.8599837583361685,
      "learning_rate": 0.003,
      "loss": 4.0469,
      "step": 5288
    },
    {
      "epoch": 0.05289,
      "grad_norm": 1.030013828539183,
      "learning_rate": 0.003,
      "loss": 4.0765,
      "step": 5289
    },
    {
      "epoch": 0.0529,
      "grad_norm": 1.320070502048142,
      "learning_rate": 0.003,
      "loss": 4.0834,
      "step": 5290
    },
    {
      "epoch": 0.05291,
      "grad_norm": 0.708003930048573,
      "learning_rate": 0.003,
      "loss": 4.083,
      "step": 5291
    },
    {
      "epoch": 0.05292,
      "grad_norm": 0.7099716170045386,
      "learning_rate": 0.003,
      "loss": 4.0775,
      "step": 5292
    },
    {
      "epoch": 0.05293,
      "grad_norm": 1.0571312622562001,
      "learning_rate": 0.003,
      "loss": 4.0822,
      "step": 5293
    },
    {
      "epoch": 0.05294,
      "grad_norm": 1.0749670114406467,
      "learning_rate": 0.003,
      "loss": 4.063,
      "step": 5294
    },
    {
      "epoch": 0.05295,
      "grad_norm": 0.9466743696328925,
      "learning_rate": 0.003,
      "loss": 4.0777,
      "step": 5295
    },
    {
      "epoch": 0.05296,
      "grad_norm": 0.9334033172520125,
      "learning_rate": 0.003,
      "loss": 4.0574,
      "step": 5296
    },
    {
      "epoch": 0.05297,
      "grad_norm": 0.9085832862861088,
      "learning_rate": 0.003,
      "loss": 4.0592,
      "step": 5297
    },
    {
      "epoch": 0.05298,
      "grad_norm": 0.8733996457415404,
      "learning_rate": 0.003,
      "loss": 4.0767,
      "step": 5298
    },
    {
      "epoch": 0.05299,
      "grad_norm": 0.9180130832605389,
      "learning_rate": 0.003,
      "loss": 4.0536,
      "step": 5299
    },
    {
      "epoch": 0.053,
      "grad_norm": 0.9730509978164584,
      "learning_rate": 0.003,
      "loss": 4.0738,
      "step": 5300
    },
    {
      "epoch": 0.05301,
      "grad_norm": 1.0895476694519508,
      "learning_rate": 0.003,
      "loss": 4.09,
      "step": 5301
    },
    {
      "epoch": 0.05302,
      "grad_norm": 1.0558202746074077,
      "learning_rate": 0.003,
      "loss": 4.0643,
      "step": 5302
    },
    {
      "epoch": 0.05303,
      "grad_norm": 0.9797012960446656,
      "learning_rate": 0.003,
      "loss": 4.0756,
      "step": 5303
    },
    {
      "epoch": 0.05304,
      "grad_norm": 1.0039073817988884,
      "learning_rate": 0.003,
      "loss": 4.0642,
      "step": 5304
    },
    {
      "epoch": 0.05305,
      "grad_norm": 1.1408892390131204,
      "learning_rate": 0.003,
      "loss": 4.0809,
      "step": 5305
    },
    {
      "epoch": 0.05306,
      "grad_norm": 0.93995449289434,
      "learning_rate": 0.003,
      "loss": 4.0814,
      "step": 5306
    },
    {
      "epoch": 0.05307,
      "grad_norm": 0.9726004341078571,
      "learning_rate": 0.003,
      "loss": 4.0793,
      "step": 5307
    },
    {
      "epoch": 0.05308,
      "grad_norm": 0.9846823344904868,
      "learning_rate": 0.003,
      "loss": 4.0905,
      "step": 5308
    },
    {
      "epoch": 0.05309,
      "grad_norm": 1.1645641513184255,
      "learning_rate": 0.003,
      "loss": 4.061,
      "step": 5309
    },
    {
      "epoch": 0.0531,
      "grad_norm": 0.9255234669205171,
      "learning_rate": 0.003,
      "loss": 4.0682,
      "step": 5310
    },
    {
      "epoch": 0.05311,
      "grad_norm": 0.9954755426824138,
      "learning_rate": 0.003,
      "loss": 4.1049,
      "step": 5311
    },
    {
      "epoch": 0.05312,
      "grad_norm": 1.140912604498098,
      "learning_rate": 0.003,
      "loss": 4.0651,
      "step": 5312
    },
    {
      "epoch": 0.05313,
      "grad_norm": 1.0342859706036291,
      "learning_rate": 0.003,
      "loss": 4.0473,
      "step": 5313
    },
    {
      "epoch": 0.05314,
      "grad_norm": 1.0050903165394378,
      "learning_rate": 0.003,
      "loss": 4.0762,
      "step": 5314
    },
    {
      "epoch": 0.05315,
      "grad_norm": 1.082132110001949,
      "learning_rate": 0.003,
      "loss": 4.0846,
      "step": 5315
    },
    {
      "epoch": 0.05316,
      "grad_norm": 0.8775743349709987,
      "learning_rate": 0.003,
      "loss": 4.0834,
      "step": 5316
    },
    {
      "epoch": 0.05317,
      "grad_norm": 0.7524051363352736,
      "learning_rate": 0.003,
      "loss": 4.0802,
      "step": 5317
    },
    {
      "epoch": 0.05318,
      "grad_norm": 0.6968397634473481,
      "learning_rate": 0.003,
      "loss": 4.0477,
      "step": 5318
    },
    {
      "epoch": 0.05319,
      "grad_norm": 0.7303279353638819,
      "learning_rate": 0.003,
      "loss": 4.0522,
      "step": 5319
    },
    {
      "epoch": 0.0532,
      "grad_norm": 0.8312560764557734,
      "learning_rate": 0.003,
      "loss": 4.0663,
      "step": 5320
    },
    {
      "epoch": 0.05321,
      "grad_norm": 0.9440547584257499,
      "learning_rate": 0.003,
      "loss": 4.0686,
      "step": 5321
    },
    {
      "epoch": 0.05322,
      "grad_norm": 1.0145241749604252,
      "learning_rate": 0.003,
      "loss": 4.0914,
      "step": 5322
    },
    {
      "epoch": 0.05323,
      "grad_norm": 1.0026467673881212,
      "learning_rate": 0.003,
      "loss": 4.1116,
      "step": 5323
    },
    {
      "epoch": 0.05324,
      "grad_norm": 0.9550475870668266,
      "learning_rate": 0.003,
      "loss": 4.0532,
      "step": 5324
    },
    {
      "epoch": 0.05325,
      "grad_norm": 1.0200704518446446,
      "learning_rate": 0.003,
      "loss": 4.0775,
      "step": 5325
    },
    {
      "epoch": 0.05326,
      "grad_norm": 0.9264156483614246,
      "learning_rate": 0.003,
      "loss": 4.095,
      "step": 5326
    },
    {
      "epoch": 0.05327,
      "grad_norm": 0.9074863872575771,
      "learning_rate": 0.003,
      "loss": 4.0743,
      "step": 5327
    },
    {
      "epoch": 0.05328,
      "grad_norm": 0.9484736745601532,
      "learning_rate": 0.003,
      "loss": 4.0822,
      "step": 5328
    },
    {
      "epoch": 0.05329,
      "grad_norm": 1.1056353318349814,
      "learning_rate": 0.003,
      "loss": 4.076,
      "step": 5329
    },
    {
      "epoch": 0.0533,
      "grad_norm": 0.8601194407643497,
      "learning_rate": 0.003,
      "loss": 4.0871,
      "step": 5330
    },
    {
      "epoch": 0.05331,
      "grad_norm": 0.8648893165616912,
      "learning_rate": 0.003,
      "loss": 4.0762,
      "step": 5331
    },
    {
      "epoch": 0.05332,
      "grad_norm": 1.0628193250784792,
      "learning_rate": 0.003,
      "loss": 4.0971,
      "step": 5332
    },
    {
      "epoch": 0.05333,
      "grad_norm": 1.1066111469551312,
      "learning_rate": 0.003,
      "loss": 4.0725,
      "step": 5333
    },
    {
      "epoch": 0.05334,
      "grad_norm": 0.953883540436792,
      "learning_rate": 0.003,
      "loss": 4.0553,
      "step": 5334
    },
    {
      "epoch": 0.05335,
      "grad_norm": 0.9008830143608041,
      "learning_rate": 0.003,
      "loss": 4.0857,
      "step": 5335
    },
    {
      "epoch": 0.05336,
      "grad_norm": 0.8996222966966763,
      "learning_rate": 0.003,
      "loss": 4.0943,
      "step": 5336
    },
    {
      "epoch": 0.05337,
      "grad_norm": 0.9470909772908184,
      "learning_rate": 0.003,
      "loss": 4.0839,
      "step": 5337
    },
    {
      "epoch": 0.05338,
      "grad_norm": 0.9368087966120492,
      "learning_rate": 0.003,
      "loss": 4.0689,
      "step": 5338
    },
    {
      "epoch": 0.05339,
      "grad_norm": 0.9839774636806952,
      "learning_rate": 0.003,
      "loss": 4.0553,
      "step": 5339
    },
    {
      "epoch": 0.0534,
      "grad_norm": 1.10664494283966,
      "learning_rate": 0.003,
      "loss": 4.0807,
      "step": 5340
    },
    {
      "epoch": 0.05341,
      "grad_norm": 1.0229490590816641,
      "learning_rate": 0.003,
      "loss": 4.0794,
      "step": 5341
    },
    {
      "epoch": 0.05342,
      "grad_norm": 1.0465584419606189,
      "learning_rate": 0.003,
      "loss": 4.0637,
      "step": 5342
    },
    {
      "epoch": 0.05343,
      "grad_norm": 1.1640384929974874,
      "learning_rate": 0.003,
      "loss": 4.086,
      "step": 5343
    },
    {
      "epoch": 0.05344,
      "grad_norm": 0.884957738973249,
      "learning_rate": 0.003,
      "loss": 4.0428,
      "step": 5344
    },
    {
      "epoch": 0.05345,
      "grad_norm": 0.8433240280430108,
      "learning_rate": 0.003,
      "loss": 4.0515,
      "step": 5345
    },
    {
      "epoch": 0.05346,
      "grad_norm": 0.9566468834365702,
      "learning_rate": 0.003,
      "loss": 4.0722,
      "step": 5346
    },
    {
      "epoch": 0.05347,
      "grad_norm": 0.9368336590324607,
      "learning_rate": 0.003,
      "loss": 4.0619,
      "step": 5347
    },
    {
      "epoch": 0.05348,
      "grad_norm": 1.0141732765839058,
      "learning_rate": 0.003,
      "loss": 4.0592,
      "step": 5348
    },
    {
      "epoch": 0.05349,
      "grad_norm": 1.154078618191757,
      "learning_rate": 0.003,
      "loss": 4.0792,
      "step": 5349
    },
    {
      "epoch": 0.0535,
      "grad_norm": 1.0175917042790932,
      "learning_rate": 0.003,
      "loss": 4.0622,
      "step": 5350
    },
    {
      "epoch": 0.05351,
      "grad_norm": 0.8496580619444323,
      "learning_rate": 0.003,
      "loss": 4.0623,
      "step": 5351
    },
    {
      "epoch": 0.05352,
      "grad_norm": 0.8895367323216903,
      "learning_rate": 0.003,
      "loss": 4.0592,
      "step": 5352
    },
    {
      "epoch": 0.05353,
      "grad_norm": 0.9383414607042285,
      "learning_rate": 0.003,
      "loss": 4.0711,
      "step": 5353
    },
    {
      "epoch": 0.05354,
      "grad_norm": 0.9932999421264617,
      "learning_rate": 0.003,
      "loss": 4.0607,
      "step": 5354
    },
    {
      "epoch": 0.05355,
      "grad_norm": 1.1090959785058532,
      "learning_rate": 0.003,
      "loss": 4.0568,
      "step": 5355
    },
    {
      "epoch": 0.05356,
      "grad_norm": 0.9113253650658273,
      "learning_rate": 0.003,
      "loss": 4.089,
      "step": 5356
    },
    {
      "epoch": 0.05357,
      "grad_norm": 0.9713969124835692,
      "learning_rate": 0.003,
      "loss": 4.0763,
      "step": 5357
    },
    {
      "epoch": 0.05358,
      "grad_norm": 1.1111797677781718,
      "learning_rate": 0.003,
      "loss": 4.0716,
      "step": 5358
    },
    {
      "epoch": 0.05359,
      "grad_norm": 1.12964921180821,
      "learning_rate": 0.003,
      "loss": 4.0863,
      "step": 5359
    },
    {
      "epoch": 0.0536,
      "grad_norm": 0.7734821023817453,
      "learning_rate": 0.003,
      "loss": 4.0673,
      "step": 5360
    },
    {
      "epoch": 0.05361,
      "grad_norm": 0.8519551241940354,
      "learning_rate": 0.003,
      "loss": 4.0789,
      "step": 5361
    },
    {
      "epoch": 0.05362,
      "grad_norm": 0.9322670717841903,
      "learning_rate": 0.003,
      "loss": 4.0897,
      "step": 5362
    },
    {
      "epoch": 0.05363,
      "grad_norm": 1.0074134211588726,
      "learning_rate": 0.003,
      "loss": 4.0958,
      "step": 5363
    },
    {
      "epoch": 0.05364,
      "grad_norm": 0.9873618582440303,
      "learning_rate": 0.003,
      "loss": 4.09,
      "step": 5364
    },
    {
      "epoch": 0.05365,
      "grad_norm": 0.9112488220616313,
      "learning_rate": 0.003,
      "loss": 4.0814,
      "step": 5365
    },
    {
      "epoch": 0.05366,
      "grad_norm": 0.9087748964782536,
      "learning_rate": 0.003,
      "loss": 4.0636,
      "step": 5366
    },
    {
      "epoch": 0.05367,
      "grad_norm": 1.1130213121604795,
      "learning_rate": 0.003,
      "loss": 4.0686,
      "step": 5367
    },
    {
      "epoch": 0.05368,
      "grad_norm": 1.051640889768411,
      "learning_rate": 0.003,
      "loss": 4.0592,
      "step": 5368
    },
    {
      "epoch": 0.05369,
      "grad_norm": 0.9572517998403266,
      "learning_rate": 0.003,
      "loss": 4.0713,
      "step": 5369
    },
    {
      "epoch": 0.0537,
      "grad_norm": 1.132811606167689,
      "learning_rate": 0.003,
      "loss": 4.077,
      "step": 5370
    },
    {
      "epoch": 0.05371,
      "grad_norm": 0.9187614369569057,
      "learning_rate": 0.003,
      "loss": 4.0915,
      "step": 5371
    },
    {
      "epoch": 0.05372,
      "grad_norm": 1.0470001010321264,
      "learning_rate": 0.003,
      "loss": 4.0716,
      "step": 5372
    },
    {
      "epoch": 0.05373,
      "grad_norm": 0.9617588492397875,
      "learning_rate": 0.003,
      "loss": 4.0878,
      "step": 5373
    },
    {
      "epoch": 0.05374,
      "grad_norm": 0.9433193815843771,
      "learning_rate": 0.003,
      "loss": 4.0654,
      "step": 5374
    },
    {
      "epoch": 0.05375,
      "grad_norm": 0.9043465599508047,
      "learning_rate": 0.003,
      "loss": 4.0601,
      "step": 5375
    },
    {
      "epoch": 0.05376,
      "grad_norm": 0.8650017321204917,
      "learning_rate": 0.003,
      "loss": 4.0858,
      "step": 5376
    },
    {
      "epoch": 0.05377,
      "grad_norm": 0.8366118978178542,
      "learning_rate": 0.003,
      "loss": 4.0738,
      "step": 5377
    },
    {
      "epoch": 0.05378,
      "grad_norm": 0.9549435064058331,
      "learning_rate": 0.003,
      "loss": 4.0786,
      "step": 5378
    },
    {
      "epoch": 0.05379,
      "grad_norm": 1.1106796516282058,
      "learning_rate": 0.003,
      "loss": 4.0908,
      "step": 5379
    },
    {
      "epoch": 0.0538,
      "grad_norm": 0.831546358079104,
      "learning_rate": 0.003,
      "loss": 4.054,
      "step": 5380
    },
    {
      "epoch": 0.05381,
      "grad_norm": 0.7873840620558364,
      "learning_rate": 0.003,
      "loss": 4.0624,
      "step": 5381
    },
    {
      "epoch": 0.05382,
      "grad_norm": 0.7745681783586985,
      "learning_rate": 0.003,
      "loss": 4.0811,
      "step": 5382
    },
    {
      "epoch": 0.05383,
      "grad_norm": 0.7612145463814121,
      "learning_rate": 0.003,
      "loss": 4.0764,
      "step": 5383
    },
    {
      "epoch": 0.05384,
      "grad_norm": 0.8198652694354317,
      "learning_rate": 0.003,
      "loss": 4.0569,
      "step": 5384
    },
    {
      "epoch": 0.05385,
      "grad_norm": 0.9417575755533557,
      "learning_rate": 0.003,
      "loss": 4.0871,
      "step": 5385
    },
    {
      "epoch": 0.05386,
      "grad_norm": 0.9333515318457634,
      "learning_rate": 0.003,
      "loss": 4.0683,
      "step": 5386
    },
    {
      "epoch": 0.05387,
      "grad_norm": 0.9470842553395012,
      "learning_rate": 0.003,
      "loss": 4.0524,
      "step": 5387
    },
    {
      "epoch": 0.05388,
      "grad_norm": 1.0202541931803695,
      "learning_rate": 0.003,
      "loss": 4.0944,
      "step": 5388
    },
    {
      "epoch": 0.05389,
      "grad_norm": 1.0837366526435621,
      "learning_rate": 0.003,
      "loss": 4.0717,
      "step": 5389
    },
    {
      "epoch": 0.0539,
      "grad_norm": 0.8073770997786047,
      "learning_rate": 0.003,
      "loss": 4.0349,
      "step": 5390
    },
    {
      "epoch": 0.05391,
      "grad_norm": 0.7682773134551675,
      "learning_rate": 0.003,
      "loss": 4.0506,
      "step": 5391
    },
    {
      "epoch": 0.05392,
      "grad_norm": 0.7018203354535223,
      "learning_rate": 0.003,
      "loss": 4.0532,
      "step": 5392
    },
    {
      "epoch": 0.05393,
      "grad_norm": 0.8253528505413662,
      "learning_rate": 0.003,
      "loss": 4.0812,
      "step": 5393
    },
    {
      "epoch": 0.05394,
      "grad_norm": 0.8821082824869386,
      "learning_rate": 0.003,
      "loss": 4.0553,
      "step": 5394
    },
    {
      "epoch": 0.05395,
      "grad_norm": 1.0233479675976396,
      "learning_rate": 0.003,
      "loss": 4.0882,
      "step": 5395
    },
    {
      "epoch": 0.05396,
      "grad_norm": 0.9868685749565336,
      "learning_rate": 0.003,
      "loss": 4.0791,
      "step": 5396
    },
    {
      "epoch": 0.05397,
      "grad_norm": 0.9032364856754863,
      "learning_rate": 0.003,
      "loss": 4.0506,
      "step": 5397
    },
    {
      "epoch": 0.05398,
      "grad_norm": 0.9305986337257256,
      "learning_rate": 0.003,
      "loss": 4.0513,
      "step": 5398
    },
    {
      "epoch": 0.05399,
      "grad_norm": 0.943643567039578,
      "learning_rate": 0.003,
      "loss": 4.0821,
      "step": 5399
    },
    {
      "epoch": 0.054,
      "grad_norm": 0.9727836672752513,
      "learning_rate": 0.003,
      "loss": 4.0924,
      "step": 5400
    },
    {
      "epoch": 0.05401,
      "grad_norm": 1.0950746328718843,
      "learning_rate": 0.003,
      "loss": 4.0757,
      "step": 5401
    },
    {
      "epoch": 0.05402,
      "grad_norm": 0.9394359475802941,
      "learning_rate": 0.003,
      "loss": 4.0828,
      "step": 5402
    },
    {
      "epoch": 0.05403,
      "grad_norm": 1.100262618217403,
      "learning_rate": 0.003,
      "loss": 4.0753,
      "step": 5403
    },
    {
      "epoch": 0.05404,
      "grad_norm": 0.8289796141686676,
      "learning_rate": 0.003,
      "loss": 4.0628,
      "step": 5404
    },
    {
      "epoch": 0.05405,
      "grad_norm": 0.8162230398030023,
      "learning_rate": 0.003,
      "loss": 4.0671,
      "step": 5405
    },
    {
      "epoch": 0.05406,
      "grad_norm": 0.7883046496886217,
      "learning_rate": 0.003,
      "loss": 4.0444,
      "step": 5406
    },
    {
      "epoch": 0.05407,
      "grad_norm": 0.7936421744252384,
      "learning_rate": 0.003,
      "loss": 4.0599,
      "step": 5407
    },
    {
      "epoch": 0.05408,
      "grad_norm": 0.9053334375282556,
      "learning_rate": 0.003,
      "loss": 4.0505,
      "step": 5408
    },
    {
      "epoch": 0.05409,
      "grad_norm": 0.9451799555181426,
      "learning_rate": 0.003,
      "loss": 4.0201,
      "step": 5409
    },
    {
      "epoch": 0.0541,
      "grad_norm": 1.0666670568701657,
      "learning_rate": 0.003,
      "loss": 4.0735,
      "step": 5410
    },
    {
      "epoch": 0.05411,
      "grad_norm": 0.9181326660070915,
      "learning_rate": 0.003,
      "loss": 4.07,
      "step": 5411
    },
    {
      "epoch": 0.05412,
      "grad_norm": 1.021682961374821,
      "learning_rate": 0.003,
      "loss": 4.0665,
      "step": 5412
    },
    {
      "epoch": 0.05413,
      "grad_norm": 1.1798050564607474,
      "learning_rate": 0.003,
      "loss": 4.0722,
      "step": 5413
    },
    {
      "epoch": 0.05414,
      "grad_norm": 0.9232912835820161,
      "learning_rate": 0.003,
      "loss": 4.0421,
      "step": 5414
    },
    {
      "epoch": 0.05415,
      "grad_norm": 0.8725384280761546,
      "learning_rate": 0.003,
      "loss": 4.0803,
      "step": 5415
    },
    {
      "epoch": 0.05416,
      "grad_norm": 0.8010341905607388,
      "learning_rate": 0.003,
      "loss": 4.0356,
      "step": 5416
    },
    {
      "epoch": 0.05417,
      "grad_norm": 0.9197583841797057,
      "learning_rate": 0.003,
      "loss": 4.0607,
      "step": 5417
    },
    {
      "epoch": 0.05418,
      "grad_norm": 0.9755108515112911,
      "learning_rate": 0.003,
      "loss": 4.0547,
      "step": 5418
    },
    {
      "epoch": 0.05419,
      "grad_norm": 0.9843859573873037,
      "learning_rate": 0.003,
      "loss": 4.0759,
      "step": 5419
    },
    {
      "epoch": 0.0542,
      "grad_norm": 1.1783693707948757,
      "learning_rate": 0.003,
      "loss": 4.0721,
      "step": 5420
    },
    {
      "epoch": 0.05421,
      "grad_norm": 1.0734083109765062,
      "learning_rate": 0.003,
      "loss": 4.0646,
      "step": 5421
    },
    {
      "epoch": 0.05422,
      "grad_norm": 0.980524671161552,
      "learning_rate": 0.003,
      "loss": 4.0763,
      "step": 5422
    },
    {
      "epoch": 0.05423,
      "grad_norm": 1.134071438565773,
      "learning_rate": 0.003,
      "loss": 4.0724,
      "step": 5423
    },
    {
      "epoch": 0.05424,
      "grad_norm": 1.2330179209043135,
      "learning_rate": 0.003,
      "loss": 4.1119,
      "step": 5424
    },
    {
      "epoch": 0.05425,
      "grad_norm": 1.013866607135996,
      "learning_rate": 0.003,
      "loss": 4.0789,
      "step": 5425
    },
    {
      "epoch": 0.05426,
      "grad_norm": 1.1436320829532476,
      "learning_rate": 0.003,
      "loss": 4.0688,
      "step": 5426
    },
    {
      "epoch": 0.05427,
      "grad_norm": 0.8459842849855579,
      "learning_rate": 0.003,
      "loss": 4.057,
      "step": 5427
    },
    {
      "epoch": 0.05428,
      "grad_norm": 0.9549700978254552,
      "learning_rate": 0.003,
      "loss": 4.0747,
      "step": 5428
    },
    {
      "epoch": 0.05429,
      "grad_norm": 0.9172026670565415,
      "learning_rate": 0.003,
      "loss": 4.077,
      "step": 5429
    },
    {
      "epoch": 0.0543,
      "grad_norm": 1.120952695970183,
      "learning_rate": 0.003,
      "loss": 4.0999,
      "step": 5430
    },
    {
      "epoch": 0.05431,
      "grad_norm": 1.1974245320079564,
      "learning_rate": 0.003,
      "loss": 4.0746,
      "step": 5431
    },
    {
      "epoch": 0.05432,
      "grad_norm": 0.8852096173514169,
      "learning_rate": 0.003,
      "loss": 4.0639,
      "step": 5432
    },
    {
      "epoch": 0.05433,
      "grad_norm": 0.9401011460255838,
      "learning_rate": 0.003,
      "loss": 4.0389,
      "step": 5433
    },
    {
      "epoch": 0.05434,
      "grad_norm": 0.9678587871600712,
      "learning_rate": 0.003,
      "loss": 4.0657,
      "step": 5434
    },
    {
      "epoch": 0.05435,
      "grad_norm": 1.0899343219435063,
      "learning_rate": 0.003,
      "loss": 4.0589,
      "step": 5435
    },
    {
      "epoch": 0.05436,
      "grad_norm": 1.0563921820873317,
      "learning_rate": 0.003,
      "loss": 4.0824,
      "step": 5436
    },
    {
      "epoch": 0.05437,
      "grad_norm": 1.0856872480417759,
      "learning_rate": 0.003,
      "loss": 4.0848,
      "step": 5437
    },
    {
      "epoch": 0.05438,
      "grad_norm": 0.7984063525797823,
      "learning_rate": 0.003,
      "loss": 4.0697,
      "step": 5438
    },
    {
      "epoch": 0.05439,
      "grad_norm": 0.7454173407684945,
      "learning_rate": 0.003,
      "loss": 4.0604,
      "step": 5439
    },
    {
      "epoch": 0.0544,
      "grad_norm": 0.7921737605396232,
      "learning_rate": 0.003,
      "loss": 4.0538,
      "step": 5440
    },
    {
      "epoch": 0.05441,
      "grad_norm": 1.0225413249269952,
      "learning_rate": 0.003,
      "loss": 4.0923,
      "step": 5441
    },
    {
      "epoch": 0.05442,
      "grad_norm": 1.2118058028339453,
      "learning_rate": 0.003,
      "loss": 4.0603,
      "step": 5442
    },
    {
      "epoch": 0.05443,
      "grad_norm": 0.6321819000655003,
      "learning_rate": 0.003,
      "loss": 4.0637,
      "step": 5443
    },
    {
      "epoch": 0.05444,
      "grad_norm": 0.8027542268287786,
      "learning_rate": 0.003,
      "loss": 4.0558,
      "step": 5444
    },
    {
      "epoch": 0.05445,
      "grad_norm": 0.8735412506958715,
      "learning_rate": 0.003,
      "loss": 4.0545,
      "step": 5445
    },
    {
      "epoch": 0.05446,
      "grad_norm": 0.7936222595707908,
      "learning_rate": 0.003,
      "loss": 4.0557,
      "step": 5446
    },
    {
      "epoch": 0.05447,
      "grad_norm": 0.6924205266934994,
      "learning_rate": 0.003,
      "loss": 4.0471,
      "step": 5447
    },
    {
      "epoch": 0.05448,
      "grad_norm": 0.6812700924878988,
      "learning_rate": 0.003,
      "loss": 4.0836,
      "step": 5448
    },
    {
      "epoch": 0.05449,
      "grad_norm": 0.7401926125886807,
      "learning_rate": 0.003,
      "loss": 4.073,
      "step": 5449
    },
    {
      "epoch": 0.0545,
      "grad_norm": 0.8398967204050066,
      "learning_rate": 0.003,
      "loss": 4.0719,
      "step": 5450
    },
    {
      "epoch": 0.05451,
      "grad_norm": 0.9232648502496631,
      "learning_rate": 0.003,
      "loss": 4.0973,
      "step": 5451
    },
    {
      "epoch": 0.05452,
      "grad_norm": 1.018982529104012,
      "learning_rate": 0.003,
      "loss": 4.0663,
      "step": 5452
    },
    {
      "epoch": 0.05453,
      "grad_norm": 1.09144709823669,
      "learning_rate": 0.003,
      "loss": 4.0627,
      "step": 5453
    },
    {
      "epoch": 0.05454,
      "grad_norm": 0.9389422208676496,
      "learning_rate": 0.003,
      "loss": 4.0651,
      "step": 5454
    },
    {
      "epoch": 0.05455,
      "grad_norm": 1.011860130767919,
      "learning_rate": 0.003,
      "loss": 4.0927,
      "step": 5455
    },
    {
      "epoch": 0.05456,
      "grad_norm": 1.1005201477477284,
      "learning_rate": 0.003,
      "loss": 4.0715,
      "step": 5456
    },
    {
      "epoch": 0.05457,
      "grad_norm": 0.8394502161551594,
      "learning_rate": 0.003,
      "loss": 4.0859,
      "step": 5457
    },
    {
      "epoch": 0.05458,
      "grad_norm": 0.8522093521223234,
      "learning_rate": 0.003,
      "loss": 4.0609,
      "step": 5458
    },
    {
      "epoch": 0.05459,
      "grad_norm": 0.8521617398500968,
      "learning_rate": 0.003,
      "loss": 4.0715,
      "step": 5459
    },
    {
      "epoch": 0.0546,
      "grad_norm": 0.9898185774528117,
      "learning_rate": 0.003,
      "loss": 4.0659,
      "step": 5460
    },
    {
      "epoch": 0.05461,
      "grad_norm": 1.037914610295231,
      "learning_rate": 0.003,
      "loss": 4.0711,
      "step": 5461
    },
    {
      "epoch": 0.05462,
      "grad_norm": 1.0314633015920627,
      "learning_rate": 0.003,
      "loss": 4.099,
      "step": 5462
    },
    {
      "epoch": 0.05463,
      "grad_norm": 1.0714702174113098,
      "learning_rate": 0.003,
      "loss": 4.0603,
      "step": 5463
    },
    {
      "epoch": 0.05464,
      "grad_norm": 1.0656649423089701,
      "learning_rate": 0.003,
      "loss": 4.0999,
      "step": 5464
    },
    {
      "epoch": 0.05465,
      "grad_norm": 1.0070370867844776,
      "learning_rate": 0.003,
      "loss": 4.0936,
      "step": 5465
    },
    {
      "epoch": 0.05466,
      "grad_norm": 0.971949088054457,
      "learning_rate": 0.003,
      "loss": 4.1024,
      "step": 5466
    },
    {
      "epoch": 0.05467,
      "grad_norm": 1.1027271603054372,
      "learning_rate": 0.003,
      "loss": 4.0866,
      "step": 5467
    },
    {
      "epoch": 0.05468,
      "grad_norm": 0.9722631661342838,
      "learning_rate": 0.003,
      "loss": 4.0569,
      "step": 5468
    },
    {
      "epoch": 0.05469,
      "grad_norm": 1.535529064177968,
      "learning_rate": 0.003,
      "loss": 4.0661,
      "step": 5469
    },
    {
      "epoch": 0.0547,
      "grad_norm": 0.8945992610917329,
      "learning_rate": 0.003,
      "loss": 4.0611,
      "step": 5470
    },
    {
      "epoch": 0.05471,
      "grad_norm": 0.8971438224187216,
      "learning_rate": 0.003,
      "loss": 4.0677,
      "step": 5471
    },
    {
      "epoch": 0.05472,
      "grad_norm": 0.785537906391861,
      "learning_rate": 0.003,
      "loss": 4.061,
      "step": 5472
    },
    {
      "epoch": 0.05473,
      "grad_norm": 0.7645096419487161,
      "learning_rate": 0.003,
      "loss": 4.0775,
      "step": 5473
    },
    {
      "epoch": 0.05474,
      "grad_norm": 0.9043493315770855,
      "learning_rate": 0.003,
      "loss": 4.0522,
      "step": 5474
    },
    {
      "epoch": 0.05475,
      "grad_norm": 0.9826087228695892,
      "learning_rate": 0.003,
      "loss": 4.1121,
      "step": 5475
    },
    {
      "epoch": 0.05476,
      "grad_norm": 1.0455880982626005,
      "learning_rate": 0.003,
      "loss": 4.0383,
      "step": 5476
    },
    {
      "epoch": 0.05477,
      "grad_norm": 1.0460718667427993,
      "learning_rate": 0.003,
      "loss": 4.0667,
      "step": 5477
    },
    {
      "epoch": 0.05478,
      "grad_norm": 0.9784914027077458,
      "learning_rate": 0.003,
      "loss": 4.0654,
      "step": 5478
    },
    {
      "epoch": 0.05479,
      "grad_norm": 1.06468120790793,
      "learning_rate": 0.003,
      "loss": 4.0835,
      "step": 5479
    },
    {
      "epoch": 0.0548,
      "grad_norm": 0.9724427087606514,
      "learning_rate": 0.003,
      "loss": 4.08,
      "step": 5480
    },
    {
      "epoch": 0.05481,
      "grad_norm": 1.139450823817563,
      "learning_rate": 0.003,
      "loss": 4.0741,
      "step": 5481
    },
    {
      "epoch": 0.05482,
      "grad_norm": 1.0103917095506671,
      "learning_rate": 0.003,
      "loss": 4.0652,
      "step": 5482
    },
    {
      "epoch": 0.05483,
      "grad_norm": 0.9941838375298019,
      "learning_rate": 0.003,
      "loss": 4.0665,
      "step": 5483
    },
    {
      "epoch": 0.05484,
      "grad_norm": 1.0065630407668353,
      "learning_rate": 0.003,
      "loss": 4.0901,
      "step": 5484
    },
    {
      "epoch": 0.05485,
      "grad_norm": 1.1859794607003078,
      "learning_rate": 0.003,
      "loss": 4.1009,
      "step": 5485
    },
    {
      "epoch": 0.05486,
      "grad_norm": 0.9117774295489004,
      "learning_rate": 0.003,
      "loss": 4.0766,
      "step": 5486
    },
    {
      "epoch": 0.05487,
      "grad_norm": 1.0354321077724413,
      "learning_rate": 0.003,
      "loss": 4.0847,
      "step": 5487
    },
    {
      "epoch": 0.05488,
      "grad_norm": 0.9520742156522823,
      "learning_rate": 0.003,
      "loss": 4.0785,
      "step": 5488
    },
    {
      "epoch": 0.05489,
      "grad_norm": 0.9583850608961483,
      "learning_rate": 0.003,
      "loss": 4.1004,
      "step": 5489
    },
    {
      "epoch": 0.0549,
      "grad_norm": 0.9323245193356091,
      "learning_rate": 0.003,
      "loss": 4.0724,
      "step": 5490
    },
    {
      "epoch": 0.05491,
      "grad_norm": 0.9691425932975921,
      "learning_rate": 0.003,
      "loss": 4.0421,
      "step": 5491
    },
    {
      "epoch": 0.05492,
      "grad_norm": 1.0404125399983395,
      "learning_rate": 0.003,
      "loss": 4.092,
      "step": 5492
    },
    {
      "epoch": 0.05493,
      "grad_norm": 0.8797071139515741,
      "learning_rate": 0.003,
      "loss": 4.0522,
      "step": 5493
    },
    {
      "epoch": 0.05494,
      "grad_norm": 0.9257700859047452,
      "learning_rate": 0.003,
      "loss": 4.0842,
      "step": 5494
    },
    {
      "epoch": 0.05495,
      "grad_norm": 0.837046815044842,
      "learning_rate": 0.003,
      "loss": 4.0673,
      "step": 5495
    },
    {
      "epoch": 0.05496,
      "grad_norm": 0.868795138162349,
      "learning_rate": 0.003,
      "loss": 4.0595,
      "step": 5496
    },
    {
      "epoch": 0.05497,
      "grad_norm": 1.1666053988158387,
      "learning_rate": 0.003,
      "loss": 4.0877,
      "step": 5497
    },
    {
      "epoch": 0.05498,
      "grad_norm": 0.9182958212992614,
      "learning_rate": 0.003,
      "loss": 4.066,
      "step": 5498
    },
    {
      "epoch": 0.05499,
      "grad_norm": 0.8243177544407708,
      "learning_rate": 0.003,
      "loss": 4.0472,
      "step": 5499
    },
    {
      "epoch": 0.055,
      "grad_norm": 0.930633426581144,
      "learning_rate": 0.003,
      "loss": 4.0516,
      "step": 5500
    },
    {
      "epoch": 0.05501,
      "grad_norm": 0.9853106164483082,
      "learning_rate": 0.003,
      "loss": 4.0665,
      "step": 5501
    },
    {
      "epoch": 0.05502,
      "grad_norm": 1.002532887849376,
      "learning_rate": 0.003,
      "loss": 4.0537,
      "step": 5502
    },
    {
      "epoch": 0.05503,
      "grad_norm": 1.134728987679396,
      "learning_rate": 0.003,
      "loss": 4.0643,
      "step": 5503
    },
    {
      "epoch": 0.05504,
      "grad_norm": 0.8688543177042416,
      "learning_rate": 0.003,
      "loss": 4.1085,
      "step": 5504
    },
    {
      "epoch": 0.05505,
      "grad_norm": 0.8885019557224482,
      "learning_rate": 0.003,
      "loss": 4.0594,
      "step": 5505
    },
    {
      "epoch": 0.05506,
      "grad_norm": 1.0191696763985227,
      "learning_rate": 0.003,
      "loss": 4.0397,
      "step": 5506
    },
    {
      "epoch": 0.05507,
      "grad_norm": 0.9581989274432549,
      "learning_rate": 0.003,
      "loss": 4.0946,
      "step": 5507
    },
    {
      "epoch": 0.05508,
      "grad_norm": 1.2059958374753366,
      "learning_rate": 0.003,
      "loss": 4.0772,
      "step": 5508
    },
    {
      "epoch": 0.05509,
      "grad_norm": 1.0509079701116766,
      "learning_rate": 0.003,
      "loss": 4.0711,
      "step": 5509
    },
    {
      "epoch": 0.0551,
      "grad_norm": 0.9629459111896204,
      "learning_rate": 0.003,
      "loss": 4.0534,
      "step": 5510
    },
    {
      "epoch": 0.05511,
      "grad_norm": 0.9385152557998871,
      "learning_rate": 0.003,
      "loss": 4.0771,
      "step": 5511
    },
    {
      "epoch": 0.05512,
      "grad_norm": 0.9992228341288117,
      "learning_rate": 0.003,
      "loss": 4.0826,
      "step": 5512
    },
    {
      "epoch": 0.05513,
      "grad_norm": 0.9076413869206522,
      "learning_rate": 0.003,
      "loss": 4.0662,
      "step": 5513
    },
    {
      "epoch": 0.05514,
      "grad_norm": 0.9454063730057051,
      "learning_rate": 0.003,
      "loss": 4.0752,
      "step": 5514
    },
    {
      "epoch": 0.05515,
      "grad_norm": 0.9333899274103106,
      "learning_rate": 0.003,
      "loss": 4.0558,
      "step": 5515
    },
    {
      "epoch": 0.05516,
      "grad_norm": 0.9128223147263614,
      "learning_rate": 0.003,
      "loss": 4.0914,
      "step": 5516
    },
    {
      "epoch": 0.05517,
      "grad_norm": 0.9501252498361874,
      "learning_rate": 0.003,
      "loss": 4.0529,
      "step": 5517
    },
    {
      "epoch": 0.05518,
      "grad_norm": 0.8979581497014404,
      "learning_rate": 0.003,
      "loss": 4.0721,
      "step": 5518
    },
    {
      "epoch": 0.05519,
      "grad_norm": 0.8443581212066782,
      "learning_rate": 0.003,
      "loss": 4.0975,
      "step": 5519
    },
    {
      "epoch": 0.0552,
      "grad_norm": 0.8902006864056781,
      "learning_rate": 0.003,
      "loss": 4.0685,
      "step": 5520
    },
    {
      "epoch": 0.05521,
      "grad_norm": 1.087402109927747,
      "learning_rate": 0.003,
      "loss": 4.0883,
      "step": 5521
    },
    {
      "epoch": 0.05522,
      "grad_norm": 1.0502403491974475,
      "learning_rate": 0.003,
      "loss": 4.1026,
      "step": 5522
    },
    {
      "epoch": 0.05523,
      "grad_norm": 1.0452410374132677,
      "learning_rate": 0.003,
      "loss": 4.0777,
      "step": 5523
    },
    {
      "epoch": 0.05524,
      "grad_norm": 0.9514484278835924,
      "learning_rate": 0.003,
      "loss": 4.0862,
      "step": 5524
    },
    {
      "epoch": 0.05525,
      "grad_norm": 1.0272241045462294,
      "learning_rate": 0.003,
      "loss": 4.0567,
      "step": 5525
    },
    {
      "epoch": 0.05526,
      "grad_norm": 0.8733718754810375,
      "learning_rate": 0.003,
      "loss": 4.0598,
      "step": 5526
    },
    {
      "epoch": 0.05527,
      "grad_norm": 0.8731879871732975,
      "learning_rate": 0.003,
      "loss": 4.0216,
      "step": 5527
    },
    {
      "epoch": 0.05528,
      "grad_norm": 1.1037182994286576,
      "learning_rate": 0.003,
      "loss": 4.0739,
      "step": 5528
    },
    {
      "epoch": 0.05529,
      "grad_norm": 1.0027208430036605,
      "learning_rate": 0.003,
      "loss": 4.0852,
      "step": 5529
    },
    {
      "epoch": 0.0553,
      "grad_norm": 1.035023109428945,
      "learning_rate": 0.003,
      "loss": 4.0996,
      "step": 5530
    },
    {
      "epoch": 0.05531,
      "grad_norm": 0.9462736114003235,
      "learning_rate": 0.003,
      "loss": 4.0845,
      "step": 5531
    },
    {
      "epoch": 0.05532,
      "grad_norm": 1.1792902449799267,
      "learning_rate": 0.003,
      "loss": 4.0782,
      "step": 5532
    },
    {
      "epoch": 0.05533,
      "grad_norm": 1.0162972400663883,
      "learning_rate": 0.003,
      "loss": 4.0923,
      "step": 5533
    },
    {
      "epoch": 0.05534,
      "grad_norm": 1.1486180781871198,
      "learning_rate": 0.003,
      "loss": 4.0833,
      "step": 5534
    },
    {
      "epoch": 0.05535,
      "grad_norm": 1.0752582119191083,
      "learning_rate": 0.003,
      "loss": 4.0953,
      "step": 5535
    },
    {
      "epoch": 0.05536,
      "grad_norm": 0.9581234101005884,
      "learning_rate": 0.003,
      "loss": 4.0797,
      "step": 5536
    },
    {
      "epoch": 0.05537,
      "grad_norm": 0.8208048349609377,
      "learning_rate": 0.003,
      "loss": 4.0722,
      "step": 5537
    },
    {
      "epoch": 0.05538,
      "grad_norm": 0.9473770252339098,
      "learning_rate": 0.003,
      "loss": 4.0707,
      "step": 5538
    },
    {
      "epoch": 0.05539,
      "grad_norm": 1.2702509072007524,
      "learning_rate": 0.003,
      "loss": 4.0646,
      "step": 5539
    },
    {
      "epoch": 0.0554,
      "grad_norm": 0.9017522335362036,
      "learning_rate": 0.003,
      "loss": 4.1166,
      "step": 5540
    },
    {
      "epoch": 0.05541,
      "grad_norm": 0.8359582648956668,
      "learning_rate": 0.003,
      "loss": 4.0888,
      "step": 5541
    },
    {
      "epoch": 0.05542,
      "grad_norm": 0.9218480107385333,
      "learning_rate": 0.003,
      "loss": 4.0662,
      "step": 5542
    },
    {
      "epoch": 0.05543,
      "grad_norm": 0.8696347559650821,
      "learning_rate": 0.003,
      "loss": 4.0559,
      "step": 5543
    },
    {
      "epoch": 0.05544,
      "grad_norm": 1.055625206756629,
      "learning_rate": 0.003,
      "loss": 4.1022,
      "step": 5544
    },
    {
      "epoch": 0.05545,
      "grad_norm": 1.1728733370559363,
      "learning_rate": 0.003,
      "loss": 4.0957,
      "step": 5545
    },
    {
      "epoch": 0.05546,
      "grad_norm": 0.7788999498236301,
      "learning_rate": 0.003,
      "loss": 4.0702,
      "step": 5546
    },
    {
      "epoch": 0.05547,
      "grad_norm": 0.8311570515952018,
      "learning_rate": 0.003,
      "loss": 4.0669,
      "step": 5547
    },
    {
      "epoch": 0.05548,
      "grad_norm": 0.9335591947026343,
      "learning_rate": 0.003,
      "loss": 4.0453,
      "step": 5548
    },
    {
      "epoch": 0.05549,
      "grad_norm": 1.0573495296822653,
      "learning_rate": 0.003,
      "loss": 4.0933,
      "step": 5549
    },
    {
      "epoch": 0.0555,
      "grad_norm": 1.0502011204824901,
      "learning_rate": 0.003,
      "loss": 4.0719,
      "step": 5550
    },
    {
      "epoch": 0.05551,
      "grad_norm": 0.9831951258747678,
      "learning_rate": 0.003,
      "loss": 4.0369,
      "step": 5551
    },
    {
      "epoch": 0.05552,
      "grad_norm": 1.1313502639381925,
      "learning_rate": 0.003,
      "loss": 4.0597,
      "step": 5552
    },
    {
      "epoch": 0.05553,
      "grad_norm": 0.8713935309218466,
      "learning_rate": 0.003,
      "loss": 4.0751,
      "step": 5553
    },
    {
      "epoch": 0.05554,
      "grad_norm": 0.9135700599486382,
      "learning_rate": 0.003,
      "loss": 4.0607,
      "step": 5554
    },
    {
      "epoch": 0.05555,
      "grad_norm": 0.8386399502596644,
      "learning_rate": 0.003,
      "loss": 4.1019,
      "step": 5555
    },
    {
      "epoch": 0.05556,
      "grad_norm": 0.8427182198646174,
      "learning_rate": 0.003,
      "loss": 4.0535,
      "step": 5556
    },
    {
      "epoch": 0.05557,
      "grad_norm": 0.8853744024878168,
      "learning_rate": 0.003,
      "loss": 4.0796,
      "step": 5557
    },
    {
      "epoch": 0.05558,
      "grad_norm": 0.8682691991197287,
      "learning_rate": 0.003,
      "loss": 4.0396,
      "step": 5558
    },
    {
      "epoch": 0.05559,
      "grad_norm": 0.9168284316303011,
      "learning_rate": 0.003,
      "loss": 4.065,
      "step": 5559
    },
    {
      "epoch": 0.0556,
      "grad_norm": 0.8495910068211255,
      "learning_rate": 0.003,
      "loss": 4.088,
      "step": 5560
    },
    {
      "epoch": 0.05561,
      "grad_norm": 0.8580860935781918,
      "learning_rate": 0.003,
      "loss": 4.045,
      "step": 5561
    },
    {
      "epoch": 0.05562,
      "grad_norm": 0.8995255593141265,
      "learning_rate": 0.003,
      "loss": 4.0569,
      "step": 5562
    },
    {
      "epoch": 0.05563,
      "grad_norm": 0.9397411181545186,
      "learning_rate": 0.003,
      "loss": 4.067,
      "step": 5563
    },
    {
      "epoch": 0.05564,
      "grad_norm": 0.9961124788426614,
      "learning_rate": 0.003,
      "loss": 4.052,
      "step": 5564
    },
    {
      "epoch": 0.05565,
      "grad_norm": 1.017582443976892,
      "learning_rate": 0.003,
      "loss": 4.0949,
      "step": 5565
    },
    {
      "epoch": 0.05566,
      "grad_norm": 0.9196943277209378,
      "learning_rate": 0.003,
      "loss": 4.0422,
      "step": 5566
    },
    {
      "epoch": 0.05567,
      "grad_norm": 0.887778482100592,
      "learning_rate": 0.003,
      "loss": 4.0503,
      "step": 5567
    },
    {
      "epoch": 0.05568,
      "grad_norm": 0.9446867885667367,
      "learning_rate": 0.003,
      "loss": 4.0839,
      "step": 5568
    },
    {
      "epoch": 0.05569,
      "grad_norm": 1.2048355489642966,
      "learning_rate": 0.003,
      "loss": 4.0653,
      "step": 5569
    },
    {
      "epoch": 0.0557,
      "grad_norm": 0.9947727024541948,
      "learning_rate": 0.003,
      "loss": 4.0846,
      "step": 5570
    },
    {
      "epoch": 0.05571,
      "grad_norm": 0.9242279105036012,
      "learning_rate": 0.003,
      "loss": 4.0716,
      "step": 5571
    },
    {
      "epoch": 0.05572,
      "grad_norm": 0.8718726616570441,
      "learning_rate": 0.003,
      "loss": 4.0475,
      "step": 5572
    },
    {
      "epoch": 0.05573,
      "grad_norm": 0.9988632134064598,
      "learning_rate": 0.003,
      "loss": 4.0917,
      "step": 5573
    },
    {
      "epoch": 0.05574,
      "grad_norm": 1.0858108953319112,
      "learning_rate": 0.003,
      "loss": 4.067,
      "step": 5574
    },
    {
      "epoch": 0.05575,
      "grad_norm": 0.89341861731438,
      "learning_rate": 0.003,
      "loss": 4.0599,
      "step": 5575
    },
    {
      "epoch": 0.05576,
      "grad_norm": 0.9927093004321265,
      "learning_rate": 0.003,
      "loss": 4.0382,
      "step": 5576
    },
    {
      "epoch": 0.05577,
      "grad_norm": 1.0561976942629276,
      "learning_rate": 0.003,
      "loss": 4.0644,
      "step": 5577
    },
    {
      "epoch": 0.05578,
      "grad_norm": 0.9226854378392683,
      "learning_rate": 0.003,
      "loss": 4.0614,
      "step": 5578
    },
    {
      "epoch": 0.05579,
      "grad_norm": 0.9660492552781841,
      "learning_rate": 0.003,
      "loss": 4.0787,
      "step": 5579
    },
    {
      "epoch": 0.0558,
      "grad_norm": 1.0219080757366508,
      "learning_rate": 0.003,
      "loss": 4.0709,
      "step": 5580
    },
    {
      "epoch": 0.05581,
      "grad_norm": 0.9262442274506089,
      "learning_rate": 0.003,
      "loss": 4.084,
      "step": 5581
    },
    {
      "epoch": 0.05582,
      "grad_norm": 0.9589806371639085,
      "learning_rate": 0.003,
      "loss": 4.096,
      "step": 5582
    },
    {
      "epoch": 0.05583,
      "grad_norm": 1.0645509578183838,
      "learning_rate": 0.003,
      "loss": 4.0734,
      "step": 5583
    },
    {
      "epoch": 0.05584,
      "grad_norm": 1.051226914948146,
      "learning_rate": 0.003,
      "loss": 4.0703,
      "step": 5584
    },
    {
      "epoch": 0.05585,
      "grad_norm": 1.0467720353310674,
      "learning_rate": 0.003,
      "loss": 4.043,
      "step": 5585
    },
    {
      "epoch": 0.05586,
      "grad_norm": 0.9663212590556503,
      "learning_rate": 0.003,
      "loss": 4.0829,
      "step": 5586
    },
    {
      "epoch": 0.05587,
      "grad_norm": 0.9185379338813185,
      "learning_rate": 0.003,
      "loss": 4.0844,
      "step": 5587
    },
    {
      "epoch": 0.05588,
      "grad_norm": 0.9694934031853484,
      "learning_rate": 0.003,
      "loss": 4.0574,
      "step": 5588
    },
    {
      "epoch": 0.05589,
      "grad_norm": 1.0526451937838062,
      "learning_rate": 0.003,
      "loss": 4.0742,
      "step": 5589
    },
    {
      "epoch": 0.0559,
      "grad_norm": 1.1085070368071448,
      "learning_rate": 0.003,
      "loss": 4.0737,
      "step": 5590
    },
    {
      "epoch": 0.05591,
      "grad_norm": 1.0162203009874562,
      "learning_rate": 0.003,
      "loss": 4.079,
      "step": 5591
    },
    {
      "epoch": 0.05592,
      "grad_norm": 0.951694969481166,
      "learning_rate": 0.003,
      "loss": 4.0804,
      "step": 5592
    },
    {
      "epoch": 0.05593,
      "grad_norm": 0.8175074193854779,
      "learning_rate": 0.003,
      "loss": 4.064,
      "step": 5593
    },
    {
      "epoch": 0.05594,
      "grad_norm": 0.9255694773963952,
      "learning_rate": 0.003,
      "loss": 4.0578,
      "step": 5594
    },
    {
      "epoch": 0.05595,
      "grad_norm": 1.0156609990647611,
      "learning_rate": 0.003,
      "loss": 4.0433,
      "step": 5595
    },
    {
      "epoch": 0.05596,
      "grad_norm": 0.932955278279892,
      "learning_rate": 0.003,
      "loss": 4.0517,
      "step": 5596
    },
    {
      "epoch": 0.05597,
      "grad_norm": 0.920908250325548,
      "learning_rate": 0.003,
      "loss": 4.0877,
      "step": 5597
    },
    {
      "epoch": 0.05598,
      "grad_norm": 1.0626157594613586,
      "learning_rate": 0.003,
      "loss": 4.0501,
      "step": 5598
    },
    {
      "epoch": 0.05599,
      "grad_norm": 0.9591722885772912,
      "learning_rate": 0.003,
      "loss": 4.0951,
      "step": 5599
    },
    {
      "epoch": 0.056,
      "grad_norm": 0.8972051686796552,
      "learning_rate": 0.003,
      "loss": 4.0365,
      "step": 5600
    },
    {
      "epoch": 0.05601,
      "grad_norm": 0.94039167026587,
      "learning_rate": 0.003,
      "loss": 4.0535,
      "step": 5601
    },
    {
      "epoch": 0.05602,
      "grad_norm": 0.8789138951678084,
      "learning_rate": 0.003,
      "loss": 4.0467,
      "step": 5602
    },
    {
      "epoch": 0.05603,
      "grad_norm": 1.0980048694137343,
      "learning_rate": 0.003,
      "loss": 4.0647,
      "step": 5603
    },
    {
      "epoch": 0.05604,
      "grad_norm": 1.132678130199623,
      "learning_rate": 0.003,
      "loss": 4.1005,
      "step": 5604
    },
    {
      "epoch": 0.05605,
      "grad_norm": 0.9376010958362765,
      "learning_rate": 0.003,
      "loss": 4.0273,
      "step": 5605
    },
    {
      "epoch": 0.05606,
      "grad_norm": 1.012601875884022,
      "learning_rate": 0.003,
      "loss": 4.0823,
      "step": 5606
    },
    {
      "epoch": 0.05607,
      "grad_norm": 1.078658031552116,
      "learning_rate": 0.003,
      "loss": 4.1016,
      "step": 5607
    },
    {
      "epoch": 0.05608,
      "grad_norm": 1.0104480034658165,
      "learning_rate": 0.003,
      "loss": 4.0225,
      "step": 5608
    },
    {
      "epoch": 0.05609,
      "grad_norm": 1.1587851237585396,
      "learning_rate": 0.003,
      "loss": 4.0737,
      "step": 5609
    },
    {
      "epoch": 0.0561,
      "grad_norm": 0.9960041589297254,
      "learning_rate": 0.003,
      "loss": 4.1041,
      "step": 5610
    },
    {
      "epoch": 0.05611,
      "grad_norm": 0.9361590301922911,
      "learning_rate": 0.003,
      "loss": 4.072,
      "step": 5611
    },
    {
      "epoch": 0.05612,
      "grad_norm": 0.9764370565765741,
      "learning_rate": 0.003,
      "loss": 4.0647,
      "step": 5612
    },
    {
      "epoch": 0.05613,
      "grad_norm": 1.0014577790700079,
      "learning_rate": 0.003,
      "loss": 4.0891,
      "step": 5613
    },
    {
      "epoch": 0.05614,
      "grad_norm": 0.9831216849472494,
      "learning_rate": 0.003,
      "loss": 4.0713,
      "step": 5614
    },
    {
      "epoch": 0.05615,
      "grad_norm": 1.1586083324328103,
      "learning_rate": 0.003,
      "loss": 4.0484,
      "step": 5615
    },
    {
      "epoch": 0.05616,
      "grad_norm": 0.8890921040375313,
      "learning_rate": 0.003,
      "loss": 4.0553,
      "step": 5616
    },
    {
      "epoch": 0.05617,
      "grad_norm": 0.7957949051675782,
      "learning_rate": 0.003,
      "loss": 4.0774,
      "step": 5617
    },
    {
      "epoch": 0.05618,
      "grad_norm": 0.7215932182196926,
      "learning_rate": 0.003,
      "loss": 4.0523,
      "step": 5618
    },
    {
      "epoch": 0.05619,
      "grad_norm": 0.7741180783292809,
      "learning_rate": 0.003,
      "loss": 4.1083,
      "step": 5619
    },
    {
      "epoch": 0.0562,
      "grad_norm": 0.7511553658323501,
      "learning_rate": 0.003,
      "loss": 4.0637,
      "step": 5620
    },
    {
      "epoch": 0.05621,
      "grad_norm": 0.6396239807569799,
      "learning_rate": 0.003,
      "loss": 4.0687,
      "step": 5621
    },
    {
      "epoch": 0.05622,
      "grad_norm": 0.6074326667753387,
      "learning_rate": 0.003,
      "loss": 4.0512,
      "step": 5622
    },
    {
      "epoch": 0.05623,
      "grad_norm": 0.6362168788902988,
      "learning_rate": 0.003,
      "loss": 4.0722,
      "step": 5623
    },
    {
      "epoch": 0.05624,
      "grad_norm": 0.7164948632194134,
      "learning_rate": 0.003,
      "loss": 4.0905,
      "step": 5624
    },
    {
      "epoch": 0.05625,
      "grad_norm": 0.7858917280239912,
      "learning_rate": 0.003,
      "loss": 4.084,
      "step": 5625
    },
    {
      "epoch": 0.05626,
      "grad_norm": 0.9610068641748187,
      "learning_rate": 0.003,
      "loss": 4.0547,
      "step": 5626
    },
    {
      "epoch": 0.05627,
      "grad_norm": 1.0076786147054788,
      "learning_rate": 0.003,
      "loss": 4.0661,
      "step": 5627
    },
    {
      "epoch": 0.05628,
      "grad_norm": 1.1184221088760653,
      "learning_rate": 0.003,
      "loss": 4.0685,
      "step": 5628
    },
    {
      "epoch": 0.05629,
      "grad_norm": 1.009005087989754,
      "learning_rate": 0.003,
      "loss": 4.0715,
      "step": 5629
    },
    {
      "epoch": 0.0563,
      "grad_norm": 0.9675845365105762,
      "learning_rate": 0.003,
      "loss": 4.0532,
      "step": 5630
    },
    {
      "epoch": 0.05631,
      "grad_norm": 0.9722083145337348,
      "learning_rate": 0.003,
      "loss": 4.0425,
      "step": 5631
    },
    {
      "epoch": 0.05632,
      "grad_norm": 0.9663539092020825,
      "learning_rate": 0.003,
      "loss": 4.0739,
      "step": 5632
    },
    {
      "epoch": 0.05633,
      "grad_norm": 0.9758861223474846,
      "learning_rate": 0.003,
      "loss": 4.0704,
      "step": 5633
    },
    {
      "epoch": 0.05634,
      "grad_norm": 1.1099380799726346,
      "learning_rate": 0.003,
      "loss": 4.0551,
      "step": 5634
    },
    {
      "epoch": 0.05635,
      "grad_norm": 0.9793289159407238,
      "learning_rate": 0.003,
      "loss": 4.0725,
      "step": 5635
    },
    {
      "epoch": 0.05636,
      "grad_norm": 1.0928319649450002,
      "learning_rate": 0.003,
      "loss": 4.0674,
      "step": 5636
    },
    {
      "epoch": 0.05637,
      "grad_norm": 0.9411278378658644,
      "learning_rate": 0.003,
      "loss": 4.1044,
      "step": 5637
    },
    {
      "epoch": 0.05638,
      "grad_norm": 1.006716505320645,
      "learning_rate": 0.003,
      "loss": 4.056,
      "step": 5638
    },
    {
      "epoch": 0.05639,
      "grad_norm": 1.140827528278389,
      "learning_rate": 0.003,
      "loss": 4.076,
      "step": 5639
    },
    {
      "epoch": 0.0564,
      "grad_norm": 1.1625088389769833,
      "learning_rate": 0.003,
      "loss": 4.0816,
      "step": 5640
    },
    {
      "epoch": 0.05641,
      "grad_norm": 0.9893427923995295,
      "learning_rate": 0.003,
      "loss": 4.0295,
      "step": 5641
    },
    {
      "epoch": 0.05642,
      "grad_norm": 0.9716727488546525,
      "learning_rate": 0.003,
      "loss": 4.0966,
      "step": 5642
    },
    {
      "epoch": 0.05643,
      "grad_norm": 0.8388359388426512,
      "learning_rate": 0.003,
      "loss": 4.0521,
      "step": 5643
    },
    {
      "epoch": 0.05644,
      "grad_norm": 0.8864776479628189,
      "learning_rate": 0.003,
      "loss": 4.0654,
      "step": 5644
    },
    {
      "epoch": 0.05645,
      "grad_norm": 1.0289020783924894,
      "learning_rate": 0.003,
      "loss": 4.058,
      "step": 5645
    },
    {
      "epoch": 0.05646,
      "grad_norm": 1.1004794764123877,
      "learning_rate": 0.003,
      "loss": 4.0709,
      "step": 5646
    },
    {
      "epoch": 0.05647,
      "grad_norm": 0.9547139445319417,
      "learning_rate": 0.003,
      "loss": 4.0686,
      "step": 5647
    },
    {
      "epoch": 0.05648,
      "grad_norm": 0.967189977084837,
      "learning_rate": 0.003,
      "loss": 4.0816,
      "step": 5648
    },
    {
      "epoch": 0.05649,
      "grad_norm": 0.973718841402612,
      "learning_rate": 0.003,
      "loss": 4.0719,
      "step": 5649
    },
    {
      "epoch": 0.0565,
      "grad_norm": 1.02998820647941,
      "learning_rate": 0.003,
      "loss": 4.0905,
      "step": 5650
    },
    {
      "epoch": 0.05651,
      "grad_norm": 0.8716926811029921,
      "learning_rate": 0.003,
      "loss": 4.048,
      "step": 5651
    },
    {
      "epoch": 0.05652,
      "grad_norm": 0.8882622031970813,
      "learning_rate": 0.003,
      "loss": 4.0499,
      "step": 5652
    },
    {
      "epoch": 0.05653,
      "grad_norm": 0.8791179996457095,
      "learning_rate": 0.003,
      "loss": 4.077,
      "step": 5653
    },
    {
      "epoch": 0.05654,
      "grad_norm": 0.9428467074127278,
      "learning_rate": 0.003,
      "loss": 4.0866,
      "step": 5654
    },
    {
      "epoch": 0.05655,
      "grad_norm": 0.9168230086253641,
      "learning_rate": 0.003,
      "loss": 4.0776,
      "step": 5655
    },
    {
      "epoch": 0.05656,
      "grad_norm": 0.9563246120783195,
      "learning_rate": 0.003,
      "loss": 4.0765,
      "step": 5656
    },
    {
      "epoch": 0.05657,
      "grad_norm": 1.0720010551454098,
      "learning_rate": 0.003,
      "loss": 4.0778,
      "step": 5657
    },
    {
      "epoch": 0.05658,
      "grad_norm": 0.8856156383346027,
      "learning_rate": 0.003,
      "loss": 4.0643,
      "step": 5658
    },
    {
      "epoch": 0.05659,
      "grad_norm": 0.8368418786449191,
      "learning_rate": 0.003,
      "loss": 4.0653,
      "step": 5659
    },
    {
      "epoch": 0.0566,
      "grad_norm": 0.8374298075537798,
      "learning_rate": 0.003,
      "loss": 4.0704,
      "step": 5660
    },
    {
      "epoch": 0.05661,
      "grad_norm": 0.9364487912430249,
      "learning_rate": 0.003,
      "loss": 4.1059,
      "step": 5661
    },
    {
      "epoch": 0.05662,
      "grad_norm": 1.000052264217599,
      "learning_rate": 0.003,
      "loss": 4.0506,
      "step": 5662
    },
    {
      "epoch": 0.05663,
      "grad_norm": 1.0446835856273087,
      "learning_rate": 0.003,
      "loss": 4.0731,
      "step": 5663
    },
    {
      "epoch": 0.05664,
      "grad_norm": 0.9023506375796267,
      "learning_rate": 0.003,
      "loss": 4.045,
      "step": 5664
    },
    {
      "epoch": 0.05665,
      "grad_norm": 0.977889227291688,
      "learning_rate": 0.003,
      "loss": 4.0567,
      "step": 5665
    },
    {
      "epoch": 0.05666,
      "grad_norm": 1.130926840966814,
      "learning_rate": 0.003,
      "loss": 4.0754,
      "step": 5666
    },
    {
      "epoch": 0.05667,
      "grad_norm": 1.0607816508483603,
      "learning_rate": 0.003,
      "loss": 4.0466,
      "step": 5667
    },
    {
      "epoch": 0.05668,
      "grad_norm": 0.9792158523254157,
      "learning_rate": 0.003,
      "loss": 4.0887,
      "step": 5668
    },
    {
      "epoch": 0.05669,
      "grad_norm": 0.9055981740448197,
      "learning_rate": 0.003,
      "loss": 4.0327,
      "step": 5669
    },
    {
      "epoch": 0.0567,
      "grad_norm": 1.1476920087687061,
      "learning_rate": 0.003,
      "loss": 4.0832,
      "step": 5670
    },
    {
      "epoch": 0.05671,
      "grad_norm": 0.8960616098490328,
      "learning_rate": 0.003,
      "loss": 4.0576,
      "step": 5671
    },
    {
      "epoch": 0.05672,
      "grad_norm": 0.99321915369349,
      "learning_rate": 0.003,
      "loss": 4.0618,
      "step": 5672
    },
    {
      "epoch": 0.05673,
      "grad_norm": 1.0447752766651002,
      "learning_rate": 0.003,
      "loss": 4.0455,
      "step": 5673
    },
    {
      "epoch": 0.05674,
      "grad_norm": 1.077079251419245,
      "learning_rate": 0.003,
      "loss": 4.0823,
      "step": 5674
    },
    {
      "epoch": 0.05675,
      "grad_norm": 0.919109346681063,
      "learning_rate": 0.003,
      "loss": 4.0671,
      "step": 5675
    },
    {
      "epoch": 0.05676,
      "grad_norm": 0.9790547358628968,
      "learning_rate": 0.003,
      "loss": 4.0789,
      "step": 5676
    },
    {
      "epoch": 0.05677,
      "grad_norm": 1.1793670646509047,
      "learning_rate": 0.003,
      "loss": 4.0818,
      "step": 5677
    },
    {
      "epoch": 0.05678,
      "grad_norm": 0.7472606166861204,
      "learning_rate": 0.003,
      "loss": 4.0619,
      "step": 5678
    },
    {
      "epoch": 0.05679,
      "grad_norm": 0.8050508896957376,
      "learning_rate": 0.003,
      "loss": 4.0787,
      "step": 5679
    },
    {
      "epoch": 0.0568,
      "grad_norm": 0.9344025333558529,
      "learning_rate": 0.003,
      "loss": 4.0594,
      "step": 5680
    },
    {
      "epoch": 0.05681,
      "grad_norm": 1.2348092828821307,
      "learning_rate": 0.003,
      "loss": 4.0683,
      "step": 5681
    },
    {
      "epoch": 0.05682,
      "grad_norm": 1.1959381020011484,
      "learning_rate": 0.003,
      "loss": 4.0818,
      "step": 5682
    },
    {
      "epoch": 0.05683,
      "grad_norm": 0.8976257998161941,
      "learning_rate": 0.003,
      "loss": 4.0345,
      "step": 5683
    },
    {
      "epoch": 0.05684,
      "grad_norm": 0.7252712065821193,
      "learning_rate": 0.003,
      "loss": 4.0424,
      "step": 5684
    },
    {
      "epoch": 0.05685,
      "grad_norm": 0.8829690678207763,
      "learning_rate": 0.003,
      "loss": 4.0853,
      "step": 5685
    },
    {
      "epoch": 0.05686,
      "grad_norm": 1.1058469730277019,
      "learning_rate": 0.003,
      "loss": 4.0923,
      "step": 5686
    },
    {
      "epoch": 0.05687,
      "grad_norm": 0.9611307746153006,
      "learning_rate": 0.003,
      "loss": 4.0397,
      "step": 5687
    },
    {
      "epoch": 0.05688,
      "grad_norm": 0.972565534030192,
      "learning_rate": 0.003,
      "loss": 4.0554,
      "step": 5688
    },
    {
      "epoch": 0.05689,
      "grad_norm": 1.096241549462336,
      "learning_rate": 0.003,
      "loss": 4.044,
      "step": 5689
    },
    {
      "epoch": 0.0569,
      "grad_norm": 0.8973939245294639,
      "learning_rate": 0.003,
      "loss": 4.061,
      "step": 5690
    },
    {
      "epoch": 0.05691,
      "grad_norm": 1.1419881519139168,
      "learning_rate": 0.003,
      "loss": 4.0996,
      "step": 5691
    },
    {
      "epoch": 0.05692,
      "grad_norm": 0.9949113970884143,
      "learning_rate": 0.003,
      "loss": 4.0696,
      "step": 5692
    },
    {
      "epoch": 0.05693,
      "grad_norm": 0.8512031165359729,
      "learning_rate": 0.003,
      "loss": 4.0674,
      "step": 5693
    },
    {
      "epoch": 0.05694,
      "grad_norm": 0.9426191438665297,
      "learning_rate": 0.003,
      "loss": 4.0481,
      "step": 5694
    },
    {
      "epoch": 0.05695,
      "grad_norm": 0.9020092132644848,
      "learning_rate": 0.003,
      "loss": 4.0481,
      "step": 5695
    },
    {
      "epoch": 0.05696,
      "grad_norm": 0.8485145899081942,
      "learning_rate": 0.003,
      "loss": 4.053,
      "step": 5696
    },
    {
      "epoch": 0.05697,
      "grad_norm": 0.8561615924770996,
      "learning_rate": 0.003,
      "loss": 4.0902,
      "step": 5697
    },
    {
      "epoch": 0.05698,
      "grad_norm": 0.8260144341160308,
      "learning_rate": 0.003,
      "loss": 4.085,
      "step": 5698
    },
    {
      "epoch": 0.05699,
      "grad_norm": 0.966693143570733,
      "learning_rate": 0.003,
      "loss": 4.0727,
      "step": 5699
    },
    {
      "epoch": 0.057,
      "grad_norm": 1.1487198582029357,
      "learning_rate": 0.003,
      "loss": 4.0577,
      "step": 5700
    },
    {
      "epoch": 0.05701,
      "grad_norm": 1.0854758797164454,
      "learning_rate": 0.003,
      "loss": 4.0818,
      "step": 5701
    },
    {
      "epoch": 0.05702,
      "grad_norm": 0.8835018328849502,
      "learning_rate": 0.003,
      "loss": 4.0552,
      "step": 5702
    },
    {
      "epoch": 0.05703,
      "grad_norm": 0.7845742412565407,
      "learning_rate": 0.003,
      "loss": 4.0972,
      "step": 5703
    },
    {
      "epoch": 0.05704,
      "grad_norm": 0.8498642388489823,
      "learning_rate": 0.003,
      "loss": 4.0557,
      "step": 5704
    },
    {
      "epoch": 0.05705,
      "grad_norm": 1.1326574547883799,
      "learning_rate": 0.003,
      "loss": 4.0631,
      "step": 5705
    },
    {
      "epoch": 0.05706,
      "grad_norm": 1.1434161313327718,
      "learning_rate": 0.003,
      "loss": 4.0632,
      "step": 5706
    },
    {
      "epoch": 0.05707,
      "grad_norm": 0.8510990859187535,
      "learning_rate": 0.003,
      "loss": 4.058,
      "step": 5707
    },
    {
      "epoch": 0.05708,
      "grad_norm": 0.8230771160938593,
      "learning_rate": 0.003,
      "loss": 4.0924,
      "step": 5708
    },
    {
      "epoch": 0.05709,
      "grad_norm": 0.8626308606823649,
      "learning_rate": 0.003,
      "loss": 4.0661,
      "step": 5709
    },
    {
      "epoch": 0.0571,
      "grad_norm": 0.8749237183120739,
      "learning_rate": 0.003,
      "loss": 4.0483,
      "step": 5710
    },
    {
      "epoch": 0.05711,
      "grad_norm": 1.0435884220325964,
      "learning_rate": 0.003,
      "loss": 4.0818,
      "step": 5711
    },
    {
      "epoch": 0.05712,
      "grad_norm": 1.1753101650891566,
      "learning_rate": 0.003,
      "loss": 4.0589,
      "step": 5712
    },
    {
      "epoch": 0.05713,
      "grad_norm": 0.9392540388426064,
      "learning_rate": 0.003,
      "loss": 4.0568,
      "step": 5713
    },
    {
      "epoch": 0.05714,
      "grad_norm": 1.1219818951823304,
      "learning_rate": 0.003,
      "loss": 4.1143,
      "step": 5714
    },
    {
      "epoch": 0.05715,
      "grad_norm": 0.9027198476231381,
      "learning_rate": 0.003,
      "loss": 4.1093,
      "step": 5715
    },
    {
      "epoch": 0.05716,
      "grad_norm": 0.9350712749494232,
      "learning_rate": 0.003,
      "loss": 4.0788,
      "step": 5716
    },
    {
      "epoch": 0.05717,
      "grad_norm": 1.078949022904377,
      "learning_rate": 0.003,
      "loss": 4.0947,
      "step": 5717
    },
    {
      "epoch": 0.05718,
      "grad_norm": 0.9320114628230994,
      "learning_rate": 0.003,
      "loss": 4.0732,
      "step": 5718
    },
    {
      "epoch": 0.05719,
      "grad_norm": 1.0737725128729791,
      "learning_rate": 0.003,
      "loss": 4.0798,
      "step": 5719
    },
    {
      "epoch": 0.0572,
      "grad_norm": 1.4370901793089597,
      "learning_rate": 0.003,
      "loss": 4.0809,
      "step": 5720
    },
    {
      "epoch": 0.05721,
      "grad_norm": 0.6835699725834176,
      "learning_rate": 0.003,
      "loss": 4.0512,
      "step": 5721
    },
    {
      "epoch": 0.05722,
      "grad_norm": 0.9019947466995998,
      "learning_rate": 0.003,
      "loss": 4.0632,
      "step": 5722
    },
    {
      "epoch": 0.05723,
      "grad_norm": 0.9478261688039891,
      "learning_rate": 0.003,
      "loss": 4.0664,
      "step": 5723
    },
    {
      "epoch": 0.05724,
      "grad_norm": 0.9063450900670779,
      "learning_rate": 0.003,
      "loss": 4.0646,
      "step": 5724
    },
    {
      "epoch": 0.05725,
      "grad_norm": 0.8956563510530542,
      "learning_rate": 0.003,
      "loss": 4.0772,
      "step": 5725
    },
    {
      "epoch": 0.05726,
      "grad_norm": 0.944931440902404,
      "learning_rate": 0.003,
      "loss": 4.0655,
      "step": 5726
    },
    {
      "epoch": 0.05727,
      "grad_norm": 0.9785206525274993,
      "learning_rate": 0.003,
      "loss": 4.0527,
      "step": 5727
    },
    {
      "epoch": 0.05728,
      "grad_norm": 0.9750029413661264,
      "learning_rate": 0.003,
      "loss": 4.0822,
      "step": 5728
    },
    {
      "epoch": 0.05729,
      "grad_norm": 0.9458877993555904,
      "learning_rate": 0.003,
      "loss": 4.0644,
      "step": 5729
    },
    {
      "epoch": 0.0573,
      "grad_norm": 0.8220018329351471,
      "learning_rate": 0.003,
      "loss": 4.0298,
      "step": 5730
    },
    {
      "epoch": 0.05731,
      "grad_norm": 0.8935219092820262,
      "learning_rate": 0.003,
      "loss": 4.0499,
      "step": 5731
    },
    {
      "epoch": 0.05732,
      "grad_norm": 1.011416798805237,
      "learning_rate": 0.003,
      "loss": 4.083,
      "step": 5732
    },
    {
      "epoch": 0.05733,
      "grad_norm": 1.191217640923215,
      "learning_rate": 0.003,
      "loss": 4.0948,
      "step": 5733
    },
    {
      "epoch": 0.05734,
      "grad_norm": 0.9117001751188784,
      "learning_rate": 0.003,
      "loss": 4.087,
      "step": 5734
    },
    {
      "epoch": 0.05735,
      "grad_norm": 0.7434085878525878,
      "learning_rate": 0.003,
      "loss": 4.0458,
      "step": 5735
    },
    {
      "epoch": 0.05736,
      "grad_norm": 0.7886018670166627,
      "learning_rate": 0.003,
      "loss": 4.0733,
      "step": 5736
    },
    {
      "epoch": 0.05737,
      "grad_norm": 0.8276223745361921,
      "learning_rate": 0.003,
      "loss": 4.0646,
      "step": 5737
    },
    {
      "epoch": 0.05738,
      "grad_norm": 0.9579259513547983,
      "learning_rate": 0.003,
      "loss": 4.0419,
      "step": 5738
    },
    {
      "epoch": 0.05739,
      "grad_norm": 0.9985511509251391,
      "learning_rate": 0.003,
      "loss": 4.054,
      "step": 5739
    },
    {
      "epoch": 0.0574,
      "grad_norm": 0.9372357444408635,
      "learning_rate": 0.003,
      "loss": 4.0562,
      "step": 5740
    },
    {
      "epoch": 0.05741,
      "grad_norm": 1.037244120040037,
      "learning_rate": 0.003,
      "loss": 4.0494,
      "step": 5741
    },
    {
      "epoch": 0.05742,
      "grad_norm": 1.0925704013324562,
      "learning_rate": 0.003,
      "loss": 4.0667,
      "step": 5742
    },
    {
      "epoch": 0.05743,
      "grad_norm": 0.9822101766913746,
      "learning_rate": 0.003,
      "loss": 4.0591,
      "step": 5743
    },
    {
      "epoch": 0.05744,
      "grad_norm": 0.9415350376527191,
      "learning_rate": 0.003,
      "loss": 4.071,
      "step": 5744
    },
    {
      "epoch": 0.05745,
      "grad_norm": 0.8514513969086831,
      "learning_rate": 0.003,
      "loss": 4.0658,
      "step": 5745
    },
    {
      "epoch": 0.05746,
      "grad_norm": 0.9590813051602415,
      "learning_rate": 0.003,
      "loss": 4.0516,
      "step": 5746
    },
    {
      "epoch": 0.05747,
      "grad_norm": 1.033811045759274,
      "learning_rate": 0.003,
      "loss": 4.0826,
      "step": 5747
    },
    {
      "epoch": 0.05748,
      "grad_norm": 0.9881694198935187,
      "learning_rate": 0.003,
      "loss": 4.0998,
      "step": 5748
    },
    {
      "epoch": 0.05749,
      "grad_norm": 1.2197505780445184,
      "learning_rate": 0.003,
      "loss": 4.0589,
      "step": 5749
    },
    {
      "epoch": 0.0575,
      "grad_norm": 0.8960825057299955,
      "learning_rate": 0.003,
      "loss": 4.0731,
      "step": 5750
    },
    {
      "epoch": 0.05751,
      "grad_norm": 0.9380756538662388,
      "learning_rate": 0.003,
      "loss": 4.09,
      "step": 5751
    },
    {
      "epoch": 0.05752,
      "grad_norm": 1.0494754059465994,
      "learning_rate": 0.003,
      "loss": 4.0827,
      "step": 5752
    },
    {
      "epoch": 0.05753,
      "grad_norm": 0.9737106865451927,
      "learning_rate": 0.003,
      "loss": 4.0695,
      "step": 5753
    },
    {
      "epoch": 0.05754,
      "grad_norm": 1.14044841129864,
      "learning_rate": 0.003,
      "loss": 4.0745,
      "step": 5754
    },
    {
      "epoch": 0.05755,
      "grad_norm": 0.9859781548984715,
      "learning_rate": 0.003,
      "loss": 4.054,
      "step": 5755
    },
    {
      "epoch": 0.05756,
      "grad_norm": 1.1440582623675561,
      "learning_rate": 0.003,
      "loss": 4.0929,
      "step": 5756
    },
    {
      "epoch": 0.05757,
      "grad_norm": 1.0784582404787597,
      "learning_rate": 0.003,
      "loss": 4.0793,
      "step": 5757
    },
    {
      "epoch": 0.05758,
      "grad_norm": 1.0238978196194495,
      "learning_rate": 0.003,
      "loss": 4.0714,
      "step": 5758
    },
    {
      "epoch": 0.05759,
      "grad_norm": 1.1962555833625972,
      "learning_rate": 0.003,
      "loss": 4.0785,
      "step": 5759
    },
    {
      "epoch": 0.0576,
      "grad_norm": 0.8606928693615246,
      "learning_rate": 0.003,
      "loss": 4.0673,
      "step": 5760
    },
    {
      "epoch": 0.05761,
      "grad_norm": 0.8065313954609131,
      "learning_rate": 0.003,
      "loss": 4.0573,
      "step": 5761
    },
    {
      "epoch": 0.05762,
      "grad_norm": 1.006617466336666,
      "learning_rate": 0.003,
      "loss": 4.0473,
      "step": 5762
    },
    {
      "epoch": 0.05763,
      "grad_norm": 1.1736455692173053,
      "learning_rate": 0.003,
      "loss": 4.0705,
      "step": 5763
    },
    {
      "epoch": 0.05764,
      "grad_norm": 1.0156358262996825,
      "learning_rate": 0.003,
      "loss": 4.0641,
      "step": 5764
    },
    {
      "epoch": 0.05765,
      "grad_norm": 1.1676317429304073,
      "learning_rate": 0.003,
      "loss": 4.0792,
      "step": 5765
    },
    {
      "epoch": 0.05766,
      "grad_norm": 0.9296797123737459,
      "learning_rate": 0.003,
      "loss": 4.0741,
      "step": 5766
    },
    {
      "epoch": 0.05767,
      "grad_norm": 1.108411398313796,
      "learning_rate": 0.003,
      "loss": 4.0659,
      "step": 5767
    },
    {
      "epoch": 0.05768,
      "grad_norm": 0.8873418356321205,
      "learning_rate": 0.003,
      "loss": 4.0824,
      "step": 5768
    },
    {
      "epoch": 0.05769,
      "grad_norm": 0.8943391072499615,
      "learning_rate": 0.003,
      "loss": 4.0656,
      "step": 5769
    },
    {
      "epoch": 0.0577,
      "grad_norm": 0.7643635046640151,
      "learning_rate": 0.003,
      "loss": 4.0669,
      "step": 5770
    },
    {
      "epoch": 0.05771,
      "grad_norm": 0.8171732047022162,
      "learning_rate": 0.003,
      "loss": 4.073,
      "step": 5771
    },
    {
      "epoch": 0.05772,
      "grad_norm": 0.9195825274621192,
      "learning_rate": 0.003,
      "loss": 4.0851,
      "step": 5772
    },
    {
      "epoch": 0.05773,
      "grad_norm": 1.1643144465255408,
      "learning_rate": 0.003,
      "loss": 4.0861,
      "step": 5773
    },
    {
      "epoch": 0.05774,
      "grad_norm": 1.0670738905589834,
      "learning_rate": 0.003,
      "loss": 4.0581,
      "step": 5774
    },
    {
      "epoch": 0.05775,
      "grad_norm": 1.1379995423607696,
      "learning_rate": 0.003,
      "loss": 4.0635,
      "step": 5775
    },
    {
      "epoch": 0.05776,
      "grad_norm": 1.0629804758835848,
      "learning_rate": 0.003,
      "loss": 4.0594,
      "step": 5776
    },
    {
      "epoch": 0.05777,
      "grad_norm": 0.9806929321830686,
      "learning_rate": 0.003,
      "loss": 4.075,
      "step": 5777
    },
    {
      "epoch": 0.05778,
      "grad_norm": 1.0104200584581524,
      "learning_rate": 0.003,
      "loss": 4.061,
      "step": 5778
    },
    {
      "epoch": 0.05779,
      "grad_norm": 1.0501375318209158,
      "learning_rate": 0.003,
      "loss": 4.0847,
      "step": 5779
    },
    {
      "epoch": 0.0578,
      "grad_norm": 1.0936198888153104,
      "learning_rate": 0.003,
      "loss": 4.0439,
      "step": 5780
    },
    {
      "epoch": 0.05781,
      "grad_norm": 1.0599398702570635,
      "learning_rate": 0.003,
      "loss": 4.0555,
      "step": 5781
    },
    {
      "epoch": 0.05782,
      "grad_norm": 0.8124457081867787,
      "learning_rate": 0.003,
      "loss": 4.0822,
      "step": 5782
    },
    {
      "epoch": 0.05783,
      "grad_norm": 0.8785108285243248,
      "learning_rate": 0.003,
      "loss": 4.0271,
      "step": 5783
    },
    {
      "epoch": 0.05784,
      "grad_norm": 0.9665672722542055,
      "learning_rate": 0.003,
      "loss": 4.0681,
      "step": 5784
    },
    {
      "epoch": 0.05785,
      "grad_norm": 1.0852885954128955,
      "learning_rate": 0.003,
      "loss": 4.0851,
      "step": 5785
    },
    {
      "epoch": 0.05786,
      "grad_norm": 0.9695946116446583,
      "learning_rate": 0.003,
      "loss": 4.0711,
      "step": 5786
    },
    {
      "epoch": 0.05787,
      "grad_norm": 1.0650628416583132,
      "learning_rate": 0.003,
      "loss": 4.0481,
      "step": 5787
    },
    {
      "epoch": 0.05788,
      "grad_norm": 0.7594393281247598,
      "learning_rate": 0.003,
      "loss": 4.0593,
      "step": 5788
    },
    {
      "epoch": 0.05789,
      "grad_norm": 0.7541805830813078,
      "learning_rate": 0.003,
      "loss": 4.0576,
      "step": 5789
    },
    {
      "epoch": 0.0579,
      "grad_norm": 0.8652244813315739,
      "learning_rate": 0.003,
      "loss": 4.0775,
      "step": 5790
    },
    {
      "epoch": 0.05791,
      "grad_norm": 0.8818398092099728,
      "learning_rate": 0.003,
      "loss": 4.0567,
      "step": 5791
    },
    {
      "epoch": 0.05792,
      "grad_norm": 0.7291575921298588,
      "learning_rate": 0.003,
      "loss": 4.054,
      "step": 5792
    },
    {
      "epoch": 0.05793,
      "grad_norm": 0.7978555275990008,
      "learning_rate": 0.003,
      "loss": 4.0461,
      "step": 5793
    },
    {
      "epoch": 0.05794,
      "grad_norm": 0.9171123664036167,
      "learning_rate": 0.003,
      "loss": 4.0626,
      "step": 5794
    },
    {
      "epoch": 0.05795,
      "grad_norm": 0.8607991972557187,
      "learning_rate": 0.003,
      "loss": 4.0958,
      "step": 5795
    },
    {
      "epoch": 0.05796,
      "grad_norm": 0.7463660684838923,
      "learning_rate": 0.003,
      "loss": 4.0635,
      "step": 5796
    },
    {
      "epoch": 0.05797,
      "grad_norm": 0.7284446091050844,
      "learning_rate": 0.003,
      "loss": 4.0618,
      "step": 5797
    },
    {
      "epoch": 0.05798,
      "grad_norm": 0.9660475572533875,
      "learning_rate": 0.003,
      "loss": 4.0459,
      "step": 5798
    },
    {
      "epoch": 0.05799,
      "grad_norm": 1.5107968538940575,
      "learning_rate": 0.003,
      "loss": 4.0941,
      "step": 5799
    },
    {
      "epoch": 0.058,
      "grad_norm": 0.8675842372107159,
      "learning_rate": 0.003,
      "loss": 4.0793,
      "step": 5800
    },
    {
      "epoch": 0.05801,
      "grad_norm": 1.0182860429769818,
      "learning_rate": 0.003,
      "loss": 4.0686,
      "step": 5801
    },
    {
      "epoch": 0.05802,
      "grad_norm": 0.8847441154270007,
      "learning_rate": 0.003,
      "loss": 4.0703,
      "step": 5802
    },
    {
      "epoch": 0.05803,
      "grad_norm": 0.9726482455890011,
      "learning_rate": 0.003,
      "loss": 4.0323,
      "step": 5803
    },
    {
      "epoch": 0.05804,
      "grad_norm": 1.0017317638615917,
      "learning_rate": 0.003,
      "loss": 4.0292,
      "step": 5804
    },
    {
      "epoch": 0.05805,
      "grad_norm": 1.1460034384721858,
      "learning_rate": 0.003,
      "loss": 4.0394,
      "step": 5805
    },
    {
      "epoch": 0.05806,
      "grad_norm": 0.8265329809662282,
      "learning_rate": 0.003,
      "loss": 4.0704,
      "step": 5806
    },
    {
      "epoch": 0.05807,
      "grad_norm": 0.7999918311654125,
      "learning_rate": 0.003,
      "loss": 4.0749,
      "step": 5807
    },
    {
      "epoch": 0.05808,
      "grad_norm": 0.7978362275046719,
      "learning_rate": 0.003,
      "loss": 4.0842,
      "step": 5808
    },
    {
      "epoch": 0.05809,
      "grad_norm": 0.8854482959350243,
      "learning_rate": 0.003,
      "loss": 4.0865,
      "step": 5809
    },
    {
      "epoch": 0.0581,
      "grad_norm": 1.0871463000956632,
      "learning_rate": 0.003,
      "loss": 4.0384,
      "step": 5810
    },
    {
      "epoch": 0.05811,
      "grad_norm": 1.1086945676399056,
      "learning_rate": 0.003,
      "loss": 4.0577,
      "step": 5811
    },
    {
      "epoch": 0.05812,
      "grad_norm": 0.9304552613092574,
      "learning_rate": 0.003,
      "loss": 4.0565,
      "step": 5812
    },
    {
      "epoch": 0.05813,
      "grad_norm": 0.9983452335160318,
      "learning_rate": 0.003,
      "loss": 4.0698,
      "step": 5813
    },
    {
      "epoch": 0.05814,
      "grad_norm": 0.9975951286224842,
      "learning_rate": 0.003,
      "loss": 4.0537,
      "step": 5814
    },
    {
      "epoch": 0.05815,
      "grad_norm": 0.8924647102734461,
      "learning_rate": 0.003,
      "loss": 4.0289,
      "step": 5815
    },
    {
      "epoch": 0.05816,
      "grad_norm": 0.9225284620016561,
      "learning_rate": 0.003,
      "loss": 4.0736,
      "step": 5816
    },
    {
      "epoch": 0.05817,
      "grad_norm": 0.9979994283490636,
      "learning_rate": 0.003,
      "loss": 4.0855,
      "step": 5817
    },
    {
      "epoch": 0.05818,
      "grad_norm": 1.0439768447035485,
      "learning_rate": 0.003,
      "loss": 4.0675,
      "step": 5818
    },
    {
      "epoch": 0.05819,
      "grad_norm": 1.033649032838979,
      "learning_rate": 0.003,
      "loss": 4.06,
      "step": 5819
    },
    {
      "epoch": 0.0582,
      "grad_norm": 1.1096343593879232,
      "learning_rate": 0.003,
      "loss": 4.0693,
      "step": 5820
    },
    {
      "epoch": 0.05821,
      "grad_norm": 0.9004393239006013,
      "learning_rate": 0.003,
      "loss": 4.0785,
      "step": 5821
    },
    {
      "epoch": 0.05822,
      "grad_norm": 1.0863173751230966,
      "learning_rate": 0.003,
      "loss": 4.043,
      "step": 5822
    },
    {
      "epoch": 0.05823,
      "grad_norm": 1.0599324875126475,
      "learning_rate": 0.003,
      "loss": 4.0747,
      "step": 5823
    },
    {
      "epoch": 0.05824,
      "grad_norm": 0.8876300345105752,
      "learning_rate": 0.003,
      "loss": 4.0721,
      "step": 5824
    },
    {
      "epoch": 0.05825,
      "grad_norm": 1.0008579052062283,
      "learning_rate": 0.003,
      "loss": 4.0976,
      "step": 5825
    },
    {
      "epoch": 0.05826,
      "grad_norm": 1.1748225539903427,
      "learning_rate": 0.003,
      "loss": 4.0533,
      "step": 5826
    },
    {
      "epoch": 0.05827,
      "grad_norm": 0.9593447519304307,
      "learning_rate": 0.003,
      "loss": 4.0775,
      "step": 5827
    },
    {
      "epoch": 0.05828,
      "grad_norm": 1.0225296000471948,
      "learning_rate": 0.003,
      "loss": 4.0345,
      "step": 5828
    },
    {
      "epoch": 0.05829,
      "grad_norm": 1.0560219694488389,
      "learning_rate": 0.003,
      "loss": 4.0774,
      "step": 5829
    },
    {
      "epoch": 0.0583,
      "grad_norm": 0.9860898705161171,
      "learning_rate": 0.003,
      "loss": 4.0239,
      "step": 5830
    },
    {
      "epoch": 0.05831,
      "grad_norm": 1.1250031799560922,
      "learning_rate": 0.003,
      "loss": 4.0403,
      "step": 5831
    },
    {
      "epoch": 0.05832,
      "grad_norm": 1.157236041461952,
      "learning_rate": 0.003,
      "loss": 4.0901,
      "step": 5832
    },
    {
      "epoch": 0.05833,
      "grad_norm": 0.978478470266213,
      "learning_rate": 0.003,
      "loss": 4.0713,
      "step": 5833
    },
    {
      "epoch": 0.05834,
      "grad_norm": 0.9798668444646972,
      "learning_rate": 0.003,
      "loss": 4.0828,
      "step": 5834
    },
    {
      "epoch": 0.05835,
      "grad_norm": 0.7832001924434391,
      "learning_rate": 0.003,
      "loss": 4.0929,
      "step": 5835
    },
    {
      "epoch": 0.05836,
      "grad_norm": 0.8249432319588443,
      "learning_rate": 0.003,
      "loss": 4.074,
      "step": 5836
    },
    {
      "epoch": 0.05837,
      "grad_norm": 0.9729603126849787,
      "learning_rate": 0.003,
      "loss": 4.0738,
      "step": 5837
    },
    {
      "epoch": 0.05838,
      "grad_norm": 0.9502514262519923,
      "learning_rate": 0.003,
      "loss": 4.076,
      "step": 5838
    },
    {
      "epoch": 0.05839,
      "grad_norm": 0.9267029498844601,
      "learning_rate": 0.003,
      "loss": 4.0745,
      "step": 5839
    },
    {
      "epoch": 0.0584,
      "grad_norm": 0.9657962011078538,
      "learning_rate": 0.003,
      "loss": 4.0937,
      "step": 5840
    },
    {
      "epoch": 0.05841,
      "grad_norm": 1.0401049872356143,
      "learning_rate": 0.003,
      "loss": 4.0518,
      "step": 5841
    },
    {
      "epoch": 0.05842,
      "grad_norm": 1.1291241708881259,
      "learning_rate": 0.003,
      "loss": 4.0677,
      "step": 5842
    },
    {
      "epoch": 0.05843,
      "grad_norm": 1.0657328740989145,
      "learning_rate": 0.003,
      "loss": 4.07,
      "step": 5843
    },
    {
      "epoch": 0.05844,
      "grad_norm": 0.936956030075049,
      "learning_rate": 0.003,
      "loss": 4.0636,
      "step": 5844
    },
    {
      "epoch": 0.05845,
      "grad_norm": 0.954122564913459,
      "learning_rate": 0.003,
      "loss": 4.063,
      "step": 5845
    },
    {
      "epoch": 0.05846,
      "grad_norm": 1.0055872032953703,
      "learning_rate": 0.003,
      "loss": 4.0577,
      "step": 5846
    },
    {
      "epoch": 0.05847,
      "grad_norm": 1.0847001970093764,
      "learning_rate": 0.003,
      "loss": 4.0895,
      "step": 5847
    },
    {
      "epoch": 0.05848,
      "grad_norm": 0.9328803987155841,
      "learning_rate": 0.003,
      "loss": 4.0603,
      "step": 5848
    },
    {
      "epoch": 0.05849,
      "grad_norm": 1.0216731452531402,
      "learning_rate": 0.003,
      "loss": 4.0272,
      "step": 5849
    },
    {
      "epoch": 0.0585,
      "grad_norm": 1.042034861189281,
      "learning_rate": 0.003,
      "loss": 4.0597,
      "step": 5850
    },
    {
      "epoch": 0.05851,
      "grad_norm": 0.9696375907305427,
      "learning_rate": 0.003,
      "loss": 4.0506,
      "step": 5851
    },
    {
      "epoch": 0.05852,
      "grad_norm": 0.9548659469705912,
      "learning_rate": 0.003,
      "loss": 4.0562,
      "step": 5852
    },
    {
      "epoch": 0.05853,
      "grad_norm": 0.9934590507196531,
      "learning_rate": 0.003,
      "loss": 4.045,
      "step": 5853
    },
    {
      "epoch": 0.05854,
      "grad_norm": 1.109837806391397,
      "learning_rate": 0.003,
      "loss": 4.1178,
      "step": 5854
    },
    {
      "epoch": 0.05855,
      "grad_norm": 0.9612662531040636,
      "learning_rate": 0.003,
      "loss": 4.0724,
      "step": 5855
    },
    {
      "epoch": 0.05856,
      "grad_norm": 0.9262688978815322,
      "learning_rate": 0.003,
      "loss": 4.0639,
      "step": 5856
    },
    {
      "epoch": 0.05857,
      "grad_norm": 1.0672841758813956,
      "learning_rate": 0.003,
      "loss": 4.0825,
      "step": 5857
    },
    {
      "epoch": 0.05858,
      "grad_norm": 0.9924933017958931,
      "learning_rate": 0.003,
      "loss": 4.0483,
      "step": 5858
    },
    {
      "epoch": 0.05859,
      "grad_norm": 1.1446711154327815,
      "learning_rate": 0.003,
      "loss": 4.076,
      "step": 5859
    },
    {
      "epoch": 0.0586,
      "grad_norm": 0.9395157838833672,
      "learning_rate": 0.003,
      "loss": 4.0778,
      "step": 5860
    },
    {
      "epoch": 0.05861,
      "grad_norm": 1.017783268004518,
      "learning_rate": 0.003,
      "loss": 4.0939,
      "step": 5861
    },
    {
      "epoch": 0.05862,
      "grad_norm": 0.8461181473215044,
      "learning_rate": 0.003,
      "loss": 4.0831,
      "step": 5862
    },
    {
      "epoch": 0.05863,
      "grad_norm": 0.727622134447459,
      "learning_rate": 0.003,
      "loss": 4.0781,
      "step": 5863
    },
    {
      "epoch": 0.05864,
      "grad_norm": 0.7787266994865726,
      "learning_rate": 0.003,
      "loss": 4.0552,
      "step": 5864
    },
    {
      "epoch": 0.05865,
      "grad_norm": 0.9457836140127373,
      "learning_rate": 0.003,
      "loss": 4.0523,
      "step": 5865
    },
    {
      "epoch": 0.05866,
      "grad_norm": 1.1590380983819644,
      "learning_rate": 0.003,
      "loss": 4.079,
      "step": 5866
    },
    {
      "epoch": 0.05867,
      "grad_norm": 0.8661814704210331,
      "learning_rate": 0.003,
      "loss": 4.0562,
      "step": 5867
    },
    {
      "epoch": 0.05868,
      "grad_norm": 0.7841997946264219,
      "learning_rate": 0.003,
      "loss": 4.0416,
      "step": 5868
    },
    {
      "epoch": 0.05869,
      "grad_norm": 0.9021971418732975,
      "learning_rate": 0.003,
      "loss": 4.0555,
      "step": 5869
    },
    {
      "epoch": 0.0587,
      "grad_norm": 1.044429489265089,
      "learning_rate": 0.003,
      "loss": 4.0608,
      "step": 5870
    },
    {
      "epoch": 0.05871,
      "grad_norm": 0.9528123707330137,
      "learning_rate": 0.003,
      "loss": 4.0593,
      "step": 5871
    },
    {
      "epoch": 0.05872,
      "grad_norm": 1.0247575002414453,
      "learning_rate": 0.003,
      "loss": 4.0553,
      "step": 5872
    },
    {
      "epoch": 0.05873,
      "grad_norm": 1.1855135281087743,
      "learning_rate": 0.003,
      "loss": 4.0769,
      "step": 5873
    },
    {
      "epoch": 0.05874,
      "grad_norm": 0.9873466030307598,
      "learning_rate": 0.003,
      "loss": 4.0633,
      "step": 5874
    },
    {
      "epoch": 0.05875,
      "grad_norm": 1.1663397858316737,
      "learning_rate": 0.003,
      "loss": 4.078,
      "step": 5875
    },
    {
      "epoch": 0.05876,
      "grad_norm": 1.2258462137763335,
      "learning_rate": 0.003,
      "loss": 4.0736,
      "step": 5876
    },
    {
      "epoch": 0.05877,
      "grad_norm": 1.0386697354012882,
      "learning_rate": 0.003,
      "loss": 4.0784,
      "step": 5877
    },
    {
      "epoch": 0.05878,
      "grad_norm": 1.183742461564236,
      "learning_rate": 0.003,
      "loss": 4.0912,
      "step": 5878
    },
    {
      "epoch": 0.05879,
      "grad_norm": 0.940962143351869,
      "learning_rate": 0.003,
      "loss": 4.0987,
      "step": 5879
    },
    {
      "epoch": 0.0588,
      "grad_norm": 0.927043037474344,
      "learning_rate": 0.003,
      "loss": 4.0609,
      "step": 5880
    },
    {
      "epoch": 0.05881,
      "grad_norm": 0.8962578128398271,
      "learning_rate": 0.003,
      "loss": 4.0608,
      "step": 5881
    },
    {
      "epoch": 0.05882,
      "grad_norm": 0.9066915820041124,
      "learning_rate": 0.003,
      "loss": 4.0801,
      "step": 5882
    },
    {
      "epoch": 0.05883,
      "grad_norm": 0.8598805143317192,
      "learning_rate": 0.003,
      "loss": 4.0725,
      "step": 5883
    },
    {
      "epoch": 0.05884,
      "grad_norm": 1.0114326319466251,
      "learning_rate": 0.003,
      "loss": 4.063,
      "step": 5884
    },
    {
      "epoch": 0.05885,
      "grad_norm": 1.1324207199773164,
      "learning_rate": 0.003,
      "loss": 4.084,
      "step": 5885
    },
    {
      "epoch": 0.05886,
      "grad_norm": 0.9079205076328346,
      "learning_rate": 0.003,
      "loss": 4.0703,
      "step": 5886
    },
    {
      "epoch": 0.05887,
      "grad_norm": 0.9499292804842742,
      "learning_rate": 0.003,
      "loss": 4.0646,
      "step": 5887
    },
    {
      "epoch": 0.05888,
      "grad_norm": 0.9253742487047153,
      "learning_rate": 0.003,
      "loss": 4.0174,
      "step": 5888
    },
    {
      "epoch": 0.05889,
      "grad_norm": 0.9756593540292708,
      "learning_rate": 0.003,
      "loss": 4.0521,
      "step": 5889
    },
    {
      "epoch": 0.0589,
      "grad_norm": 1.2153968005149314,
      "learning_rate": 0.003,
      "loss": 4.0648,
      "step": 5890
    },
    {
      "epoch": 0.05891,
      "grad_norm": 1.1050096751560372,
      "learning_rate": 0.003,
      "loss": 4.0613,
      "step": 5891
    },
    {
      "epoch": 0.05892,
      "grad_norm": 1.0123025142968227,
      "learning_rate": 0.003,
      "loss": 4.0945,
      "step": 5892
    },
    {
      "epoch": 0.05893,
      "grad_norm": 0.9835328234583312,
      "learning_rate": 0.003,
      "loss": 4.0688,
      "step": 5893
    },
    {
      "epoch": 0.05894,
      "grad_norm": 0.9304265933341809,
      "learning_rate": 0.003,
      "loss": 4.0404,
      "step": 5894
    },
    {
      "epoch": 0.05895,
      "grad_norm": 0.9274436106212893,
      "learning_rate": 0.003,
      "loss": 4.0485,
      "step": 5895
    },
    {
      "epoch": 0.05896,
      "grad_norm": 0.8771867029491986,
      "learning_rate": 0.003,
      "loss": 4.0752,
      "step": 5896
    },
    {
      "epoch": 0.05897,
      "grad_norm": 0.8218952866171018,
      "learning_rate": 0.003,
      "loss": 4.0808,
      "step": 5897
    },
    {
      "epoch": 0.05898,
      "grad_norm": 0.9430573967743275,
      "learning_rate": 0.003,
      "loss": 4.0775,
      "step": 5898
    },
    {
      "epoch": 0.05899,
      "grad_norm": 0.8911678825157316,
      "learning_rate": 0.003,
      "loss": 4.0408,
      "step": 5899
    },
    {
      "epoch": 0.059,
      "grad_norm": 0.9174879047193627,
      "learning_rate": 0.003,
      "loss": 4.0694,
      "step": 5900
    },
    {
      "epoch": 0.05901,
      "grad_norm": 0.9824565061579317,
      "learning_rate": 0.003,
      "loss": 4.0506,
      "step": 5901
    },
    {
      "epoch": 0.05902,
      "grad_norm": 1.240010288158423,
      "learning_rate": 0.003,
      "loss": 4.0791,
      "step": 5902
    },
    {
      "epoch": 0.05903,
      "grad_norm": 0.7639178515927444,
      "learning_rate": 0.003,
      "loss": 4.073,
      "step": 5903
    },
    {
      "epoch": 0.05904,
      "grad_norm": 0.770306018643173,
      "learning_rate": 0.003,
      "loss": 4.0557,
      "step": 5904
    },
    {
      "epoch": 0.05905,
      "grad_norm": 0.976148342284057,
      "learning_rate": 0.003,
      "loss": 4.0601,
      "step": 5905
    },
    {
      "epoch": 0.05906,
      "grad_norm": 1.1812920524478123,
      "learning_rate": 0.003,
      "loss": 4.0651,
      "step": 5906
    },
    {
      "epoch": 0.05907,
      "grad_norm": 1.1654915411869895,
      "learning_rate": 0.003,
      "loss": 4.0542,
      "step": 5907
    },
    {
      "epoch": 0.05908,
      "grad_norm": 1.120465653132924,
      "learning_rate": 0.003,
      "loss": 4.0779,
      "step": 5908
    },
    {
      "epoch": 0.05909,
      "grad_norm": 0.8539512494382139,
      "learning_rate": 0.003,
      "loss": 4.0826,
      "step": 5909
    },
    {
      "epoch": 0.0591,
      "grad_norm": 0.8227369203330751,
      "learning_rate": 0.003,
      "loss": 4.0559,
      "step": 5910
    },
    {
      "epoch": 0.05911,
      "grad_norm": 0.8906442288033303,
      "learning_rate": 0.003,
      "loss": 4.0425,
      "step": 5911
    },
    {
      "epoch": 0.05912,
      "grad_norm": 0.9509533493887127,
      "learning_rate": 0.003,
      "loss": 4.0874,
      "step": 5912
    },
    {
      "epoch": 0.05913,
      "grad_norm": 1.0981979298361981,
      "learning_rate": 0.003,
      "loss": 4.1101,
      "step": 5913
    },
    {
      "epoch": 0.05914,
      "grad_norm": 0.9415620615571133,
      "learning_rate": 0.003,
      "loss": 4.0824,
      "step": 5914
    },
    {
      "epoch": 0.05915,
      "grad_norm": 0.8478332715846145,
      "learning_rate": 0.003,
      "loss": 4.0783,
      "step": 5915
    },
    {
      "epoch": 0.05916,
      "grad_norm": 0.8256880500590459,
      "learning_rate": 0.003,
      "loss": 4.0626,
      "step": 5916
    },
    {
      "epoch": 0.05917,
      "grad_norm": 0.9790506827233469,
      "learning_rate": 0.003,
      "loss": 4.0487,
      "step": 5917
    },
    {
      "epoch": 0.05918,
      "grad_norm": 1.0353580913196638,
      "learning_rate": 0.003,
      "loss": 4.0617,
      "step": 5918
    },
    {
      "epoch": 0.05919,
      "grad_norm": 0.9651964696003633,
      "learning_rate": 0.003,
      "loss": 4.0576,
      "step": 5919
    },
    {
      "epoch": 0.0592,
      "grad_norm": 0.937291761754371,
      "learning_rate": 0.003,
      "loss": 4.0363,
      "step": 5920
    },
    {
      "epoch": 0.05921,
      "grad_norm": 1.0990872808141368,
      "learning_rate": 0.003,
      "loss": 4.0815,
      "step": 5921
    },
    {
      "epoch": 0.05922,
      "grad_norm": 1.0468886911915498,
      "learning_rate": 0.003,
      "loss": 4.0695,
      "step": 5922
    },
    {
      "epoch": 0.05923,
      "grad_norm": 1.0323600516250746,
      "learning_rate": 0.003,
      "loss": 4.0406,
      "step": 5923
    },
    {
      "epoch": 0.05924,
      "grad_norm": 0.954016613180186,
      "learning_rate": 0.003,
      "loss": 4.0674,
      "step": 5924
    },
    {
      "epoch": 0.05925,
      "grad_norm": 0.9475491801291553,
      "learning_rate": 0.003,
      "loss": 4.0541,
      "step": 5925
    },
    {
      "epoch": 0.05926,
      "grad_norm": 0.9668145623340975,
      "learning_rate": 0.003,
      "loss": 4.0616,
      "step": 5926
    },
    {
      "epoch": 0.05927,
      "grad_norm": 1.0595618769436952,
      "learning_rate": 0.003,
      "loss": 4.0479,
      "step": 5927
    },
    {
      "epoch": 0.05928,
      "grad_norm": 0.9855002059444532,
      "learning_rate": 0.003,
      "loss": 4.065,
      "step": 5928
    },
    {
      "epoch": 0.05929,
      "grad_norm": 0.945307973831574,
      "learning_rate": 0.003,
      "loss": 4.0726,
      "step": 5929
    },
    {
      "epoch": 0.0593,
      "grad_norm": 0.9657299567775556,
      "learning_rate": 0.003,
      "loss": 4.0783,
      "step": 5930
    },
    {
      "epoch": 0.05931,
      "grad_norm": 0.9827228555033615,
      "learning_rate": 0.003,
      "loss": 4.058,
      "step": 5931
    },
    {
      "epoch": 0.05932,
      "grad_norm": 1.156077533419125,
      "learning_rate": 0.003,
      "loss": 4.0928,
      "step": 5932
    },
    {
      "epoch": 0.05933,
      "grad_norm": 0.847492777721161,
      "learning_rate": 0.003,
      "loss": 4.0555,
      "step": 5933
    },
    {
      "epoch": 0.05934,
      "grad_norm": 1.0021937329107191,
      "learning_rate": 0.003,
      "loss": 4.0961,
      "step": 5934
    },
    {
      "epoch": 0.05935,
      "grad_norm": 1.1159732242811857,
      "learning_rate": 0.003,
      "loss": 4.0833,
      "step": 5935
    },
    {
      "epoch": 0.05936,
      "grad_norm": 0.899401377373719,
      "learning_rate": 0.003,
      "loss": 4.0751,
      "step": 5936
    },
    {
      "epoch": 0.05937,
      "grad_norm": 0.8301027331575037,
      "learning_rate": 0.003,
      "loss": 4.0704,
      "step": 5937
    },
    {
      "epoch": 0.05938,
      "grad_norm": 0.8762725606178311,
      "learning_rate": 0.003,
      "loss": 4.0486,
      "step": 5938
    },
    {
      "epoch": 0.05939,
      "grad_norm": 0.9126590668115435,
      "learning_rate": 0.003,
      "loss": 4.0604,
      "step": 5939
    },
    {
      "epoch": 0.0594,
      "grad_norm": 1.0407767907462304,
      "learning_rate": 0.003,
      "loss": 4.0811,
      "step": 5940
    },
    {
      "epoch": 0.05941,
      "grad_norm": 1.1143892345234092,
      "learning_rate": 0.003,
      "loss": 4.0837,
      "step": 5941
    },
    {
      "epoch": 0.05942,
      "grad_norm": 1.0961434922540771,
      "learning_rate": 0.003,
      "loss": 4.0719,
      "step": 5942
    },
    {
      "epoch": 0.05943,
      "grad_norm": 1.0471390550371067,
      "learning_rate": 0.003,
      "loss": 4.061,
      "step": 5943
    },
    {
      "epoch": 0.05944,
      "grad_norm": 0.9579622530263362,
      "learning_rate": 0.003,
      "loss": 4.0172,
      "step": 5944
    },
    {
      "epoch": 0.05945,
      "grad_norm": 0.9926199469417607,
      "learning_rate": 0.003,
      "loss": 4.0566,
      "step": 5945
    },
    {
      "epoch": 0.05946,
      "grad_norm": 1.0156252547795441,
      "learning_rate": 0.003,
      "loss": 4.0531,
      "step": 5946
    },
    {
      "epoch": 0.05947,
      "grad_norm": 1.2611887927782788,
      "learning_rate": 0.003,
      "loss": 4.0545,
      "step": 5947
    },
    {
      "epoch": 0.05948,
      "grad_norm": 0.9815730257128612,
      "learning_rate": 0.003,
      "loss": 4.0662,
      "step": 5948
    },
    {
      "epoch": 0.05949,
      "grad_norm": 1.0471598623999925,
      "learning_rate": 0.003,
      "loss": 4.0611,
      "step": 5949
    },
    {
      "epoch": 0.0595,
      "grad_norm": 0.8842194048874816,
      "learning_rate": 0.003,
      "loss": 4.0679,
      "step": 5950
    },
    {
      "epoch": 0.05951,
      "grad_norm": 0.8826262827071101,
      "learning_rate": 0.003,
      "loss": 4.0586,
      "step": 5951
    },
    {
      "epoch": 0.05952,
      "grad_norm": 0.8874639709587795,
      "learning_rate": 0.003,
      "loss": 4.0719,
      "step": 5952
    },
    {
      "epoch": 0.05953,
      "grad_norm": 1.094346129739514,
      "learning_rate": 0.003,
      "loss": 4.0532,
      "step": 5953
    },
    {
      "epoch": 0.05954,
      "grad_norm": 1.0405342399228923,
      "learning_rate": 0.003,
      "loss": 4.0772,
      "step": 5954
    },
    {
      "epoch": 0.05955,
      "grad_norm": 1.0084545393678892,
      "learning_rate": 0.003,
      "loss": 4.0575,
      "step": 5955
    },
    {
      "epoch": 0.05956,
      "grad_norm": 0.8681329007495994,
      "learning_rate": 0.003,
      "loss": 4.0396,
      "step": 5956
    },
    {
      "epoch": 0.05957,
      "grad_norm": 0.9787011417999288,
      "learning_rate": 0.003,
      "loss": 4.0496,
      "step": 5957
    },
    {
      "epoch": 0.05958,
      "grad_norm": 0.983255913377767,
      "learning_rate": 0.003,
      "loss": 4.0624,
      "step": 5958
    },
    {
      "epoch": 0.05959,
      "grad_norm": 0.9397652065747355,
      "learning_rate": 0.003,
      "loss": 4.0601,
      "step": 5959
    },
    {
      "epoch": 0.0596,
      "grad_norm": 0.9197247886041665,
      "learning_rate": 0.003,
      "loss": 4.0784,
      "step": 5960
    },
    {
      "epoch": 0.05961,
      "grad_norm": 0.975726486158997,
      "learning_rate": 0.003,
      "loss": 4.0669,
      "step": 5961
    },
    {
      "epoch": 0.05962,
      "grad_norm": 1.0416902503797958,
      "learning_rate": 0.003,
      "loss": 4.0614,
      "step": 5962
    },
    {
      "epoch": 0.05963,
      "grad_norm": 1.0805802820393233,
      "learning_rate": 0.003,
      "loss": 4.0699,
      "step": 5963
    },
    {
      "epoch": 0.05964,
      "grad_norm": 0.9125810985534502,
      "learning_rate": 0.003,
      "loss": 4.0626,
      "step": 5964
    },
    {
      "epoch": 0.05965,
      "grad_norm": 0.7839568155281216,
      "learning_rate": 0.003,
      "loss": 4.0562,
      "step": 5965
    },
    {
      "epoch": 0.05966,
      "grad_norm": 0.645579903658523,
      "learning_rate": 0.003,
      "loss": 4.0657,
      "step": 5966
    },
    {
      "epoch": 0.05967,
      "grad_norm": 0.6174059038324572,
      "learning_rate": 0.003,
      "loss": 4.0445,
      "step": 5967
    },
    {
      "epoch": 0.05968,
      "grad_norm": 0.6892480108767447,
      "learning_rate": 0.003,
      "loss": 4.0443,
      "step": 5968
    },
    {
      "epoch": 0.05969,
      "grad_norm": 0.7254045272144456,
      "learning_rate": 0.003,
      "loss": 4.0342,
      "step": 5969
    },
    {
      "epoch": 0.0597,
      "grad_norm": 0.7953745734437678,
      "learning_rate": 0.003,
      "loss": 4.043,
      "step": 5970
    },
    {
      "epoch": 0.05971,
      "grad_norm": 1.0581208502754886,
      "learning_rate": 0.003,
      "loss": 4.0682,
      "step": 5971
    },
    {
      "epoch": 0.05972,
      "grad_norm": 1.1798253869062356,
      "learning_rate": 0.003,
      "loss": 4.0845,
      "step": 5972
    },
    {
      "epoch": 0.05973,
      "grad_norm": 0.7544716901026084,
      "learning_rate": 0.003,
      "loss": 4.0372,
      "step": 5973
    },
    {
      "epoch": 0.05974,
      "grad_norm": 0.7411406547879388,
      "learning_rate": 0.003,
      "loss": 4.0345,
      "step": 5974
    },
    {
      "epoch": 0.05975,
      "grad_norm": 0.664439857328635,
      "learning_rate": 0.003,
      "loss": 4.0683,
      "step": 5975
    },
    {
      "epoch": 0.05976,
      "grad_norm": 0.684227141132107,
      "learning_rate": 0.003,
      "loss": 4.0512,
      "step": 5976
    },
    {
      "epoch": 0.05977,
      "grad_norm": 0.8888845505907672,
      "learning_rate": 0.003,
      "loss": 4.0621,
      "step": 5977
    },
    {
      "epoch": 0.05978,
      "grad_norm": 1.1416281876950884,
      "learning_rate": 0.003,
      "loss": 4.0577,
      "step": 5978
    },
    {
      "epoch": 0.05979,
      "grad_norm": 1.008454254020505,
      "learning_rate": 0.003,
      "loss": 4.0608,
      "step": 5979
    },
    {
      "epoch": 0.0598,
      "grad_norm": 1.190632055338276,
      "learning_rate": 0.003,
      "loss": 4.045,
      "step": 5980
    },
    {
      "epoch": 0.05981,
      "grad_norm": 1.015356754441631,
      "learning_rate": 0.003,
      "loss": 4.0442,
      "step": 5981
    },
    {
      "epoch": 0.05982,
      "grad_norm": 0.9378130097416635,
      "learning_rate": 0.003,
      "loss": 4.0579,
      "step": 5982
    },
    {
      "epoch": 0.05983,
      "grad_norm": 0.8053048489511819,
      "learning_rate": 0.003,
      "loss": 4.0703,
      "step": 5983
    },
    {
      "epoch": 0.05984,
      "grad_norm": 0.9819349814850642,
      "learning_rate": 0.003,
      "loss": 4.0417,
      "step": 5984
    },
    {
      "epoch": 0.05985,
      "grad_norm": 1.1131918756985963,
      "learning_rate": 0.003,
      "loss": 4.0802,
      "step": 5985
    },
    {
      "epoch": 0.05986,
      "grad_norm": 0.9174366752153941,
      "learning_rate": 0.003,
      "loss": 4.0741,
      "step": 5986
    },
    {
      "epoch": 0.05987,
      "grad_norm": 1.0378256913235493,
      "learning_rate": 0.003,
      "loss": 4.049,
      "step": 5987
    },
    {
      "epoch": 0.05988,
      "grad_norm": 0.9258303467488307,
      "learning_rate": 0.003,
      "loss": 4.0565,
      "step": 5988
    },
    {
      "epoch": 0.05989,
      "grad_norm": 0.9632304797304597,
      "learning_rate": 0.003,
      "loss": 4.0519,
      "step": 5989
    },
    {
      "epoch": 0.0599,
      "grad_norm": 0.8935903017412864,
      "learning_rate": 0.003,
      "loss": 4.0597,
      "step": 5990
    },
    {
      "epoch": 0.05991,
      "grad_norm": 0.6739742445063621,
      "learning_rate": 0.003,
      "loss": 4.0254,
      "step": 5991
    },
    {
      "epoch": 0.05992,
      "grad_norm": 0.5915614354430911,
      "learning_rate": 0.003,
      "loss": 4.0496,
      "step": 5992
    },
    {
      "epoch": 0.05993,
      "grad_norm": 0.741024166202095,
      "learning_rate": 0.003,
      "loss": 4.0576,
      "step": 5993
    },
    {
      "epoch": 0.05994,
      "grad_norm": 0.8841179715786665,
      "learning_rate": 0.003,
      "loss": 4.0435,
      "step": 5994
    },
    {
      "epoch": 0.05995,
      "grad_norm": 1.287538920218187,
      "learning_rate": 0.003,
      "loss": 4.0605,
      "step": 5995
    },
    {
      "epoch": 0.05996,
      "grad_norm": 1.0684098332314382,
      "learning_rate": 0.003,
      "loss": 4.0921,
      "step": 5996
    },
    {
      "epoch": 0.05997,
      "grad_norm": 0.9397674955044001,
      "learning_rate": 0.003,
      "loss": 4.0358,
      "step": 5997
    },
    {
      "epoch": 0.05998,
      "grad_norm": 0.8272889396102959,
      "learning_rate": 0.003,
      "loss": 4.0826,
      "step": 5998
    },
    {
      "epoch": 0.05999,
      "grad_norm": 0.8991148255094739,
      "learning_rate": 0.003,
      "loss": 4.0499,
      "step": 5999
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9185220408167984,
      "learning_rate": 0.003,
      "loss": 4.0615,
      "step": 6000
    },
    {
      "epoch": 0.06001,
      "grad_norm": 0.9908154950299144,
      "learning_rate": 0.003,
      "loss": 4.0527,
      "step": 6001
    },
    {
      "epoch": 0.06002,
      "grad_norm": 0.9608103918830759,
      "learning_rate": 0.003,
      "loss": 4.0457,
      "step": 6002
    },
    {
      "epoch": 0.06003,
      "grad_norm": 1.1215832189472665,
      "learning_rate": 0.003,
      "loss": 4.0693,
      "step": 6003
    },
    {
      "epoch": 0.06004,
      "grad_norm": 1.0570354371095005,
      "learning_rate": 0.003,
      "loss": 4.0476,
      "step": 6004
    },
    {
      "epoch": 0.06005,
      "grad_norm": 1.0877288777978935,
      "learning_rate": 0.003,
      "loss": 4.0638,
      "step": 6005
    },
    {
      "epoch": 0.06006,
      "grad_norm": 1.0530975014128934,
      "learning_rate": 0.003,
      "loss": 4.0472,
      "step": 6006
    },
    {
      "epoch": 0.06007,
      "grad_norm": 0.8755728290132648,
      "learning_rate": 0.003,
      "loss": 4.0921,
      "step": 6007
    },
    {
      "epoch": 0.06008,
      "grad_norm": 0.9019088575660325,
      "learning_rate": 0.003,
      "loss": 4.0503,
      "step": 6008
    },
    {
      "epoch": 0.06009,
      "grad_norm": 1.1772437890538816,
      "learning_rate": 0.003,
      "loss": 4.0662,
      "step": 6009
    },
    {
      "epoch": 0.0601,
      "grad_norm": 0.9527109892195108,
      "learning_rate": 0.003,
      "loss": 4.0641,
      "step": 6010
    },
    {
      "epoch": 0.06011,
      "grad_norm": 1.1738757885020588,
      "learning_rate": 0.003,
      "loss": 4.0738,
      "step": 6011
    },
    {
      "epoch": 0.06012,
      "grad_norm": 0.8780691584223036,
      "learning_rate": 0.003,
      "loss": 4.0742,
      "step": 6012
    },
    {
      "epoch": 0.06013,
      "grad_norm": 0.9179139070213036,
      "learning_rate": 0.003,
      "loss": 4.0669,
      "step": 6013
    },
    {
      "epoch": 0.06014,
      "grad_norm": 1.00855448654704,
      "learning_rate": 0.003,
      "loss": 4.0576,
      "step": 6014
    },
    {
      "epoch": 0.06015,
      "grad_norm": 0.9972560180447895,
      "learning_rate": 0.003,
      "loss": 4.0704,
      "step": 6015
    },
    {
      "epoch": 0.06016,
      "grad_norm": 1.0775312684009026,
      "learning_rate": 0.003,
      "loss": 4.0771,
      "step": 6016
    },
    {
      "epoch": 0.06017,
      "grad_norm": 1.33609316921924,
      "learning_rate": 0.003,
      "loss": 4.0869,
      "step": 6017
    },
    {
      "epoch": 0.06018,
      "grad_norm": 1.0280471651185956,
      "learning_rate": 0.003,
      "loss": 4.1001,
      "step": 6018
    },
    {
      "epoch": 0.06019,
      "grad_norm": 1.0585602447833204,
      "learning_rate": 0.003,
      "loss": 4.0658,
      "step": 6019
    },
    {
      "epoch": 0.0602,
      "grad_norm": 0.9692400850494943,
      "learning_rate": 0.003,
      "loss": 4.0608,
      "step": 6020
    },
    {
      "epoch": 0.06021,
      "grad_norm": 1.0415586611820176,
      "learning_rate": 0.003,
      "loss": 4.0803,
      "step": 6021
    },
    {
      "epoch": 0.06022,
      "grad_norm": 1.055493914814788,
      "learning_rate": 0.003,
      "loss": 4.0875,
      "step": 6022
    },
    {
      "epoch": 0.06023,
      "grad_norm": 1.1437368963667776,
      "learning_rate": 0.003,
      "loss": 4.0728,
      "step": 6023
    },
    {
      "epoch": 0.06024,
      "grad_norm": 1.0461727220486359,
      "learning_rate": 0.003,
      "loss": 4.0503,
      "step": 6024
    },
    {
      "epoch": 0.06025,
      "grad_norm": 0.8495924516845805,
      "learning_rate": 0.003,
      "loss": 4.0979,
      "step": 6025
    },
    {
      "epoch": 0.06026,
      "grad_norm": 0.8387211090444705,
      "learning_rate": 0.003,
      "loss": 4.0653,
      "step": 6026
    },
    {
      "epoch": 0.06027,
      "grad_norm": 0.9070801722224514,
      "learning_rate": 0.003,
      "loss": 4.0586,
      "step": 6027
    },
    {
      "epoch": 0.06028,
      "grad_norm": 0.9625380625910851,
      "learning_rate": 0.003,
      "loss": 4.0843,
      "step": 6028
    },
    {
      "epoch": 0.06029,
      "grad_norm": 1.0682457520452102,
      "learning_rate": 0.003,
      "loss": 4.0803,
      "step": 6029
    },
    {
      "epoch": 0.0603,
      "grad_norm": 0.9976915475426508,
      "learning_rate": 0.003,
      "loss": 4.0725,
      "step": 6030
    },
    {
      "epoch": 0.06031,
      "grad_norm": 1.0453095190042574,
      "learning_rate": 0.003,
      "loss": 4.0703,
      "step": 6031
    },
    {
      "epoch": 0.06032,
      "grad_norm": 0.9786653778595836,
      "learning_rate": 0.003,
      "loss": 4.0693,
      "step": 6032
    },
    {
      "epoch": 0.06033,
      "grad_norm": 1.0968968530547012,
      "learning_rate": 0.003,
      "loss": 4.0781,
      "step": 6033
    },
    {
      "epoch": 0.06034,
      "grad_norm": 0.9514397371995961,
      "learning_rate": 0.003,
      "loss": 4.0905,
      "step": 6034
    },
    {
      "epoch": 0.06035,
      "grad_norm": 1.0541817557308053,
      "learning_rate": 0.003,
      "loss": 4.0842,
      "step": 6035
    },
    {
      "epoch": 0.06036,
      "grad_norm": 0.8780263970411091,
      "learning_rate": 0.003,
      "loss": 4.0307,
      "step": 6036
    },
    {
      "epoch": 0.06037,
      "grad_norm": 1.0193481447935269,
      "learning_rate": 0.003,
      "loss": 4.0737,
      "step": 6037
    },
    {
      "epoch": 0.06038,
      "grad_norm": 1.3774676565861503,
      "learning_rate": 0.003,
      "loss": 4.0672,
      "step": 6038
    },
    {
      "epoch": 0.06039,
      "grad_norm": 0.7252838059348526,
      "learning_rate": 0.003,
      "loss": 4.0348,
      "step": 6039
    },
    {
      "epoch": 0.0604,
      "grad_norm": 0.78601742451761,
      "learning_rate": 0.003,
      "loss": 4.0607,
      "step": 6040
    },
    {
      "epoch": 0.06041,
      "grad_norm": 0.744786447804636,
      "learning_rate": 0.003,
      "loss": 4.0768,
      "step": 6041
    },
    {
      "epoch": 0.06042,
      "grad_norm": 0.8660278386592278,
      "learning_rate": 0.003,
      "loss": 4.0541,
      "step": 6042
    },
    {
      "epoch": 0.06043,
      "grad_norm": 1.1071573404697885,
      "learning_rate": 0.003,
      "loss": 4.0668,
      "step": 6043
    },
    {
      "epoch": 0.06044,
      "grad_norm": 1.0894101622044046,
      "learning_rate": 0.003,
      "loss": 4.0661,
      "step": 6044
    },
    {
      "epoch": 0.06045,
      "grad_norm": 1.0493254983595819,
      "learning_rate": 0.003,
      "loss": 4.0649,
      "step": 6045
    },
    {
      "epoch": 0.06046,
      "grad_norm": 1.0049917198936318,
      "learning_rate": 0.003,
      "loss": 4.0694,
      "step": 6046
    },
    {
      "epoch": 0.06047,
      "grad_norm": 1.0517277702815098,
      "learning_rate": 0.003,
      "loss": 4.0495,
      "step": 6047
    },
    {
      "epoch": 0.06048,
      "grad_norm": 1.1809116249025131,
      "learning_rate": 0.003,
      "loss": 4.0507,
      "step": 6048
    },
    {
      "epoch": 0.06049,
      "grad_norm": 0.9623062437443599,
      "learning_rate": 0.003,
      "loss": 4.0414,
      "step": 6049
    },
    {
      "epoch": 0.0605,
      "grad_norm": 1.0923522103941121,
      "learning_rate": 0.003,
      "loss": 4.0716,
      "step": 6050
    },
    {
      "epoch": 0.06051,
      "grad_norm": 1.0265427925273334,
      "learning_rate": 0.003,
      "loss": 4.0854,
      "step": 6051
    },
    {
      "epoch": 0.06052,
      "grad_norm": 0.9414887816443991,
      "learning_rate": 0.003,
      "loss": 4.0823,
      "step": 6052
    },
    {
      "epoch": 0.06053,
      "grad_norm": 1.006065905595286,
      "learning_rate": 0.003,
      "loss": 4.0399,
      "step": 6053
    },
    {
      "epoch": 0.06054,
      "grad_norm": 1.095910787052634,
      "learning_rate": 0.003,
      "loss": 4.0807,
      "step": 6054
    },
    {
      "epoch": 0.06055,
      "grad_norm": 0.8517266184871414,
      "learning_rate": 0.003,
      "loss": 4.0602,
      "step": 6055
    },
    {
      "epoch": 0.06056,
      "grad_norm": 1.1390125640686093,
      "learning_rate": 0.003,
      "loss": 4.0632,
      "step": 6056
    },
    {
      "epoch": 0.06057,
      "grad_norm": 1.0285564034956292,
      "learning_rate": 0.003,
      "loss": 4.0469,
      "step": 6057
    },
    {
      "epoch": 0.06058,
      "grad_norm": 1.0733695934055711,
      "learning_rate": 0.003,
      "loss": 4.0643,
      "step": 6058
    },
    {
      "epoch": 0.06059,
      "grad_norm": 0.8565845158282416,
      "learning_rate": 0.003,
      "loss": 4.0698,
      "step": 6059
    },
    {
      "epoch": 0.0606,
      "grad_norm": 0.8864003378041345,
      "learning_rate": 0.003,
      "loss": 4.0667,
      "step": 6060
    },
    {
      "epoch": 0.06061,
      "grad_norm": 1.0109993363665308,
      "learning_rate": 0.003,
      "loss": 4.0742,
      "step": 6061
    },
    {
      "epoch": 0.06062,
      "grad_norm": 1.123107920080809,
      "learning_rate": 0.003,
      "loss": 4.083,
      "step": 6062
    },
    {
      "epoch": 0.06063,
      "grad_norm": 0.949609814613259,
      "learning_rate": 0.003,
      "loss": 4.0303,
      "step": 6063
    },
    {
      "epoch": 0.06064,
      "grad_norm": 0.9981845823778703,
      "learning_rate": 0.003,
      "loss": 4.0938,
      "step": 6064
    },
    {
      "epoch": 0.06065,
      "grad_norm": 1.1017349391298685,
      "learning_rate": 0.003,
      "loss": 4.0688,
      "step": 6065
    },
    {
      "epoch": 0.06066,
      "grad_norm": 1.0732527006387818,
      "learning_rate": 0.003,
      "loss": 4.098,
      "step": 6066
    },
    {
      "epoch": 0.06067,
      "grad_norm": 0.9294025545570336,
      "learning_rate": 0.003,
      "loss": 4.0722,
      "step": 6067
    },
    {
      "epoch": 0.06068,
      "grad_norm": 1.0576231179156892,
      "learning_rate": 0.003,
      "loss": 4.0594,
      "step": 6068
    },
    {
      "epoch": 0.06069,
      "grad_norm": 1.0865552407589456,
      "learning_rate": 0.003,
      "loss": 4.0697,
      "step": 6069
    },
    {
      "epoch": 0.0607,
      "grad_norm": 1.0230751617256044,
      "learning_rate": 0.003,
      "loss": 4.0713,
      "step": 6070
    },
    {
      "epoch": 0.06071,
      "grad_norm": 0.999221755293697,
      "learning_rate": 0.003,
      "loss": 4.0444,
      "step": 6071
    },
    {
      "epoch": 0.06072,
      "grad_norm": 1.066655819343783,
      "learning_rate": 0.003,
      "loss": 4.0503,
      "step": 6072
    },
    {
      "epoch": 0.06073,
      "grad_norm": 1.0369526156758926,
      "learning_rate": 0.003,
      "loss": 4.0809,
      "step": 6073
    },
    {
      "epoch": 0.06074,
      "grad_norm": 1.075598582264553,
      "learning_rate": 0.003,
      "loss": 4.0639,
      "step": 6074
    },
    {
      "epoch": 0.06075,
      "grad_norm": 0.9953552796029778,
      "learning_rate": 0.003,
      "loss": 4.0863,
      "step": 6075
    },
    {
      "epoch": 0.06076,
      "grad_norm": 0.9778883253174944,
      "learning_rate": 0.003,
      "loss": 4.0711,
      "step": 6076
    },
    {
      "epoch": 0.06077,
      "grad_norm": 0.8468031501573927,
      "learning_rate": 0.003,
      "loss": 4.0618,
      "step": 6077
    },
    {
      "epoch": 0.06078,
      "grad_norm": 0.8305408752084648,
      "learning_rate": 0.003,
      "loss": 4.0692,
      "step": 6078
    },
    {
      "epoch": 0.06079,
      "grad_norm": 0.7960215844617263,
      "learning_rate": 0.003,
      "loss": 4.0589,
      "step": 6079
    },
    {
      "epoch": 0.0608,
      "grad_norm": 0.8359710386518069,
      "learning_rate": 0.003,
      "loss": 4.059,
      "step": 6080
    },
    {
      "epoch": 0.06081,
      "grad_norm": 1.0786121941538231,
      "learning_rate": 0.003,
      "loss": 4.0436,
      "step": 6081
    },
    {
      "epoch": 0.06082,
      "grad_norm": 1.1823183295621211,
      "learning_rate": 0.003,
      "loss": 4.0945,
      "step": 6082
    },
    {
      "epoch": 0.06083,
      "grad_norm": 0.8837771871886293,
      "learning_rate": 0.003,
      "loss": 4.0725,
      "step": 6083
    },
    {
      "epoch": 0.06084,
      "grad_norm": 0.7204472009727627,
      "learning_rate": 0.003,
      "loss": 4.0334,
      "step": 6084
    },
    {
      "epoch": 0.06085,
      "grad_norm": 0.7103714572162915,
      "learning_rate": 0.003,
      "loss": 4.0175,
      "step": 6085
    },
    {
      "epoch": 0.06086,
      "grad_norm": 0.811997878307769,
      "learning_rate": 0.003,
      "loss": 4.0277,
      "step": 6086
    },
    {
      "epoch": 0.06087,
      "grad_norm": 0.8073780584157964,
      "learning_rate": 0.003,
      "loss": 4.0667,
      "step": 6087
    },
    {
      "epoch": 0.06088,
      "grad_norm": 0.8544463123489077,
      "learning_rate": 0.003,
      "loss": 4.0438,
      "step": 6088
    },
    {
      "epoch": 0.06089,
      "grad_norm": 0.9762980822164021,
      "learning_rate": 0.003,
      "loss": 4.0461,
      "step": 6089
    },
    {
      "epoch": 0.0609,
      "grad_norm": 1.2142304992327353,
      "learning_rate": 0.003,
      "loss": 4.0934,
      "step": 6090
    },
    {
      "epoch": 0.06091,
      "grad_norm": 1.082695490566579,
      "learning_rate": 0.003,
      "loss": 4.0638,
      "step": 6091
    },
    {
      "epoch": 0.06092,
      "grad_norm": 0.9278093493414128,
      "learning_rate": 0.003,
      "loss": 4.0448,
      "step": 6092
    },
    {
      "epoch": 0.06093,
      "grad_norm": 0.8690782403614105,
      "learning_rate": 0.003,
      "loss": 4.0568,
      "step": 6093
    },
    {
      "epoch": 0.06094,
      "grad_norm": 0.8570778358605303,
      "learning_rate": 0.003,
      "loss": 4.0582,
      "step": 6094
    },
    {
      "epoch": 0.06095,
      "grad_norm": 0.8138189413941755,
      "learning_rate": 0.003,
      "loss": 4.0583,
      "step": 6095
    },
    {
      "epoch": 0.06096,
      "grad_norm": 0.9634421284092561,
      "learning_rate": 0.003,
      "loss": 4.0543,
      "step": 6096
    },
    {
      "epoch": 0.06097,
      "grad_norm": 1.0632007099690335,
      "learning_rate": 0.003,
      "loss": 4.071,
      "step": 6097
    },
    {
      "epoch": 0.06098,
      "grad_norm": 1.0871230047693712,
      "learning_rate": 0.003,
      "loss": 4.0427,
      "step": 6098
    },
    {
      "epoch": 0.06099,
      "grad_norm": 1.0812665303937632,
      "learning_rate": 0.003,
      "loss": 4.0647,
      "step": 6099
    },
    {
      "epoch": 0.061,
      "grad_norm": 0.9904572164775792,
      "learning_rate": 0.003,
      "loss": 4.0556,
      "step": 6100
    },
    {
      "epoch": 0.06101,
      "grad_norm": 1.2471241797218284,
      "learning_rate": 0.003,
      "loss": 4.0956,
      "step": 6101
    },
    {
      "epoch": 0.06102,
      "grad_norm": 0.9331918176411176,
      "learning_rate": 0.003,
      "loss": 4.066,
      "step": 6102
    },
    {
      "epoch": 0.06103,
      "grad_norm": 1.0358350532613663,
      "learning_rate": 0.003,
      "loss": 4.0846,
      "step": 6103
    },
    {
      "epoch": 0.06104,
      "grad_norm": 0.9863138814409834,
      "learning_rate": 0.003,
      "loss": 4.0527,
      "step": 6104
    },
    {
      "epoch": 0.06105,
      "grad_norm": 1.2154265908459023,
      "learning_rate": 0.003,
      "loss": 4.0682,
      "step": 6105
    },
    {
      "epoch": 0.06106,
      "grad_norm": 0.8917304789437333,
      "learning_rate": 0.003,
      "loss": 4.0784,
      "step": 6106
    },
    {
      "epoch": 0.06107,
      "grad_norm": 0.9427677486805567,
      "learning_rate": 0.003,
      "loss": 4.0905,
      "step": 6107
    },
    {
      "epoch": 0.06108,
      "grad_norm": 0.9771691970851738,
      "learning_rate": 0.003,
      "loss": 4.0566,
      "step": 6108
    },
    {
      "epoch": 0.06109,
      "grad_norm": 1.0200829167924297,
      "learning_rate": 0.003,
      "loss": 4.034,
      "step": 6109
    },
    {
      "epoch": 0.0611,
      "grad_norm": 1.0829855702592046,
      "learning_rate": 0.003,
      "loss": 4.0552,
      "step": 6110
    },
    {
      "epoch": 0.06111,
      "grad_norm": 1.11913249179738,
      "learning_rate": 0.003,
      "loss": 4.0725,
      "step": 6111
    },
    {
      "epoch": 0.06112,
      "grad_norm": 0.9956612041062181,
      "learning_rate": 0.003,
      "loss": 4.0665,
      "step": 6112
    },
    {
      "epoch": 0.06113,
      "grad_norm": 1.1071104171417576,
      "learning_rate": 0.003,
      "loss": 4.0805,
      "step": 6113
    },
    {
      "epoch": 0.06114,
      "grad_norm": 0.9095207699385705,
      "learning_rate": 0.003,
      "loss": 4.0361,
      "step": 6114
    },
    {
      "epoch": 0.06115,
      "grad_norm": 1.044965318613425,
      "learning_rate": 0.003,
      "loss": 4.0397,
      "step": 6115
    },
    {
      "epoch": 0.06116,
      "grad_norm": 1.2061603685848585,
      "learning_rate": 0.003,
      "loss": 4.0634,
      "step": 6116
    },
    {
      "epoch": 0.06117,
      "grad_norm": 0.9331914752531237,
      "learning_rate": 0.003,
      "loss": 4.0513,
      "step": 6117
    },
    {
      "epoch": 0.06118,
      "grad_norm": 0.9125472343562069,
      "learning_rate": 0.003,
      "loss": 4.0305,
      "step": 6118
    },
    {
      "epoch": 0.06119,
      "grad_norm": 0.7893638895463538,
      "learning_rate": 0.003,
      "loss": 4.0201,
      "step": 6119
    },
    {
      "epoch": 0.0612,
      "grad_norm": 0.9244864031831111,
      "learning_rate": 0.003,
      "loss": 4.0676,
      "step": 6120
    },
    {
      "epoch": 0.06121,
      "grad_norm": 0.9033631978733891,
      "learning_rate": 0.003,
      "loss": 4.0516,
      "step": 6121
    },
    {
      "epoch": 0.06122,
      "grad_norm": 1.0308394729354522,
      "learning_rate": 0.003,
      "loss": 4.072,
      "step": 6122
    },
    {
      "epoch": 0.06123,
      "grad_norm": 0.9549347100846058,
      "learning_rate": 0.003,
      "loss": 4.0668,
      "step": 6123
    },
    {
      "epoch": 0.06124,
      "grad_norm": 1.018979699685598,
      "learning_rate": 0.003,
      "loss": 4.062,
      "step": 6124
    },
    {
      "epoch": 0.06125,
      "grad_norm": 1.0396926470286123,
      "learning_rate": 0.003,
      "loss": 4.0273,
      "step": 6125
    },
    {
      "epoch": 0.06126,
      "grad_norm": 0.970338770093608,
      "learning_rate": 0.003,
      "loss": 4.0434,
      "step": 6126
    },
    {
      "epoch": 0.06127,
      "grad_norm": 0.9481470691411102,
      "learning_rate": 0.003,
      "loss": 4.0345,
      "step": 6127
    },
    {
      "epoch": 0.06128,
      "grad_norm": 1.0848389247188306,
      "learning_rate": 0.003,
      "loss": 4.0555,
      "step": 6128
    },
    {
      "epoch": 0.06129,
      "grad_norm": 1.0580408480527546,
      "learning_rate": 0.003,
      "loss": 4.0657,
      "step": 6129
    },
    {
      "epoch": 0.0613,
      "grad_norm": 0.9682253621336249,
      "learning_rate": 0.003,
      "loss": 4.0703,
      "step": 6130
    },
    {
      "epoch": 0.06131,
      "grad_norm": 0.9210655524658186,
      "learning_rate": 0.003,
      "loss": 4.064,
      "step": 6131
    },
    {
      "epoch": 0.06132,
      "grad_norm": 0.9155041280961395,
      "learning_rate": 0.003,
      "loss": 4.0639,
      "step": 6132
    },
    {
      "epoch": 0.06133,
      "grad_norm": 0.9789482932440452,
      "learning_rate": 0.003,
      "loss": 4.0567,
      "step": 6133
    },
    {
      "epoch": 0.06134,
      "grad_norm": 1.1135362136558498,
      "learning_rate": 0.003,
      "loss": 4.0534,
      "step": 6134
    },
    {
      "epoch": 0.06135,
      "grad_norm": 1.0670150504515052,
      "learning_rate": 0.003,
      "loss": 4.0778,
      "step": 6135
    },
    {
      "epoch": 0.06136,
      "grad_norm": 1.009306468929253,
      "learning_rate": 0.003,
      "loss": 4.0579,
      "step": 6136
    },
    {
      "epoch": 0.06137,
      "grad_norm": 1.1199332436482432,
      "learning_rate": 0.003,
      "loss": 4.0599,
      "step": 6137
    },
    {
      "epoch": 0.06138,
      "grad_norm": 0.9822960858920264,
      "learning_rate": 0.003,
      "loss": 4.0507,
      "step": 6138
    },
    {
      "epoch": 0.06139,
      "grad_norm": 1.061123881008166,
      "learning_rate": 0.003,
      "loss": 4.0757,
      "step": 6139
    },
    {
      "epoch": 0.0614,
      "grad_norm": 1.1067582756640688,
      "learning_rate": 0.003,
      "loss": 4.0344,
      "step": 6140
    },
    {
      "epoch": 0.06141,
      "grad_norm": 1.1197623616073298,
      "learning_rate": 0.003,
      "loss": 4.042,
      "step": 6141
    },
    {
      "epoch": 0.06142,
      "grad_norm": 0.9621463377982679,
      "learning_rate": 0.003,
      "loss": 4.0578,
      "step": 6142
    },
    {
      "epoch": 0.06143,
      "grad_norm": 0.9823524748170486,
      "learning_rate": 0.003,
      "loss": 4.068,
      "step": 6143
    },
    {
      "epoch": 0.06144,
      "grad_norm": 1.1256975016048572,
      "learning_rate": 0.003,
      "loss": 4.0513,
      "step": 6144
    },
    {
      "epoch": 0.06145,
      "grad_norm": 1.0964390916024676,
      "learning_rate": 0.003,
      "loss": 4.0708,
      "step": 6145
    },
    {
      "epoch": 0.06146,
      "grad_norm": 0.9340305536724448,
      "learning_rate": 0.003,
      "loss": 4.0359,
      "step": 6146
    },
    {
      "epoch": 0.06147,
      "grad_norm": 0.9639235550819623,
      "learning_rate": 0.003,
      "loss": 4.0905,
      "step": 6147
    },
    {
      "epoch": 0.06148,
      "grad_norm": 1.1141344848316928,
      "learning_rate": 0.003,
      "loss": 4.0649,
      "step": 6148
    },
    {
      "epoch": 0.06149,
      "grad_norm": 1.0008166080107452,
      "learning_rate": 0.003,
      "loss": 4.0554,
      "step": 6149
    },
    {
      "epoch": 0.0615,
      "grad_norm": 1.291300378500113,
      "learning_rate": 0.003,
      "loss": 4.077,
      "step": 6150
    },
    {
      "epoch": 0.06151,
      "grad_norm": 0.9179128052204282,
      "learning_rate": 0.003,
      "loss": 4.0654,
      "step": 6151
    },
    {
      "epoch": 0.06152,
      "grad_norm": 0.83811806335343,
      "learning_rate": 0.003,
      "loss": 4.0494,
      "step": 6152
    },
    {
      "epoch": 0.06153,
      "grad_norm": 1.0325588836325448,
      "learning_rate": 0.003,
      "loss": 4.0784,
      "step": 6153
    },
    {
      "epoch": 0.06154,
      "grad_norm": 1.0531062692602984,
      "learning_rate": 0.003,
      "loss": 4.0249,
      "step": 6154
    },
    {
      "epoch": 0.06155,
      "grad_norm": 0.9258868598309281,
      "learning_rate": 0.003,
      "loss": 4.0669,
      "step": 6155
    },
    {
      "epoch": 0.06156,
      "grad_norm": 0.8537558991823246,
      "learning_rate": 0.003,
      "loss": 4.0514,
      "step": 6156
    },
    {
      "epoch": 0.06157,
      "grad_norm": 0.915762781550664,
      "learning_rate": 0.003,
      "loss": 4.0722,
      "step": 6157
    },
    {
      "epoch": 0.06158,
      "grad_norm": 0.8548951465188689,
      "learning_rate": 0.003,
      "loss": 4.0914,
      "step": 6158
    },
    {
      "epoch": 0.06159,
      "grad_norm": 0.8765672907860784,
      "learning_rate": 0.003,
      "loss": 4.0516,
      "step": 6159
    },
    {
      "epoch": 0.0616,
      "grad_norm": 0.9374358381023599,
      "learning_rate": 0.003,
      "loss": 4.0471,
      "step": 6160
    },
    {
      "epoch": 0.06161,
      "grad_norm": 1.1299972768267692,
      "learning_rate": 0.003,
      "loss": 4.0791,
      "step": 6161
    },
    {
      "epoch": 0.06162,
      "grad_norm": 0.9317143658408135,
      "learning_rate": 0.003,
      "loss": 4.052,
      "step": 6162
    },
    {
      "epoch": 0.06163,
      "grad_norm": 0.8061151783232723,
      "learning_rate": 0.003,
      "loss": 4.0883,
      "step": 6163
    },
    {
      "epoch": 0.06164,
      "grad_norm": 0.7615643215210022,
      "learning_rate": 0.003,
      "loss": 4.0629,
      "step": 6164
    },
    {
      "epoch": 0.06165,
      "grad_norm": 0.7769881554996289,
      "learning_rate": 0.003,
      "loss": 4.0458,
      "step": 6165
    },
    {
      "epoch": 0.06166,
      "grad_norm": 0.8085532275590331,
      "learning_rate": 0.003,
      "loss": 4.0754,
      "step": 6166
    },
    {
      "epoch": 0.06167,
      "grad_norm": 0.855796871730468,
      "learning_rate": 0.003,
      "loss": 4.0468,
      "step": 6167
    },
    {
      "epoch": 0.06168,
      "grad_norm": 1.138731517223366,
      "learning_rate": 0.003,
      "loss": 4.065,
      "step": 6168
    },
    {
      "epoch": 0.06169,
      "grad_norm": 1.368396718368792,
      "learning_rate": 0.003,
      "loss": 4.068,
      "step": 6169
    },
    {
      "epoch": 0.0617,
      "grad_norm": 0.7455936313181897,
      "learning_rate": 0.003,
      "loss": 4.0551,
      "step": 6170
    },
    {
      "epoch": 0.06171,
      "grad_norm": 0.7443727028390393,
      "learning_rate": 0.003,
      "loss": 4.0588,
      "step": 6171
    },
    {
      "epoch": 0.06172,
      "grad_norm": 0.9360131411969993,
      "learning_rate": 0.003,
      "loss": 4.038,
      "step": 6172
    },
    {
      "epoch": 0.06173,
      "grad_norm": 1.0491839760469293,
      "learning_rate": 0.003,
      "loss": 4.0727,
      "step": 6173
    },
    {
      "epoch": 0.06174,
      "grad_norm": 0.9907442372695112,
      "learning_rate": 0.003,
      "loss": 4.0637,
      "step": 6174
    },
    {
      "epoch": 0.06175,
      "grad_norm": 1.0447539234835719,
      "learning_rate": 0.003,
      "loss": 4.0655,
      "step": 6175
    },
    {
      "epoch": 0.06176,
      "grad_norm": 1.010073314902336,
      "learning_rate": 0.003,
      "loss": 4.0622,
      "step": 6176
    },
    {
      "epoch": 0.06177,
      "grad_norm": 1.1559422707750275,
      "learning_rate": 0.003,
      "loss": 4.0625,
      "step": 6177
    },
    {
      "epoch": 0.06178,
      "grad_norm": 0.9526053774670727,
      "learning_rate": 0.003,
      "loss": 4.0514,
      "step": 6178
    },
    {
      "epoch": 0.06179,
      "grad_norm": 1.022987693024061,
      "learning_rate": 0.003,
      "loss": 4.0685,
      "step": 6179
    },
    {
      "epoch": 0.0618,
      "grad_norm": 1.0210445225525222,
      "learning_rate": 0.003,
      "loss": 4.0649,
      "step": 6180
    },
    {
      "epoch": 0.06181,
      "grad_norm": 0.9583722005427316,
      "learning_rate": 0.003,
      "loss": 4.0827,
      "step": 6181
    },
    {
      "epoch": 0.06182,
      "grad_norm": 1.0777328688245935,
      "learning_rate": 0.003,
      "loss": 4.0446,
      "step": 6182
    },
    {
      "epoch": 0.06183,
      "grad_norm": 1.1218720835460165,
      "learning_rate": 0.003,
      "loss": 4.0603,
      "step": 6183
    },
    {
      "epoch": 0.06184,
      "grad_norm": 0.8901736945496374,
      "learning_rate": 0.003,
      "loss": 4.055,
      "step": 6184
    },
    {
      "epoch": 0.06185,
      "grad_norm": 0.7821970283133759,
      "learning_rate": 0.003,
      "loss": 4.0442,
      "step": 6185
    },
    {
      "epoch": 0.06186,
      "grad_norm": 0.8340351304034461,
      "learning_rate": 0.003,
      "loss": 4.0706,
      "step": 6186
    },
    {
      "epoch": 0.06187,
      "grad_norm": 0.826969525348011,
      "learning_rate": 0.003,
      "loss": 4.0755,
      "step": 6187
    },
    {
      "epoch": 0.06188,
      "grad_norm": 0.8321316542169037,
      "learning_rate": 0.003,
      "loss": 4.0532,
      "step": 6188
    },
    {
      "epoch": 0.06189,
      "grad_norm": 1.0085697971600716,
      "learning_rate": 0.003,
      "loss": 4.0459,
      "step": 6189
    },
    {
      "epoch": 0.0619,
      "grad_norm": 1.2088449819616511,
      "learning_rate": 0.003,
      "loss": 4.098,
      "step": 6190
    },
    {
      "epoch": 0.06191,
      "grad_norm": 1.052029182606652,
      "learning_rate": 0.003,
      "loss": 4.0455,
      "step": 6191
    },
    {
      "epoch": 0.06192,
      "grad_norm": 0.9984353604886477,
      "learning_rate": 0.003,
      "loss": 4.0488,
      "step": 6192
    },
    {
      "epoch": 0.06193,
      "grad_norm": 1.2795032941383804,
      "learning_rate": 0.003,
      "loss": 4.023,
      "step": 6193
    },
    {
      "epoch": 0.06194,
      "grad_norm": 0.7998102276672221,
      "learning_rate": 0.003,
      "loss": 4.0387,
      "step": 6194
    },
    {
      "epoch": 0.06195,
      "grad_norm": 0.7435037294270525,
      "learning_rate": 0.003,
      "loss": 4.0562,
      "step": 6195
    },
    {
      "epoch": 0.06196,
      "grad_norm": 0.9240469454378734,
      "learning_rate": 0.003,
      "loss": 4.0715,
      "step": 6196
    },
    {
      "epoch": 0.06197,
      "grad_norm": 1.1696222467202784,
      "learning_rate": 0.003,
      "loss": 4.0628,
      "step": 6197
    },
    {
      "epoch": 0.06198,
      "grad_norm": 0.9340202086200383,
      "learning_rate": 0.003,
      "loss": 4.0852,
      "step": 6198
    },
    {
      "epoch": 0.06199,
      "grad_norm": 0.8407582922074252,
      "learning_rate": 0.003,
      "loss": 4.0732,
      "step": 6199
    },
    {
      "epoch": 0.062,
      "grad_norm": 0.8797469108168021,
      "learning_rate": 0.003,
      "loss": 4.0781,
      "step": 6200
    },
    {
      "epoch": 0.06201,
      "grad_norm": 0.9885503711260389,
      "learning_rate": 0.003,
      "loss": 4.0471,
      "step": 6201
    },
    {
      "epoch": 0.06202,
      "grad_norm": 1.033995787879555,
      "learning_rate": 0.003,
      "loss": 4.0951,
      "step": 6202
    },
    {
      "epoch": 0.06203,
      "grad_norm": 0.9934884285362043,
      "learning_rate": 0.003,
      "loss": 4.0768,
      "step": 6203
    },
    {
      "epoch": 0.06204,
      "grad_norm": 1.2510710857513951,
      "learning_rate": 0.003,
      "loss": 4.0786,
      "step": 6204
    },
    {
      "epoch": 0.06205,
      "grad_norm": 0.9427630454595206,
      "learning_rate": 0.003,
      "loss": 4.0766,
      "step": 6205
    },
    {
      "epoch": 0.06206,
      "grad_norm": 0.9038925258511812,
      "learning_rate": 0.003,
      "loss": 4.0634,
      "step": 6206
    },
    {
      "epoch": 0.06207,
      "grad_norm": 0.86210908819428,
      "learning_rate": 0.003,
      "loss": 4.0249,
      "step": 6207
    },
    {
      "epoch": 0.06208,
      "grad_norm": 0.8083534046824736,
      "learning_rate": 0.003,
      "loss": 4.0518,
      "step": 6208
    },
    {
      "epoch": 0.06209,
      "grad_norm": 0.906637525628039,
      "learning_rate": 0.003,
      "loss": 4.038,
      "step": 6209
    },
    {
      "epoch": 0.0621,
      "grad_norm": 1.1049751485709722,
      "learning_rate": 0.003,
      "loss": 4.1017,
      "step": 6210
    },
    {
      "epoch": 0.06211,
      "grad_norm": 0.8465215117222046,
      "learning_rate": 0.003,
      "loss": 4.0576,
      "step": 6211
    },
    {
      "epoch": 0.06212,
      "grad_norm": 0.926150960560891,
      "learning_rate": 0.003,
      "loss": 4.0666,
      "step": 6212
    },
    {
      "epoch": 0.06213,
      "grad_norm": 1.1848974987642524,
      "learning_rate": 0.003,
      "loss": 4.0533,
      "step": 6213
    },
    {
      "epoch": 0.06214,
      "grad_norm": 0.9608260526041966,
      "learning_rate": 0.003,
      "loss": 4.0503,
      "step": 6214
    },
    {
      "epoch": 0.06215,
      "grad_norm": 1.0487901084020896,
      "learning_rate": 0.003,
      "loss": 4.0698,
      "step": 6215
    },
    {
      "epoch": 0.06216,
      "grad_norm": 1.0157866198312488,
      "learning_rate": 0.003,
      "loss": 4.0986,
      "step": 6216
    },
    {
      "epoch": 0.06217,
      "grad_norm": 1.1412151022720478,
      "learning_rate": 0.003,
      "loss": 4.0637,
      "step": 6217
    },
    {
      "epoch": 0.06218,
      "grad_norm": 1.006247649190211,
      "learning_rate": 0.003,
      "loss": 4.0467,
      "step": 6218
    },
    {
      "epoch": 0.06219,
      "grad_norm": 1.2294549857751111,
      "learning_rate": 0.003,
      "loss": 4.0871,
      "step": 6219
    },
    {
      "epoch": 0.0622,
      "grad_norm": 0.995208064028826,
      "learning_rate": 0.003,
      "loss": 4.0748,
      "step": 6220
    },
    {
      "epoch": 0.06221,
      "grad_norm": 0.9162212484056992,
      "learning_rate": 0.003,
      "loss": 4.0763,
      "step": 6221
    },
    {
      "epoch": 0.06222,
      "grad_norm": 0.8454292925737381,
      "learning_rate": 0.003,
      "loss": 4.0584,
      "step": 6222
    },
    {
      "epoch": 0.06223,
      "grad_norm": 0.8608341935906129,
      "learning_rate": 0.003,
      "loss": 4.0659,
      "step": 6223
    },
    {
      "epoch": 0.06224,
      "grad_norm": 1.0517375571967738,
      "learning_rate": 0.003,
      "loss": 4.0411,
      "step": 6224
    },
    {
      "epoch": 0.06225,
      "grad_norm": 1.167553303659172,
      "learning_rate": 0.003,
      "loss": 4.072,
      "step": 6225
    },
    {
      "epoch": 0.06226,
      "grad_norm": 0.9413390695324375,
      "learning_rate": 0.003,
      "loss": 4.0469,
      "step": 6226
    },
    {
      "epoch": 0.06227,
      "grad_norm": 1.1275346673821272,
      "learning_rate": 0.003,
      "loss": 4.0645,
      "step": 6227
    },
    {
      "epoch": 0.06228,
      "grad_norm": 1.071326538063058,
      "learning_rate": 0.003,
      "loss": 4.0725,
      "step": 6228
    },
    {
      "epoch": 0.06229,
      "grad_norm": 1.0923671993434136,
      "learning_rate": 0.003,
      "loss": 4.0553,
      "step": 6229
    },
    {
      "epoch": 0.0623,
      "grad_norm": 1.1044316515319128,
      "learning_rate": 0.003,
      "loss": 4.0685,
      "step": 6230
    },
    {
      "epoch": 0.06231,
      "grad_norm": 1.1192386233956613,
      "learning_rate": 0.003,
      "loss": 4.0587,
      "step": 6231
    },
    {
      "epoch": 0.06232,
      "grad_norm": 1.0749004721163167,
      "learning_rate": 0.003,
      "loss": 4.0525,
      "step": 6232
    },
    {
      "epoch": 0.06233,
      "grad_norm": 1.136441483294293,
      "learning_rate": 0.003,
      "loss": 4.0522,
      "step": 6233
    },
    {
      "epoch": 0.06234,
      "grad_norm": 1.096293829074208,
      "learning_rate": 0.003,
      "loss": 4.057,
      "step": 6234
    },
    {
      "epoch": 0.06235,
      "grad_norm": 0.9421513147723747,
      "learning_rate": 0.003,
      "loss": 4.0564,
      "step": 6235
    },
    {
      "epoch": 0.06236,
      "grad_norm": 0.9920661776334326,
      "learning_rate": 0.003,
      "loss": 4.0702,
      "step": 6236
    },
    {
      "epoch": 0.06237,
      "grad_norm": 1.009784196859223,
      "learning_rate": 0.003,
      "loss": 4.0601,
      "step": 6237
    },
    {
      "epoch": 0.06238,
      "grad_norm": 0.9870780579077241,
      "learning_rate": 0.003,
      "loss": 4.0585,
      "step": 6238
    },
    {
      "epoch": 0.06239,
      "grad_norm": 1.2624770661972455,
      "learning_rate": 0.003,
      "loss": 4.0485,
      "step": 6239
    },
    {
      "epoch": 0.0624,
      "grad_norm": 0.8345122687975308,
      "learning_rate": 0.003,
      "loss": 4.053,
      "step": 6240
    },
    {
      "epoch": 0.06241,
      "grad_norm": 0.8767674013310682,
      "learning_rate": 0.003,
      "loss": 4.0328,
      "step": 6241
    },
    {
      "epoch": 0.06242,
      "grad_norm": 0.8694164086668165,
      "learning_rate": 0.003,
      "loss": 4.0588,
      "step": 6242
    },
    {
      "epoch": 0.06243,
      "grad_norm": 0.8968698662271256,
      "learning_rate": 0.003,
      "loss": 4.0507,
      "step": 6243
    },
    {
      "epoch": 0.06244,
      "grad_norm": 1.0969155124189833,
      "learning_rate": 0.003,
      "loss": 4.0874,
      "step": 6244
    },
    {
      "epoch": 0.06245,
      "grad_norm": 0.9267212634506801,
      "learning_rate": 0.003,
      "loss": 4.059,
      "step": 6245
    },
    {
      "epoch": 0.06246,
      "grad_norm": 1.0070455660568638,
      "learning_rate": 0.003,
      "loss": 4.0765,
      "step": 6246
    },
    {
      "epoch": 0.06247,
      "grad_norm": 1.1508646699867722,
      "learning_rate": 0.003,
      "loss": 4.0655,
      "step": 6247
    },
    {
      "epoch": 0.06248,
      "grad_norm": 0.9383548414378287,
      "learning_rate": 0.003,
      "loss": 4.0617,
      "step": 6248
    },
    {
      "epoch": 0.06249,
      "grad_norm": 0.8885352213986607,
      "learning_rate": 0.003,
      "loss": 4.0748,
      "step": 6249
    },
    {
      "epoch": 0.0625,
      "grad_norm": 0.8342972923726877,
      "learning_rate": 0.003,
      "loss": 4.0557,
      "step": 6250
    },
    {
      "epoch": 0.06251,
      "grad_norm": 0.9237428978300573,
      "learning_rate": 0.003,
      "loss": 4.0477,
      "step": 6251
    },
    {
      "epoch": 0.06252,
      "grad_norm": 1.084993649871814,
      "learning_rate": 0.003,
      "loss": 4.0707,
      "step": 6252
    },
    {
      "epoch": 0.06253,
      "grad_norm": 1.0323949223042157,
      "learning_rate": 0.003,
      "loss": 4.0613,
      "step": 6253
    },
    {
      "epoch": 0.06254,
      "grad_norm": 0.971393898605744,
      "learning_rate": 0.003,
      "loss": 4.0551,
      "step": 6254
    },
    {
      "epoch": 0.06255,
      "grad_norm": 0.9706954283600536,
      "learning_rate": 0.003,
      "loss": 4.0673,
      "step": 6255
    },
    {
      "epoch": 0.06256,
      "grad_norm": 0.9415015290908161,
      "learning_rate": 0.003,
      "loss": 4.0411,
      "step": 6256
    },
    {
      "epoch": 0.06257,
      "grad_norm": 0.8621578985318906,
      "learning_rate": 0.003,
      "loss": 4.0182,
      "step": 6257
    },
    {
      "epoch": 0.06258,
      "grad_norm": 0.9548440225051628,
      "learning_rate": 0.003,
      "loss": 4.0723,
      "step": 6258
    },
    {
      "epoch": 0.06259,
      "grad_norm": 0.9263497464951767,
      "learning_rate": 0.003,
      "loss": 4.088,
      "step": 6259
    },
    {
      "epoch": 0.0626,
      "grad_norm": 0.8908902137670806,
      "learning_rate": 0.003,
      "loss": 4.0587,
      "step": 6260
    },
    {
      "epoch": 0.06261,
      "grad_norm": 0.8327652914912796,
      "learning_rate": 0.003,
      "loss": 4.0418,
      "step": 6261
    },
    {
      "epoch": 0.06262,
      "grad_norm": 0.9538403925841564,
      "learning_rate": 0.003,
      "loss": 4.0491,
      "step": 6262
    },
    {
      "epoch": 0.06263,
      "grad_norm": 1.1599473881323492,
      "learning_rate": 0.003,
      "loss": 4.0877,
      "step": 6263
    },
    {
      "epoch": 0.06264,
      "grad_norm": 1.0110607933412514,
      "learning_rate": 0.003,
      "loss": 4.0557,
      "step": 6264
    },
    {
      "epoch": 0.06265,
      "grad_norm": 1.0076482190602307,
      "learning_rate": 0.003,
      "loss": 4.0781,
      "step": 6265
    },
    {
      "epoch": 0.06266,
      "grad_norm": 0.8737903635942322,
      "learning_rate": 0.003,
      "loss": 4.056,
      "step": 6266
    },
    {
      "epoch": 0.06267,
      "grad_norm": 0.8712992228434959,
      "learning_rate": 0.003,
      "loss": 4.0625,
      "step": 6267
    },
    {
      "epoch": 0.06268,
      "grad_norm": 1.017260911789287,
      "learning_rate": 0.003,
      "loss": 4.0365,
      "step": 6268
    },
    {
      "epoch": 0.06269,
      "grad_norm": 0.997967424239097,
      "learning_rate": 0.003,
      "loss": 4.0447,
      "step": 6269
    },
    {
      "epoch": 0.0627,
      "grad_norm": 1.0751801208147977,
      "learning_rate": 0.003,
      "loss": 4.0655,
      "step": 6270
    },
    {
      "epoch": 0.06271,
      "grad_norm": 1.0371657284207103,
      "learning_rate": 0.003,
      "loss": 4.0772,
      "step": 6271
    },
    {
      "epoch": 0.06272,
      "grad_norm": 1.2803244076660563,
      "learning_rate": 0.003,
      "loss": 4.0965,
      "step": 6272
    },
    {
      "epoch": 0.06273,
      "grad_norm": 0.9500833272967772,
      "learning_rate": 0.003,
      "loss": 4.0848,
      "step": 6273
    },
    {
      "epoch": 0.06274,
      "grad_norm": 1.0958729001288292,
      "learning_rate": 0.003,
      "loss": 4.0689,
      "step": 6274
    },
    {
      "epoch": 0.06275,
      "grad_norm": 1.141758768594646,
      "learning_rate": 0.003,
      "loss": 4.0612,
      "step": 6275
    },
    {
      "epoch": 0.06276,
      "grad_norm": 0.9628716788683748,
      "learning_rate": 0.003,
      "loss": 4.0732,
      "step": 6276
    },
    {
      "epoch": 0.06277,
      "grad_norm": 1.0976060075916712,
      "learning_rate": 0.003,
      "loss": 4.0789,
      "step": 6277
    },
    {
      "epoch": 0.06278,
      "grad_norm": 1.016066298115058,
      "learning_rate": 0.003,
      "loss": 4.0274,
      "step": 6278
    },
    {
      "epoch": 0.06279,
      "grad_norm": 0.8769401740306906,
      "learning_rate": 0.003,
      "loss": 4.0856,
      "step": 6279
    },
    {
      "epoch": 0.0628,
      "grad_norm": 0.9170763425191215,
      "learning_rate": 0.003,
      "loss": 4.0569,
      "step": 6280
    },
    {
      "epoch": 0.06281,
      "grad_norm": 1.111920165775686,
      "learning_rate": 0.003,
      "loss": 4.0385,
      "step": 6281
    },
    {
      "epoch": 0.06282,
      "grad_norm": 1.0224143144159263,
      "learning_rate": 0.003,
      "loss": 4.0638,
      "step": 6282
    },
    {
      "epoch": 0.06283,
      "grad_norm": 1.1015506226196767,
      "learning_rate": 0.003,
      "loss": 4.0439,
      "step": 6283
    },
    {
      "epoch": 0.06284,
      "grad_norm": 0.9844477787774102,
      "learning_rate": 0.003,
      "loss": 4.066,
      "step": 6284
    },
    {
      "epoch": 0.06285,
      "grad_norm": 1.0020352933530818,
      "learning_rate": 0.003,
      "loss": 4.0678,
      "step": 6285
    },
    {
      "epoch": 0.06286,
      "grad_norm": 0.8631477379490902,
      "learning_rate": 0.003,
      "loss": 4.0516,
      "step": 6286
    },
    {
      "epoch": 0.06287,
      "grad_norm": 0.8888287717900007,
      "learning_rate": 0.003,
      "loss": 4.0439,
      "step": 6287
    },
    {
      "epoch": 0.06288,
      "grad_norm": 0.8183385522469628,
      "learning_rate": 0.003,
      "loss": 4.0729,
      "step": 6288
    },
    {
      "epoch": 0.06289,
      "grad_norm": 0.7548987473458644,
      "learning_rate": 0.003,
      "loss": 4.0244,
      "step": 6289
    },
    {
      "epoch": 0.0629,
      "grad_norm": 0.839173523396433,
      "learning_rate": 0.003,
      "loss": 4.0495,
      "step": 6290
    },
    {
      "epoch": 0.06291,
      "grad_norm": 0.8617471545841462,
      "learning_rate": 0.003,
      "loss": 4.0262,
      "step": 6291
    },
    {
      "epoch": 0.06292,
      "grad_norm": 0.8638474112750509,
      "learning_rate": 0.003,
      "loss": 4.05,
      "step": 6292
    },
    {
      "epoch": 0.06293,
      "grad_norm": 0.9541204019425185,
      "learning_rate": 0.003,
      "loss": 4.0118,
      "step": 6293
    },
    {
      "epoch": 0.06294,
      "grad_norm": 1.1723643539699384,
      "learning_rate": 0.003,
      "loss": 4.0692,
      "step": 6294
    },
    {
      "epoch": 0.06295,
      "grad_norm": 1.103442251330168,
      "learning_rate": 0.003,
      "loss": 4.0127,
      "step": 6295
    },
    {
      "epoch": 0.06296,
      "grad_norm": 1.0703680632965407,
      "learning_rate": 0.003,
      "loss": 4.086,
      "step": 6296
    },
    {
      "epoch": 0.06297,
      "grad_norm": 1.0274650320053247,
      "learning_rate": 0.003,
      "loss": 4.0578,
      "step": 6297
    },
    {
      "epoch": 0.06298,
      "grad_norm": 0.89154419061986,
      "learning_rate": 0.003,
      "loss": 4.0563,
      "step": 6298
    },
    {
      "epoch": 0.06299,
      "grad_norm": 0.9135944828728849,
      "learning_rate": 0.003,
      "loss": 4.054,
      "step": 6299
    },
    {
      "epoch": 0.063,
      "grad_norm": 0.8267113445895482,
      "learning_rate": 0.003,
      "loss": 4.1012,
      "step": 6300
    },
    {
      "epoch": 0.06301,
      "grad_norm": 1.0037936149372095,
      "learning_rate": 0.003,
      "loss": 4.0812,
      "step": 6301
    },
    {
      "epoch": 0.06302,
      "grad_norm": 1.2860071610961534,
      "learning_rate": 0.003,
      "loss": 4.0776,
      "step": 6302
    },
    {
      "epoch": 0.06303,
      "grad_norm": 1.0162453119057104,
      "learning_rate": 0.003,
      "loss": 4.073,
      "step": 6303
    },
    {
      "epoch": 0.06304,
      "grad_norm": 1.0450143375967518,
      "learning_rate": 0.003,
      "loss": 4.0866,
      "step": 6304
    },
    {
      "epoch": 0.06305,
      "grad_norm": 1.1401136900936193,
      "learning_rate": 0.003,
      "loss": 4.0966,
      "step": 6305
    },
    {
      "epoch": 0.06306,
      "grad_norm": 1.0466179358101595,
      "learning_rate": 0.003,
      "loss": 4.0546,
      "step": 6306
    },
    {
      "epoch": 0.06307,
      "grad_norm": 1.1366527233292367,
      "learning_rate": 0.003,
      "loss": 4.0551,
      "step": 6307
    },
    {
      "epoch": 0.06308,
      "grad_norm": 0.8947523569057309,
      "learning_rate": 0.003,
      "loss": 4.0677,
      "step": 6308
    },
    {
      "epoch": 0.06309,
      "grad_norm": 0.9787036236305489,
      "learning_rate": 0.003,
      "loss": 4.0577,
      "step": 6309
    },
    {
      "epoch": 0.0631,
      "grad_norm": 1.1060093552214123,
      "learning_rate": 0.003,
      "loss": 4.0632,
      "step": 6310
    },
    {
      "epoch": 0.06311,
      "grad_norm": 0.934221683083381,
      "learning_rate": 0.003,
      "loss": 4.0527,
      "step": 6311
    },
    {
      "epoch": 0.06312,
      "grad_norm": 1.0377549576803702,
      "learning_rate": 0.003,
      "loss": 4.0502,
      "step": 6312
    },
    {
      "epoch": 0.06313,
      "grad_norm": 0.9581325400036763,
      "learning_rate": 0.003,
      "loss": 4.0732,
      "step": 6313
    },
    {
      "epoch": 0.06314,
      "grad_norm": 1.1035319349717054,
      "learning_rate": 0.003,
      "loss": 4.0827,
      "step": 6314
    },
    {
      "epoch": 0.06315,
      "grad_norm": 0.9924113339745636,
      "learning_rate": 0.003,
      "loss": 4.0596,
      "step": 6315
    },
    {
      "epoch": 0.06316,
      "grad_norm": 1.0717645296978155,
      "learning_rate": 0.003,
      "loss": 4.0332,
      "step": 6316
    },
    {
      "epoch": 0.06317,
      "grad_norm": 1.0557719176935128,
      "learning_rate": 0.003,
      "loss": 4.0608,
      "step": 6317
    },
    {
      "epoch": 0.06318,
      "grad_norm": 1.151290740982202,
      "learning_rate": 0.003,
      "loss": 4.1051,
      "step": 6318
    },
    {
      "epoch": 0.06319,
      "grad_norm": 1.1038168869223615,
      "learning_rate": 0.003,
      "loss": 4.1059,
      "step": 6319
    },
    {
      "epoch": 0.0632,
      "grad_norm": 0.9945691257477356,
      "learning_rate": 0.003,
      "loss": 4.0637,
      "step": 6320
    },
    {
      "epoch": 0.06321,
      "grad_norm": 1.0211879444413088,
      "learning_rate": 0.003,
      "loss": 4.0764,
      "step": 6321
    },
    {
      "epoch": 0.06322,
      "grad_norm": 1.074648930120513,
      "learning_rate": 0.003,
      "loss": 4.065,
      "step": 6322
    },
    {
      "epoch": 0.06323,
      "grad_norm": 1.0531931604114575,
      "learning_rate": 0.003,
      "loss": 4.0547,
      "step": 6323
    },
    {
      "epoch": 0.06324,
      "grad_norm": 1.0525740567574657,
      "learning_rate": 0.003,
      "loss": 4.0737,
      "step": 6324
    },
    {
      "epoch": 0.06325,
      "grad_norm": 1.0156601469799102,
      "learning_rate": 0.003,
      "loss": 4.0545,
      "step": 6325
    },
    {
      "epoch": 0.06326,
      "grad_norm": 1.0885712514526227,
      "learning_rate": 0.003,
      "loss": 4.0693,
      "step": 6326
    },
    {
      "epoch": 0.06327,
      "grad_norm": 0.9064082305136328,
      "learning_rate": 0.003,
      "loss": 4.0338,
      "step": 6327
    },
    {
      "epoch": 0.06328,
      "grad_norm": 0.9294707681271361,
      "learning_rate": 0.003,
      "loss": 4.084,
      "step": 6328
    },
    {
      "epoch": 0.06329,
      "grad_norm": 1.3371931683837754,
      "learning_rate": 0.003,
      "loss": 4.0765,
      "step": 6329
    },
    {
      "epoch": 0.0633,
      "grad_norm": 0.8781010739013081,
      "learning_rate": 0.003,
      "loss": 4.0438,
      "step": 6330
    },
    {
      "epoch": 0.06331,
      "grad_norm": 0.8544193926722347,
      "learning_rate": 0.003,
      "loss": 4.0408,
      "step": 6331
    },
    {
      "epoch": 0.06332,
      "grad_norm": 0.8011724000742859,
      "learning_rate": 0.003,
      "loss": 4.0395,
      "step": 6332
    },
    {
      "epoch": 0.06333,
      "grad_norm": 0.7119981509484664,
      "learning_rate": 0.003,
      "loss": 4.0413,
      "step": 6333
    },
    {
      "epoch": 0.06334,
      "grad_norm": 0.6668806340124519,
      "learning_rate": 0.003,
      "loss": 4.0426,
      "step": 6334
    },
    {
      "epoch": 0.06335,
      "grad_norm": 0.8212002598201582,
      "learning_rate": 0.003,
      "loss": 4.0129,
      "step": 6335
    },
    {
      "epoch": 0.06336,
      "grad_norm": 0.8088000326714004,
      "learning_rate": 0.003,
      "loss": 4.0736,
      "step": 6336
    },
    {
      "epoch": 0.06337,
      "grad_norm": 0.8536839468376971,
      "learning_rate": 0.003,
      "loss": 4.0773,
      "step": 6337
    },
    {
      "epoch": 0.06338,
      "grad_norm": 1.1037572047297124,
      "learning_rate": 0.003,
      "loss": 4.065,
      "step": 6338
    },
    {
      "epoch": 0.06339,
      "grad_norm": 1.3377815021844122,
      "learning_rate": 0.003,
      "loss": 4.048,
      "step": 6339
    },
    {
      "epoch": 0.0634,
      "grad_norm": 0.6048765308025956,
      "learning_rate": 0.003,
      "loss": 4.032,
      "step": 6340
    },
    {
      "epoch": 0.06341,
      "grad_norm": 0.7799193061695396,
      "learning_rate": 0.003,
      "loss": 4.0294,
      "step": 6341
    },
    {
      "epoch": 0.06342,
      "grad_norm": 1.1151041355238953,
      "learning_rate": 0.003,
      "loss": 4.0621,
      "step": 6342
    },
    {
      "epoch": 0.06343,
      "grad_norm": 1.1674486948458083,
      "learning_rate": 0.003,
      "loss": 4.0746,
      "step": 6343
    },
    {
      "epoch": 0.06344,
      "grad_norm": 1.031837089911801,
      "learning_rate": 0.003,
      "loss": 4.0284,
      "step": 6344
    },
    {
      "epoch": 0.06345,
      "grad_norm": 0.8615252116136146,
      "learning_rate": 0.003,
      "loss": 4.0436,
      "step": 6345
    },
    {
      "epoch": 0.06346,
      "grad_norm": 0.8103372667424406,
      "learning_rate": 0.003,
      "loss": 4.0604,
      "step": 6346
    },
    {
      "epoch": 0.06347,
      "grad_norm": 0.8933549042384386,
      "learning_rate": 0.003,
      "loss": 4.0482,
      "step": 6347
    },
    {
      "epoch": 0.06348,
      "grad_norm": 0.9637829939992113,
      "learning_rate": 0.003,
      "loss": 4.0467,
      "step": 6348
    },
    {
      "epoch": 0.06349,
      "grad_norm": 0.9135048719984086,
      "learning_rate": 0.003,
      "loss": 4.0421,
      "step": 6349
    },
    {
      "epoch": 0.0635,
      "grad_norm": 1.1242591431500657,
      "learning_rate": 0.003,
      "loss": 4.0444,
      "step": 6350
    },
    {
      "epoch": 0.06351,
      "grad_norm": 0.9980317399940769,
      "learning_rate": 0.003,
      "loss": 4.0697,
      "step": 6351
    },
    {
      "epoch": 0.06352,
      "grad_norm": 1.0451922994538014,
      "learning_rate": 0.003,
      "loss": 4.0468,
      "step": 6352
    },
    {
      "epoch": 0.06353,
      "grad_norm": 0.9845927560222498,
      "learning_rate": 0.003,
      "loss": 4.0626,
      "step": 6353
    },
    {
      "epoch": 0.06354,
      "grad_norm": 1.096403795880879,
      "learning_rate": 0.003,
      "loss": 4.059,
      "step": 6354
    },
    {
      "epoch": 0.06355,
      "grad_norm": 0.9983219209770657,
      "learning_rate": 0.003,
      "loss": 4.0678,
      "step": 6355
    },
    {
      "epoch": 0.06356,
      "grad_norm": 1.187703733940572,
      "learning_rate": 0.003,
      "loss": 4.0629,
      "step": 6356
    },
    {
      "epoch": 0.06357,
      "grad_norm": 0.9615101296213491,
      "learning_rate": 0.003,
      "loss": 4.0776,
      "step": 6357
    },
    {
      "epoch": 0.06358,
      "grad_norm": 1.1451194898172852,
      "learning_rate": 0.003,
      "loss": 4.0503,
      "step": 6358
    },
    {
      "epoch": 0.06359,
      "grad_norm": 1.0483116074347743,
      "learning_rate": 0.003,
      "loss": 4.0521,
      "step": 6359
    },
    {
      "epoch": 0.0636,
      "grad_norm": 0.9340547243285371,
      "learning_rate": 0.003,
      "loss": 4.075,
      "step": 6360
    },
    {
      "epoch": 0.06361,
      "grad_norm": 0.8836803848175515,
      "learning_rate": 0.003,
      "loss": 4.0662,
      "step": 6361
    },
    {
      "epoch": 0.06362,
      "grad_norm": 0.8297069107111867,
      "learning_rate": 0.003,
      "loss": 4.0722,
      "step": 6362
    },
    {
      "epoch": 0.06363,
      "grad_norm": 0.9691564696742783,
      "learning_rate": 0.003,
      "loss": 4.0568,
      "step": 6363
    },
    {
      "epoch": 0.06364,
      "grad_norm": 1.0194286737320961,
      "learning_rate": 0.003,
      "loss": 4.0563,
      "step": 6364
    },
    {
      "epoch": 0.06365,
      "grad_norm": 1.0411408567959355,
      "learning_rate": 0.003,
      "loss": 4.0462,
      "step": 6365
    },
    {
      "epoch": 0.06366,
      "grad_norm": 0.9916919363829898,
      "learning_rate": 0.003,
      "loss": 4.0733,
      "step": 6366
    },
    {
      "epoch": 0.06367,
      "grad_norm": 0.9560672684375412,
      "learning_rate": 0.003,
      "loss": 4.0537,
      "step": 6367
    },
    {
      "epoch": 0.06368,
      "grad_norm": 1.1782070353358838,
      "learning_rate": 0.003,
      "loss": 4.092,
      "step": 6368
    },
    {
      "epoch": 0.06369,
      "grad_norm": 1.133684868527638,
      "learning_rate": 0.003,
      "loss": 4.099,
      "step": 6369
    },
    {
      "epoch": 0.0637,
      "grad_norm": 0.8986796581542652,
      "learning_rate": 0.003,
      "loss": 4.0545,
      "step": 6370
    },
    {
      "epoch": 0.06371,
      "grad_norm": 0.798117831160487,
      "learning_rate": 0.003,
      "loss": 4.0688,
      "step": 6371
    },
    {
      "epoch": 0.06372,
      "grad_norm": 0.841679881049868,
      "learning_rate": 0.003,
      "loss": 4.0662,
      "step": 6372
    },
    {
      "epoch": 0.06373,
      "grad_norm": 0.912755996468821,
      "learning_rate": 0.003,
      "loss": 4.0594,
      "step": 6373
    },
    {
      "epoch": 0.06374,
      "grad_norm": 0.8902631081176816,
      "learning_rate": 0.003,
      "loss": 4.0176,
      "step": 6374
    },
    {
      "epoch": 0.06375,
      "grad_norm": 0.9176190979451236,
      "learning_rate": 0.003,
      "loss": 4.0379,
      "step": 6375
    },
    {
      "epoch": 0.06376,
      "grad_norm": 1.0740748411500862,
      "learning_rate": 0.003,
      "loss": 4.0668,
      "step": 6376
    },
    {
      "epoch": 0.06377,
      "grad_norm": 1.1585312174412248,
      "learning_rate": 0.003,
      "loss": 4.0384,
      "step": 6377
    },
    {
      "epoch": 0.06378,
      "grad_norm": 1.0560699306688008,
      "learning_rate": 0.003,
      "loss": 4.0711,
      "step": 6378
    },
    {
      "epoch": 0.06379,
      "grad_norm": 1.2860487561573273,
      "learning_rate": 0.003,
      "loss": 4.0802,
      "step": 6379
    },
    {
      "epoch": 0.0638,
      "grad_norm": 0.8690291787190746,
      "learning_rate": 0.003,
      "loss": 4.0469,
      "step": 6380
    },
    {
      "epoch": 0.06381,
      "grad_norm": 0.9436829595338491,
      "learning_rate": 0.003,
      "loss": 4.0324,
      "step": 6381
    },
    {
      "epoch": 0.06382,
      "grad_norm": 1.0685626630336664,
      "learning_rate": 0.003,
      "loss": 4.0431,
      "step": 6382
    },
    {
      "epoch": 0.06383,
      "grad_norm": 0.8979552535870967,
      "learning_rate": 0.003,
      "loss": 4.0704,
      "step": 6383
    },
    {
      "epoch": 0.06384,
      "grad_norm": 0.9809724782902665,
      "learning_rate": 0.003,
      "loss": 4.0712,
      "step": 6384
    },
    {
      "epoch": 0.06385,
      "grad_norm": 0.9328427920083763,
      "learning_rate": 0.003,
      "loss": 4.0317,
      "step": 6385
    },
    {
      "epoch": 0.06386,
      "grad_norm": 0.8685113357756273,
      "learning_rate": 0.003,
      "loss": 4.0395,
      "step": 6386
    },
    {
      "epoch": 0.06387,
      "grad_norm": 0.9155556766963706,
      "learning_rate": 0.003,
      "loss": 4.0539,
      "step": 6387
    },
    {
      "epoch": 0.06388,
      "grad_norm": 0.9520115095678209,
      "learning_rate": 0.003,
      "loss": 4.0455,
      "step": 6388
    },
    {
      "epoch": 0.06389,
      "grad_norm": 0.9650777255338291,
      "learning_rate": 0.003,
      "loss": 4.0313,
      "step": 6389
    },
    {
      "epoch": 0.0639,
      "grad_norm": 0.9711913071461878,
      "learning_rate": 0.003,
      "loss": 4.0587,
      "step": 6390
    },
    {
      "epoch": 0.06391,
      "grad_norm": 1.0582916428545706,
      "learning_rate": 0.003,
      "loss": 4.0773,
      "step": 6391
    },
    {
      "epoch": 0.06392,
      "grad_norm": 1.1228513540493317,
      "learning_rate": 0.003,
      "loss": 4.0545,
      "step": 6392
    },
    {
      "epoch": 0.06393,
      "grad_norm": 0.9824467176596088,
      "learning_rate": 0.003,
      "loss": 4.0659,
      "step": 6393
    },
    {
      "epoch": 0.06394,
      "grad_norm": 1.0748961169419036,
      "learning_rate": 0.003,
      "loss": 4.0628,
      "step": 6394
    },
    {
      "epoch": 0.06395,
      "grad_norm": 1.2122348207004991,
      "learning_rate": 0.003,
      "loss": 4.0914,
      "step": 6395
    },
    {
      "epoch": 0.06396,
      "grad_norm": 0.9525125913234459,
      "learning_rate": 0.003,
      "loss": 4.095,
      "step": 6396
    },
    {
      "epoch": 0.06397,
      "grad_norm": 1.1157249696277263,
      "learning_rate": 0.003,
      "loss": 4.0515,
      "step": 6397
    },
    {
      "epoch": 0.06398,
      "grad_norm": 1.0962231868724095,
      "learning_rate": 0.003,
      "loss": 4.0479,
      "step": 6398
    },
    {
      "epoch": 0.06399,
      "grad_norm": 0.7475394505526716,
      "learning_rate": 0.003,
      "loss": 4.0687,
      "step": 6399
    },
    {
      "epoch": 0.064,
      "grad_norm": 0.7790863228862557,
      "learning_rate": 0.003,
      "loss": 4.0587,
      "step": 6400
    },
    {
      "epoch": 0.06401,
      "grad_norm": 0.9202551013299898,
      "learning_rate": 0.003,
      "loss": 4.0455,
      "step": 6401
    },
    {
      "epoch": 0.06402,
      "grad_norm": 0.9324453718148813,
      "learning_rate": 0.003,
      "loss": 4.0463,
      "step": 6402
    },
    {
      "epoch": 0.06403,
      "grad_norm": 1.0178636443747968,
      "learning_rate": 0.003,
      "loss": 4.0452,
      "step": 6403
    },
    {
      "epoch": 0.06404,
      "grad_norm": 1.0889588831558945,
      "learning_rate": 0.003,
      "loss": 4.1007,
      "step": 6404
    },
    {
      "epoch": 0.06405,
      "grad_norm": 1.0511307110083934,
      "learning_rate": 0.003,
      "loss": 4.0581,
      "step": 6405
    },
    {
      "epoch": 0.06406,
      "grad_norm": 0.8386442367077682,
      "learning_rate": 0.003,
      "loss": 4.0437,
      "step": 6406
    },
    {
      "epoch": 0.06407,
      "grad_norm": 0.9385442318393312,
      "learning_rate": 0.003,
      "loss": 4.045,
      "step": 6407
    },
    {
      "epoch": 0.06408,
      "grad_norm": 1.1880536233711294,
      "learning_rate": 0.003,
      "loss": 4.0323,
      "step": 6408
    },
    {
      "epoch": 0.06409,
      "grad_norm": 1.0715566126733156,
      "learning_rate": 0.003,
      "loss": 4.0723,
      "step": 6409
    },
    {
      "epoch": 0.0641,
      "grad_norm": 1.0226807648909895,
      "learning_rate": 0.003,
      "loss": 4.0641,
      "step": 6410
    },
    {
      "epoch": 0.06411,
      "grad_norm": 1.0144667419036346,
      "learning_rate": 0.003,
      "loss": 4.0552,
      "step": 6411
    },
    {
      "epoch": 0.06412,
      "grad_norm": 1.0953706363475204,
      "learning_rate": 0.003,
      "loss": 4.05,
      "step": 6412
    },
    {
      "epoch": 0.06413,
      "grad_norm": 1.0397462803376565,
      "learning_rate": 0.003,
      "loss": 4.0847,
      "step": 6413
    },
    {
      "epoch": 0.06414,
      "grad_norm": 0.9792075931298253,
      "learning_rate": 0.003,
      "loss": 4.0424,
      "step": 6414
    },
    {
      "epoch": 0.06415,
      "grad_norm": 1.0282547055284612,
      "learning_rate": 0.003,
      "loss": 4.0344,
      "step": 6415
    },
    {
      "epoch": 0.06416,
      "grad_norm": 1.159533810090572,
      "learning_rate": 0.003,
      "loss": 4.0805,
      "step": 6416
    },
    {
      "epoch": 0.06417,
      "grad_norm": 1.1321519339699178,
      "learning_rate": 0.003,
      "loss": 4.0535,
      "step": 6417
    },
    {
      "epoch": 0.06418,
      "grad_norm": 0.9713020938212722,
      "learning_rate": 0.003,
      "loss": 4.0407,
      "step": 6418
    },
    {
      "epoch": 0.06419,
      "grad_norm": 1.2376733723226676,
      "learning_rate": 0.003,
      "loss": 4.0535,
      "step": 6419
    },
    {
      "epoch": 0.0642,
      "grad_norm": 0.9221140531392306,
      "learning_rate": 0.003,
      "loss": 4.0576,
      "step": 6420
    },
    {
      "epoch": 0.06421,
      "grad_norm": 0.9969702997793933,
      "learning_rate": 0.003,
      "loss": 4.0748,
      "step": 6421
    },
    {
      "epoch": 0.06422,
      "grad_norm": 1.0567219061372177,
      "learning_rate": 0.003,
      "loss": 4.0614,
      "step": 6422
    },
    {
      "epoch": 0.06423,
      "grad_norm": 0.991239045933805,
      "learning_rate": 0.003,
      "loss": 4.077,
      "step": 6423
    },
    {
      "epoch": 0.06424,
      "grad_norm": 1.0308086601832684,
      "learning_rate": 0.003,
      "loss": 4.0376,
      "step": 6424
    },
    {
      "epoch": 0.06425,
      "grad_norm": 1.0347557359010413,
      "learning_rate": 0.003,
      "loss": 4.062,
      "step": 6425
    },
    {
      "epoch": 0.06426,
      "grad_norm": 1.180251291539417,
      "learning_rate": 0.003,
      "loss": 4.0315,
      "step": 6426
    },
    {
      "epoch": 0.06427,
      "grad_norm": 1.0107602148306123,
      "learning_rate": 0.003,
      "loss": 4.0432,
      "step": 6427
    },
    {
      "epoch": 0.06428,
      "grad_norm": 1.0541298404940496,
      "learning_rate": 0.003,
      "loss": 4.0675,
      "step": 6428
    },
    {
      "epoch": 0.06429,
      "grad_norm": 1.0253882188929047,
      "learning_rate": 0.003,
      "loss": 4.0667,
      "step": 6429
    },
    {
      "epoch": 0.0643,
      "grad_norm": 1.143653137235699,
      "learning_rate": 0.003,
      "loss": 4.0425,
      "step": 6430
    },
    {
      "epoch": 0.06431,
      "grad_norm": 0.9421049550422983,
      "learning_rate": 0.003,
      "loss": 4.0533,
      "step": 6431
    },
    {
      "epoch": 0.06432,
      "grad_norm": 1.1073171928356662,
      "learning_rate": 0.003,
      "loss": 4.0506,
      "step": 6432
    },
    {
      "epoch": 0.06433,
      "grad_norm": 1.0407008893715575,
      "learning_rate": 0.003,
      "loss": 4.0789,
      "step": 6433
    },
    {
      "epoch": 0.06434,
      "grad_norm": 1.1395794148637106,
      "learning_rate": 0.003,
      "loss": 4.0278,
      "step": 6434
    },
    {
      "epoch": 0.06435,
      "grad_norm": 1.026318482136573,
      "learning_rate": 0.003,
      "loss": 4.0456,
      "step": 6435
    },
    {
      "epoch": 0.06436,
      "grad_norm": 1.1702677902712457,
      "learning_rate": 0.003,
      "loss": 4.0676,
      "step": 6436
    },
    {
      "epoch": 0.06437,
      "grad_norm": 1.025273476921687,
      "learning_rate": 0.003,
      "loss": 4.0658,
      "step": 6437
    },
    {
      "epoch": 0.06438,
      "grad_norm": 1.0119572577344156,
      "learning_rate": 0.003,
      "loss": 4.0603,
      "step": 6438
    },
    {
      "epoch": 0.06439,
      "grad_norm": 0.8412430606835657,
      "learning_rate": 0.003,
      "loss": 4.0513,
      "step": 6439
    },
    {
      "epoch": 0.0644,
      "grad_norm": 0.9507548790037242,
      "learning_rate": 0.003,
      "loss": 4.0497,
      "step": 6440
    },
    {
      "epoch": 0.06441,
      "grad_norm": 0.9244681652798047,
      "learning_rate": 0.003,
      "loss": 4.0295,
      "step": 6441
    },
    {
      "epoch": 0.06442,
      "grad_norm": 0.8807778875679908,
      "learning_rate": 0.003,
      "loss": 4.0463,
      "step": 6442
    },
    {
      "epoch": 0.06443,
      "grad_norm": 0.827270815956522,
      "learning_rate": 0.003,
      "loss": 4.0343,
      "step": 6443
    },
    {
      "epoch": 0.06444,
      "grad_norm": 0.803325818969944,
      "learning_rate": 0.003,
      "loss": 4.0427,
      "step": 6444
    },
    {
      "epoch": 0.06445,
      "grad_norm": 0.9385724441228014,
      "learning_rate": 0.003,
      "loss": 4.0606,
      "step": 6445
    },
    {
      "epoch": 0.06446,
      "grad_norm": 1.2005144304193571,
      "learning_rate": 0.003,
      "loss": 4.0558,
      "step": 6446
    },
    {
      "epoch": 0.06447,
      "grad_norm": 0.9501919917514117,
      "learning_rate": 0.003,
      "loss": 4.0645,
      "step": 6447
    },
    {
      "epoch": 0.06448,
      "grad_norm": 1.0236274372742054,
      "learning_rate": 0.003,
      "loss": 4.0678,
      "step": 6448
    },
    {
      "epoch": 0.06449,
      "grad_norm": 1.0925032591156019,
      "learning_rate": 0.003,
      "loss": 4.0986,
      "step": 6449
    },
    {
      "epoch": 0.0645,
      "grad_norm": 0.9065417419150241,
      "learning_rate": 0.003,
      "loss": 4.0421,
      "step": 6450
    },
    {
      "epoch": 0.06451,
      "grad_norm": 0.8998787170517907,
      "learning_rate": 0.003,
      "loss": 4.087,
      "step": 6451
    },
    {
      "epoch": 0.06452,
      "grad_norm": 0.9437442931525397,
      "learning_rate": 0.003,
      "loss": 4.0501,
      "step": 6452
    },
    {
      "epoch": 0.06453,
      "grad_norm": 1.0928923944168785,
      "learning_rate": 0.003,
      "loss": 4.0923,
      "step": 6453
    },
    {
      "epoch": 0.06454,
      "grad_norm": 1.1495081243301128,
      "learning_rate": 0.003,
      "loss": 4.0495,
      "step": 6454
    },
    {
      "epoch": 0.06455,
      "grad_norm": 0.9528175723610092,
      "learning_rate": 0.003,
      "loss": 4.057,
      "step": 6455
    },
    {
      "epoch": 0.06456,
      "grad_norm": 1.0833738182788575,
      "learning_rate": 0.003,
      "loss": 4.0747,
      "step": 6456
    },
    {
      "epoch": 0.06457,
      "grad_norm": 1.1107357171312071,
      "learning_rate": 0.003,
      "loss": 4.055,
      "step": 6457
    },
    {
      "epoch": 0.06458,
      "grad_norm": 1.1510295337322964,
      "learning_rate": 0.003,
      "loss": 4.068,
      "step": 6458
    },
    {
      "epoch": 0.06459,
      "grad_norm": 1.0262159806305138,
      "learning_rate": 0.003,
      "loss": 4.0744,
      "step": 6459
    },
    {
      "epoch": 0.0646,
      "grad_norm": 1.014576196236502,
      "learning_rate": 0.003,
      "loss": 4.0515,
      "step": 6460
    },
    {
      "epoch": 0.06461,
      "grad_norm": 1.064471879938064,
      "learning_rate": 0.003,
      "loss": 4.0942,
      "step": 6461
    },
    {
      "epoch": 0.06462,
      "grad_norm": 0.9222498176393344,
      "learning_rate": 0.003,
      "loss": 4.0416,
      "step": 6462
    },
    {
      "epoch": 0.06463,
      "grad_norm": 0.8981156217551731,
      "learning_rate": 0.003,
      "loss": 4.0411,
      "step": 6463
    },
    {
      "epoch": 0.06464,
      "grad_norm": 1.1107391697536575,
      "learning_rate": 0.003,
      "loss": 4.0423,
      "step": 6464
    },
    {
      "epoch": 0.06465,
      "grad_norm": 1.0063336191833832,
      "learning_rate": 0.003,
      "loss": 4.0391,
      "step": 6465
    },
    {
      "epoch": 0.06466,
      "grad_norm": 1.1961559729990558,
      "learning_rate": 0.003,
      "loss": 4.0585,
      "step": 6466
    },
    {
      "epoch": 0.06467,
      "grad_norm": 0.8793978826576908,
      "learning_rate": 0.003,
      "loss": 4.0408,
      "step": 6467
    },
    {
      "epoch": 0.06468,
      "grad_norm": 1.0427733287660477,
      "learning_rate": 0.003,
      "loss": 4.0667,
      "step": 6468
    },
    {
      "epoch": 0.06469,
      "grad_norm": 1.1251364660455194,
      "learning_rate": 0.003,
      "loss": 4.0713,
      "step": 6469
    },
    {
      "epoch": 0.0647,
      "grad_norm": 1.0043010097247658,
      "learning_rate": 0.003,
      "loss": 4.0757,
      "step": 6470
    },
    {
      "epoch": 0.06471,
      "grad_norm": 1.1207301616281806,
      "learning_rate": 0.003,
      "loss": 4.0556,
      "step": 6471
    },
    {
      "epoch": 0.06472,
      "grad_norm": 0.9991580557027551,
      "learning_rate": 0.003,
      "loss": 4.0676,
      "step": 6472
    },
    {
      "epoch": 0.06473,
      "grad_norm": 1.1171158410309772,
      "learning_rate": 0.003,
      "loss": 4.0736,
      "step": 6473
    },
    {
      "epoch": 0.06474,
      "grad_norm": 0.8529112213695773,
      "learning_rate": 0.003,
      "loss": 4.0862,
      "step": 6474
    },
    {
      "epoch": 0.06475,
      "grad_norm": 0.8417759233289197,
      "learning_rate": 0.003,
      "loss": 4.0795,
      "step": 6475
    },
    {
      "epoch": 0.06476,
      "grad_norm": 0.9302953810525731,
      "learning_rate": 0.003,
      "loss": 4.0523,
      "step": 6476
    },
    {
      "epoch": 0.06477,
      "grad_norm": 1.0176820311771924,
      "learning_rate": 0.003,
      "loss": 4.0408,
      "step": 6477
    },
    {
      "epoch": 0.06478,
      "grad_norm": 1.1064098044871353,
      "learning_rate": 0.003,
      "loss": 4.0346,
      "step": 6478
    },
    {
      "epoch": 0.06479,
      "grad_norm": 1.0450987906387426,
      "learning_rate": 0.003,
      "loss": 4.0524,
      "step": 6479
    },
    {
      "epoch": 0.0648,
      "grad_norm": 1.0076941555756993,
      "learning_rate": 0.003,
      "loss": 4.0439,
      "step": 6480
    },
    {
      "epoch": 0.06481,
      "grad_norm": 0.8795541683622259,
      "learning_rate": 0.003,
      "loss": 4.0527,
      "step": 6481
    },
    {
      "epoch": 0.06482,
      "grad_norm": 0.9850429100701444,
      "learning_rate": 0.003,
      "loss": 4.0513,
      "step": 6482
    },
    {
      "epoch": 0.06483,
      "grad_norm": 1.104728714549408,
      "learning_rate": 0.003,
      "loss": 4.0675,
      "step": 6483
    },
    {
      "epoch": 0.06484,
      "grad_norm": 1.0625023539317728,
      "learning_rate": 0.003,
      "loss": 4.0617,
      "step": 6484
    },
    {
      "epoch": 0.06485,
      "grad_norm": 0.9421033844694122,
      "learning_rate": 0.003,
      "loss": 4.0836,
      "step": 6485
    },
    {
      "epoch": 0.06486,
      "grad_norm": 0.8988596473281786,
      "learning_rate": 0.003,
      "loss": 4.0808,
      "step": 6486
    },
    {
      "epoch": 0.06487,
      "grad_norm": 0.963516647410238,
      "learning_rate": 0.003,
      "loss": 4.0686,
      "step": 6487
    },
    {
      "epoch": 0.06488,
      "grad_norm": 1.0333879473021825,
      "learning_rate": 0.003,
      "loss": 4.0782,
      "step": 6488
    },
    {
      "epoch": 0.06489,
      "grad_norm": 1.0586786599679208,
      "learning_rate": 0.003,
      "loss": 4.0635,
      "step": 6489
    },
    {
      "epoch": 0.0649,
      "grad_norm": 0.9937287592814337,
      "learning_rate": 0.003,
      "loss": 4.0541,
      "step": 6490
    },
    {
      "epoch": 0.06491,
      "grad_norm": 1.1726633804612512,
      "learning_rate": 0.003,
      "loss": 4.0615,
      "step": 6491
    },
    {
      "epoch": 0.06492,
      "grad_norm": 1.0927920818435795,
      "learning_rate": 0.003,
      "loss": 4.075,
      "step": 6492
    },
    {
      "epoch": 0.06493,
      "grad_norm": 1.0227562445548077,
      "learning_rate": 0.003,
      "loss": 4.0604,
      "step": 6493
    },
    {
      "epoch": 0.06494,
      "grad_norm": 1.0831459762005358,
      "learning_rate": 0.003,
      "loss": 4.072,
      "step": 6494
    },
    {
      "epoch": 0.06495,
      "grad_norm": 1.0262098572577179,
      "learning_rate": 0.003,
      "loss": 4.0624,
      "step": 6495
    },
    {
      "epoch": 0.06496,
      "grad_norm": 1.2236916592367486,
      "learning_rate": 0.003,
      "loss": 4.0312,
      "step": 6496
    },
    {
      "epoch": 0.06497,
      "grad_norm": 1.0285867021198758,
      "learning_rate": 0.003,
      "loss": 4.0559,
      "step": 6497
    },
    {
      "epoch": 0.06498,
      "grad_norm": 1.1733459098787142,
      "learning_rate": 0.003,
      "loss": 4.0519,
      "step": 6498
    },
    {
      "epoch": 0.06499,
      "grad_norm": 1.0531898340962396,
      "learning_rate": 0.003,
      "loss": 4.0806,
      "step": 6499
    },
    {
      "epoch": 0.065,
      "grad_norm": 1.060199193118051,
      "learning_rate": 0.003,
      "loss": 4.0563,
      "step": 6500
    },
    {
      "epoch": 0.06501,
      "grad_norm": 1.1527686386988991,
      "learning_rate": 0.003,
      "loss": 4.0544,
      "step": 6501
    },
    {
      "epoch": 0.06502,
      "grad_norm": 1.029295109562913,
      "learning_rate": 0.003,
      "loss": 4.0603,
      "step": 6502
    },
    {
      "epoch": 0.06503,
      "grad_norm": 0.9836711412915636,
      "learning_rate": 0.003,
      "loss": 4.0756,
      "step": 6503
    },
    {
      "epoch": 0.06504,
      "grad_norm": 0.9285828574317885,
      "learning_rate": 0.003,
      "loss": 4.0537,
      "step": 6504
    },
    {
      "epoch": 0.06505,
      "grad_norm": 0.975483354586866,
      "learning_rate": 0.003,
      "loss": 4.0505,
      "step": 6505
    },
    {
      "epoch": 0.06506,
      "grad_norm": 1.2115222037214375,
      "learning_rate": 0.003,
      "loss": 4.045,
      "step": 6506
    },
    {
      "epoch": 0.06507,
      "grad_norm": 0.9656432384586594,
      "learning_rate": 0.003,
      "loss": 4.0169,
      "step": 6507
    },
    {
      "epoch": 0.06508,
      "grad_norm": 1.0366546849866198,
      "learning_rate": 0.003,
      "loss": 4.0537,
      "step": 6508
    },
    {
      "epoch": 0.06509,
      "grad_norm": 1.009479145061525,
      "learning_rate": 0.003,
      "loss": 4.0457,
      "step": 6509
    },
    {
      "epoch": 0.0651,
      "grad_norm": 1.0366020609012134,
      "learning_rate": 0.003,
      "loss": 4.0833,
      "step": 6510
    },
    {
      "epoch": 0.06511,
      "grad_norm": 0.9709239930407754,
      "learning_rate": 0.003,
      "loss": 4.0474,
      "step": 6511
    },
    {
      "epoch": 0.06512,
      "grad_norm": 0.8644707502418649,
      "learning_rate": 0.003,
      "loss": 4.0261,
      "step": 6512
    },
    {
      "epoch": 0.06513,
      "grad_norm": 0.8946018112148616,
      "learning_rate": 0.003,
      "loss": 4.0577,
      "step": 6513
    },
    {
      "epoch": 0.06514,
      "grad_norm": 1.032708726685494,
      "learning_rate": 0.003,
      "loss": 4.05,
      "step": 6514
    },
    {
      "epoch": 0.06515,
      "grad_norm": 1.0409138603347203,
      "learning_rate": 0.003,
      "loss": 4.0346,
      "step": 6515
    },
    {
      "epoch": 0.06516,
      "grad_norm": 0.9892609384071499,
      "learning_rate": 0.003,
      "loss": 4.0555,
      "step": 6516
    },
    {
      "epoch": 0.06517,
      "grad_norm": 0.9148700446008358,
      "learning_rate": 0.003,
      "loss": 4.0507,
      "step": 6517
    },
    {
      "epoch": 0.06518,
      "grad_norm": 0.9979358270843701,
      "learning_rate": 0.003,
      "loss": 4.0718,
      "step": 6518
    },
    {
      "epoch": 0.06519,
      "grad_norm": 1.185271592002419,
      "learning_rate": 0.003,
      "loss": 4.054,
      "step": 6519
    },
    {
      "epoch": 0.0652,
      "grad_norm": 0.9704305412861196,
      "learning_rate": 0.003,
      "loss": 4.0173,
      "step": 6520
    },
    {
      "epoch": 0.06521,
      "grad_norm": 1.1014185224929063,
      "learning_rate": 0.003,
      "loss": 4.0695,
      "step": 6521
    },
    {
      "epoch": 0.06522,
      "grad_norm": 1.0703081501278155,
      "learning_rate": 0.003,
      "loss": 4.0729,
      "step": 6522
    },
    {
      "epoch": 0.06523,
      "grad_norm": 0.8771352485540366,
      "learning_rate": 0.003,
      "loss": 4.0593,
      "step": 6523
    },
    {
      "epoch": 0.06524,
      "grad_norm": 0.9130638198055455,
      "learning_rate": 0.003,
      "loss": 4.0526,
      "step": 6524
    },
    {
      "epoch": 0.06525,
      "grad_norm": 1.1326914793850837,
      "learning_rate": 0.003,
      "loss": 4.078,
      "step": 6525
    },
    {
      "epoch": 0.06526,
      "grad_norm": 1.021254454818802,
      "learning_rate": 0.003,
      "loss": 4.061,
      "step": 6526
    },
    {
      "epoch": 0.06527,
      "grad_norm": 1.073497230759006,
      "learning_rate": 0.003,
      "loss": 4.0713,
      "step": 6527
    },
    {
      "epoch": 0.06528,
      "grad_norm": 0.9631221418357153,
      "learning_rate": 0.003,
      "loss": 4.0684,
      "step": 6528
    },
    {
      "epoch": 0.06529,
      "grad_norm": 0.8809083485174808,
      "learning_rate": 0.003,
      "loss": 4.0536,
      "step": 6529
    },
    {
      "epoch": 0.0653,
      "grad_norm": 0.9688255439544828,
      "learning_rate": 0.003,
      "loss": 4.0512,
      "step": 6530
    },
    {
      "epoch": 0.06531,
      "grad_norm": 0.9824939601057029,
      "learning_rate": 0.003,
      "loss": 4.0425,
      "step": 6531
    },
    {
      "epoch": 0.06532,
      "grad_norm": 0.9436605093416098,
      "learning_rate": 0.003,
      "loss": 4.0731,
      "step": 6532
    },
    {
      "epoch": 0.06533,
      "grad_norm": 0.8767433235215197,
      "learning_rate": 0.003,
      "loss": 4.0679,
      "step": 6533
    },
    {
      "epoch": 0.06534,
      "grad_norm": 1.0523478236740318,
      "learning_rate": 0.003,
      "loss": 4.0691,
      "step": 6534
    },
    {
      "epoch": 0.06535,
      "grad_norm": 1.0000197595533407,
      "learning_rate": 0.003,
      "loss": 4.0277,
      "step": 6535
    },
    {
      "epoch": 0.06536,
      "grad_norm": 1.2508564779795237,
      "learning_rate": 0.003,
      "loss": 4.0528,
      "step": 6536
    },
    {
      "epoch": 0.06537,
      "grad_norm": 0.789891181606252,
      "learning_rate": 0.003,
      "loss": 4.061,
      "step": 6537
    },
    {
      "epoch": 0.06538,
      "grad_norm": 0.7387596226051653,
      "learning_rate": 0.003,
      "loss": 4.0626,
      "step": 6538
    },
    {
      "epoch": 0.06539,
      "grad_norm": 0.6182415296014235,
      "learning_rate": 0.003,
      "loss": 4.0295,
      "step": 6539
    },
    {
      "epoch": 0.0654,
      "grad_norm": 0.6896044138716849,
      "learning_rate": 0.003,
      "loss": 4.0853,
      "step": 6540
    },
    {
      "epoch": 0.06541,
      "grad_norm": 0.9475899668837449,
      "learning_rate": 0.003,
      "loss": 4.0505,
      "step": 6541
    },
    {
      "epoch": 0.06542,
      "grad_norm": 1.214498343082983,
      "learning_rate": 0.003,
      "loss": 4.0417,
      "step": 6542
    },
    {
      "epoch": 0.06543,
      "grad_norm": 0.8105078624490595,
      "learning_rate": 0.003,
      "loss": 4.0645,
      "step": 6543
    },
    {
      "epoch": 0.06544,
      "grad_norm": 0.7169187786636381,
      "learning_rate": 0.003,
      "loss": 4.0513,
      "step": 6544
    },
    {
      "epoch": 0.06545,
      "grad_norm": 0.7645714804284779,
      "learning_rate": 0.003,
      "loss": 4.0612,
      "step": 6545
    },
    {
      "epoch": 0.06546,
      "grad_norm": 0.7984074302017922,
      "learning_rate": 0.003,
      "loss": 4.0442,
      "step": 6546
    },
    {
      "epoch": 0.06547,
      "grad_norm": 0.898561711007871,
      "learning_rate": 0.003,
      "loss": 4.0444,
      "step": 6547
    },
    {
      "epoch": 0.06548,
      "grad_norm": 1.0356346515332355,
      "learning_rate": 0.003,
      "loss": 4.0478,
      "step": 6548
    },
    {
      "epoch": 0.06549,
      "grad_norm": 1.2065842171050336,
      "learning_rate": 0.003,
      "loss": 4.0803,
      "step": 6549
    },
    {
      "epoch": 0.0655,
      "grad_norm": 0.9647919793019113,
      "learning_rate": 0.003,
      "loss": 4.0296,
      "step": 6550
    },
    {
      "epoch": 0.06551,
      "grad_norm": 0.9830823232743289,
      "learning_rate": 0.003,
      "loss": 4.0339,
      "step": 6551
    },
    {
      "epoch": 0.06552,
      "grad_norm": 1.145414200189479,
      "learning_rate": 0.003,
      "loss": 4.08,
      "step": 6552
    },
    {
      "epoch": 0.06553,
      "grad_norm": 1.1282546842302874,
      "learning_rate": 0.003,
      "loss": 4.0556,
      "step": 6553
    },
    {
      "epoch": 0.06554,
      "grad_norm": 1.0724517832235836,
      "learning_rate": 0.003,
      "loss": 4.0633,
      "step": 6554
    },
    {
      "epoch": 0.06555,
      "grad_norm": 0.9247118456778185,
      "learning_rate": 0.003,
      "loss": 4.0518,
      "step": 6555
    },
    {
      "epoch": 0.06556,
      "grad_norm": 0.8809168909392743,
      "learning_rate": 0.003,
      "loss": 4.0701,
      "step": 6556
    },
    {
      "epoch": 0.06557,
      "grad_norm": 1.0027045686558371,
      "learning_rate": 0.003,
      "loss": 4.0494,
      "step": 6557
    },
    {
      "epoch": 0.06558,
      "grad_norm": 1.1521312374668213,
      "learning_rate": 0.003,
      "loss": 4.0846,
      "step": 6558
    },
    {
      "epoch": 0.06559,
      "grad_norm": 0.9170120064963928,
      "learning_rate": 0.003,
      "loss": 4.0576,
      "step": 6559
    },
    {
      "epoch": 0.0656,
      "grad_norm": 0.8925486266426595,
      "learning_rate": 0.003,
      "loss": 4.0567,
      "step": 6560
    },
    {
      "epoch": 0.06561,
      "grad_norm": 0.9709022873633115,
      "learning_rate": 0.003,
      "loss": 4.0775,
      "step": 6561
    },
    {
      "epoch": 0.06562,
      "grad_norm": 1.0699148932589904,
      "learning_rate": 0.003,
      "loss": 4.0352,
      "step": 6562
    },
    {
      "epoch": 0.06563,
      "grad_norm": 1.018722166079644,
      "learning_rate": 0.003,
      "loss": 4.0553,
      "step": 6563
    },
    {
      "epoch": 0.06564,
      "grad_norm": 1.0104563015086112,
      "learning_rate": 0.003,
      "loss": 4.0637,
      "step": 6564
    },
    {
      "epoch": 0.06565,
      "grad_norm": 0.9932591703232511,
      "learning_rate": 0.003,
      "loss": 4.0309,
      "step": 6565
    },
    {
      "epoch": 0.06566,
      "grad_norm": 0.9531047599334086,
      "learning_rate": 0.003,
      "loss": 4.0593,
      "step": 6566
    },
    {
      "epoch": 0.06567,
      "grad_norm": 1.0533507364705403,
      "learning_rate": 0.003,
      "loss": 4.0771,
      "step": 6567
    },
    {
      "epoch": 0.06568,
      "grad_norm": 1.2316178931504538,
      "learning_rate": 0.003,
      "loss": 4.0863,
      "step": 6568
    },
    {
      "epoch": 0.06569,
      "grad_norm": 0.9719523712712863,
      "learning_rate": 0.003,
      "loss": 4.031,
      "step": 6569
    },
    {
      "epoch": 0.0657,
      "grad_norm": 0.9929746213073647,
      "learning_rate": 0.003,
      "loss": 4.0546,
      "step": 6570
    },
    {
      "epoch": 0.06571,
      "grad_norm": 1.2226520601321686,
      "learning_rate": 0.003,
      "loss": 4.0682,
      "step": 6571
    },
    {
      "epoch": 0.06572,
      "grad_norm": 1.0093561514710232,
      "learning_rate": 0.003,
      "loss": 4.0298,
      "step": 6572
    },
    {
      "epoch": 0.06573,
      "grad_norm": 1.0716061347239403,
      "learning_rate": 0.003,
      "loss": 4.0618,
      "step": 6573
    },
    {
      "epoch": 0.06574,
      "grad_norm": 0.9809951043877417,
      "learning_rate": 0.003,
      "loss": 4.0477,
      "step": 6574
    },
    {
      "epoch": 0.06575,
      "grad_norm": 0.9974953620162613,
      "learning_rate": 0.003,
      "loss": 4.0405,
      "step": 6575
    },
    {
      "epoch": 0.06576,
      "grad_norm": 0.9179282772036231,
      "learning_rate": 0.003,
      "loss": 4.0274,
      "step": 6576
    },
    {
      "epoch": 0.06577,
      "grad_norm": 1.0424561456811468,
      "learning_rate": 0.003,
      "loss": 4.0419,
      "step": 6577
    },
    {
      "epoch": 0.06578,
      "grad_norm": 1.0920709972772817,
      "learning_rate": 0.003,
      "loss": 4.0464,
      "step": 6578
    },
    {
      "epoch": 0.06579,
      "grad_norm": 1.2580611117333567,
      "learning_rate": 0.003,
      "loss": 4.0425,
      "step": 6579
    },
    {
      "epoch": 0.0658,
      "grad_norm": 0.799965431122157,
      "learning_rate": 0.003,
      "loss": 4.0293,
      "step": 6580
    },
    {
      "epoch": 0.06581,
      "grad_norm": 0.781504567533229,
      "learning_rate": 0.003,
      "loss": 4.0676,
      "step": 6581
    },
    {
      "epoch": 0.06582,
      "grad_norm": 0.8493771135199062,
      "learning_rate": 0.003,
      "loss": 4.0634,
      "step": 6582
    },
    {
      "epoch": 0.06583,
      "grad_norm": 1.08382714627964,
      "learning_rate": 0.003,
      "loss": 4.0376,
      "step": 6583
    },
    {
      "epoch": 0.06584,
      "grad_norm": 0.9060976759116695,
      "learning_rate": 0.003,
      "loss": 4.0261,
      "step": 6584
    },
    {
      "epoch": 0.06585,
      "grad_norm": 1.0250567461266544,
      "learning_rate": 0.003,
      "loss": 4.052,
      "step": 6585
    },
    {
      "epoch": 0.06586,
      "grad_norm": 1.2416572702479831,
      "learning_rate": 0.003,
      "loss": 4.069,
      "step": 6586
    },
    {
      "epoch": 0.06587,
      "grad_norm": 0.8978047159555252,
      "learning_rate": 0.003,
      "loss": 4.0645,
      "step": 6587
    },
    {
      "epoch": 0.06588,
      "grad_norm": 0.827033589453339,
      "learning_rate": 0.003,
      "loss": 4.0814,
      "step": 6588
    },
    {
      "epoch": 0.06589,
      "grad_norm": 0.8211186750627676,
      "learning_rate": 0.003,
      "loss": 4.07,
      "step": 6589
    },
    {
      "epoch": 0.0659,
      "grad_norm": 0.8442088987694915,
      "learning_rate": 0.003,
      "loss": 4.0763,
      "step": 6590
    },
    {
      "epoch": 0.06591,
      "grad_norm": 1.0094167846920752,
      "learning_rate": 0.003,
      "loss": 4.1007,
      "step": 6591
    },
    {
      "epoch": 0.06592,
      "grad_norm": 1.13531615910892,
      "learning_rate": 0.003,
      "loss": 4.0707,
      "step": 6592
    },
    {
      "epoch": 0.06593,
      "grad_norm": 0.8982297119321998,
      "learning_rate": 0.003,
      "loss": 4.046,
      "step": 6593
    },
    {
      "epoch": 0.06594,
      "grad_norm": 0.946141231020595,
      "learning_rate": 0.003,
      "loss": 4.0711,
      "step": 6594
    },
    {
      "epoch": 0.06595,
      "grad_norm": 0.9136182259899146,
      "learning_rate": 0.003,
      "loss": 4.0213,
      "step": 6595
    },
    {
      "epoch": 0.06596,
      "grad_norm": 0.8744151642325578,
      "learning_rate": 0.003,
      "loss": 4.0723,
      "step": 6596
    },
    {
      "epoch": 0.06597,
      "grad_norm": 0.9533603618553095,
      "learning_rate": 0.003,
      "loss": 4.0511,
      "step": 6597
    },
    {
      "epoch": 0.06598,
      "grad_norm": 1.0604427104690746,
      "learning_rate": 0.003,
      "loss": 4.0864,
      "step": 6598
    },
    {
      "epoch": 0.06599,
      "grad_norm": 1.067472601559575,
      "learning_rate": 0.003,
      "loss": 4.0692,
      "step": 6599
    },
    {
      "epoch": 0.066,
      "grad_norm": 1.0838031919616027,
      "learning_rate": 0.003,
      "loss": 4.0752,
      "step": 6600
    },
    {
      "epoch": 0.06601,
      "grad_norm": 1.0258280341899866,
      "learning_rate": 0.003,
      "loss": 4.0584,
      "step": 6601
    },
    {
      "epoch": 0.06602,
      "grad_norm": 1.0270369431845496,
      "learning_rate": 0.003,
      "loss": 4.0576,
      "step": 6602
    },
    {
      "epoch": 0.06603,
      "grad_norm": 1.0254464189303953,
      "learning_rate": 0.003,
      "loss": 4.0547,
      "step": 6603
    },
    {
      "epoch": 0.06604,
      "grad_norm": 1.0679326763588755,
      "learning_rate": 0.003,
      "loss": 4.0513,
      "step": 6604
    },
    {
      "epoch": 0.06605,
      "grad_norm": 0.9415976678420839,
      "learning_rate": 0.003,
      "loss": 4.0742,
      "step": 6605
    },
    {
      "epoch": 0.06606,
      "grad_norm": 0.9849735102421308,
      "learning_rate": 0.003,
      "loss": 4.0563,
      "step": 6606
    },
    {
      "epoch": 0.06607,
      "grad_norm": 1.1302001807963977,
      "learning_rate": 0.003,
      "loss": 4.0492,
      "step": 6607
    },
    {
      "epoch": 0.06608,
      "grad_norm": 1.0851366334623165,
      "learning_rate": 0.003,
      "loss": 4.0481,
      "step": 6608
    },
    {
      "epoch": 0.06609,
      "grad_norm": 1.2878538978737437,
      "learning_rate": 0.003,
      "loss": 4.07,
      "step": 6609
    },
    {
      "epoch": 0.0661,
      "grad_norm": 0.9809189178062531,
      "learning_rate": 0.003,
      "loss": 4.0659,
      "step": 6610
    },
    {
      "epoch": 0.06611,
      "grad_norm": 0.9774112014402608,
      "learning_rate": 0.003,
      "loss": 4.0533,
      "step": 6611
    },
    {
      "epoch": 0.06612,
      "grad_norm": 1.074206882798712,
      "learning_rate": 0.003,
      "loss": 4.0509,
      "step": 6612
    },
    {
      "epoch": 0.06613,
      "grad_norm": 0.9119362233622336,
      "learning_rate": 0.003,
      "loss": 4.0625,
      "step": 6613
    },
    {
      "epoch": 0.06614,
      "grad_norm": 0.9092970755783418,
      "learning_rate": 0.003,
      "loss": 4.0614,
      "step": 6614
    },
    {
      "epoch": 0.06615,
      "grad_norm": 0.997529056172159,
      "learning_rate": 0.003,
      "loss": 4.0404,
      "step": 6615
    },
    {
      "epoch": 0.06616,
      "grad_norm": 1.0127096432931733,
      "learning_rate": 0.003,
      "loss": 4.0954,
      "step": 6616
    },
    {
      "epoch": 0.06617,
      "grad_norm": 1.047029183095652,
      "learning_rate": 0.003,
      "loss": 4.0718,
      "step": 6617
    },
    {
      "epoch": 0.06618,
      "grad_norm": 0.9374810538498864,
      "learning_rate": 0.003,
      "loss": 4.0809,
      "step": 6618
    },
    {
      "epoch": 0.06619,
      "grad_norm": 0.9483864493653071,
      "learning_rate": 0.003,
      "loss": 4.0465,
      "step": 6619
    },
    {
      "epoch": 0.0662,
      "grad_norm": 1.0543118381581815,
      "learning_rate": 0.003,
      "loss": 4.0415,
      "step": 6620
    },
    {
      "epoch": 0.06621,
      "grad_norm": 0.9495585297143883,
      "learning_rate": 0.003,
      "loss": 4.0715,
      "step": 6621
    },
    {
      "epoch": 0.06622,
      "grad_norm": 1.1120232278985485,
      "learning_rate": 0.003,
      "loss": 4.0701,
      "step": 6622
    },
    {
      "epoch": 0.06623,
      "grad_norm": 1.01560273953621,
      "learning_rate": 0.003,
      "loss": 4.0537,
      "step": 6623
    },
    {
      "epoch": 0.06624,
      "grad_norm": 1.1763569739613489,
      "learning_rate": 0.003,
      "loss": 4.0782,
      "step": 6624
    },
    {
      "epoch": 0.06625,
      "grad_norm": 1.1262447069862451,
      "learning_rate": 0.003,
      "loss": 4.062,
      "step": 6625
    },
    {
      "epoch": 0.06626,
      "grad_norm": 0.9382970303663091,
      "learning_rate": 0.003,
      "loss": 4.0798,
      "step": 6626
    },
    {
      "epoch": 0.06627,
      "grad_norm": 1.031419952696058,
      "learning_rate": 0.003,
      "loss": 4.0696,
      "step": 6627
    },
    {
      "epoch": 0.06628,
      "grad_norm": 1.1000233296997506,
      "learning_rate": 0.003,
      "loss": 4.073,
      "step": 6628
    },
    {
      "epoch": 0.06629,
      "grad_norm": 1.0494015088404511,
      "learning_rate": 0.003,
      "loss": 4.0658,
      "step": 6629
    },
    {
      "epoch": 0.0663,
      "grad_norm": 0.9410676376451073,
      "learning_rate": 0.003,
      "loss": 4.0782,
      "step": 6630
    },
    {
      "epoch": 0.06631,
      "grad_norm": 1.0689157449200972,
      "learning_rate": 0.003,
      "loss": 4.0584,
      "step": 6631
    },
    {
      "epoch": 0.06632,
      "grad_norm": 0.9674796376390861,
      "learning_rate": 0.003,
      "loss": 4.0578,
      "step": 6632
    },
    {
      "epoch": 0.06633,
      "grad_norm": 1.0093941937371331,
      "learning_rate": 0.003,
      "loss": 4.0764,
      "step": 6633
    },
    {
      "epoch": 0.06634,
      "grad_norm": 1.2337187993906655,
      "learning_rate": 0.003,
      "loss": 4.0716,
      "step": 6634
    },
    {
      "epoch": 0.06635,
      "grad_norm": 0.9607834095606316,
      "learning_rate": 0.003,
      "loss": 4.0848,
      "step": 6635
    },
    {
      "epoch": 0.06636,
      "grad_norm": 0.9317793609051973,
      "learning_rate": 0.003,
      "loss": 4.0366,
      "step": 6636
    },
    {
      "epoch": 0.06637,
      "grad_norm": 0.9290660104219854,
      "learning_rate": 0.003,
      "loss": 4.0584,
      "step": 6637
    },
    {
      "epoch": 0.06638,
      "grad_norm": 0.8475879085595186,
      "learning_rate": 0.003,
      "loss": 4.0255,
      "step": 6638
    },
    {
      "epoch": 0.06639,
      "grad_norm": 0.7941589369777531,
      "learning_rate": 0.003,
      "loss": 4.0585,
      "step": 6639
    },
    {
      "epoch": 0.0664,
      "grad_norm": 0.8043535640617332,
      "learning_rate": 0.003,
      "loss": 4.0372,
      "step": 6640
    },
    {
      "epoch": 0.06641,
      "grad_norm": 0.9938652956113684,
      "learning_rate": 0.003,
      "loss": 4.0603,
      "step": 6641
    },
    {
      "epoch": 0.06642,
      "grad_norm": 1.1838239696711994,
      "learning_rate": 0.003,
      "loss": 4.0674,
      "step": 6642
    },
    {
      "epoch": 0.06643,
      "grad_norm": 0.8933675359063811,
      "learning_rate": 0.003,
      "loss": 4.0511,
      "step": 6643
    },
    {
      "epoch": 0.06644,
      "grad_norm": 0.7456469496808978,
      "learning_rate": 0.003,
      "loss": 4.0491,
      "step": 6644
    },
    {
      "epoch": 0.06645,
      "grad_norm": 0.7991018018930186,
      "learning_rate": 0.003,
      "loss": 4.0345,
      "step": 6645
    },
    {
      "epoch": 0.06646,
      "grad_norm": 0.9578935057690124,
      "learning_rate": 0.003,
      "loss": 4.0388,
      "step": 6646
    },
    {
      "epoch": 0.06647,
      "grad_norm": 1.0975543814373472,
      "learning_rate": 0.003,
      "loss": 4.0708,
      "step": 6647
    },
    {
      "epoch": 0.06648,
      "grad_norm": 0.9324693403913995,
      "learning_rate": 0.003,
      "loss": 4.0434,
      "step": 6648
    },
    {
      "epoch": 0.06649,
      "grad_norm": 0.783828708420081,
      "learning_rate": 0.003,
      "loss": 4.0494,
      "step": 6649
    },
    {
      "epoch": 0.0665,
      "grad_norm": 0.8171820344699803,
      "learning_rate": 0.003,
      "loss": 4.0472,
      "step": 6650
    },
    {
      "epoch": 0.06651,
      "grad_norm": 1.0398397978025293,
      "learning_rate": 0.003,
      "loss": 4.0168,
      "step": 6651
    },
    {
      "epoch": 0.06652,
      "grad_norm": 1.1078725880727078,
      "learning_rate": 0.003,
      "loss": 4.0505,
      "step": 6652
    },
    {
      "epoch": 0.06653,
      "grad_norm": 1.05746120078045,
      "learning_rate": 0.003,
      "loss": 4.0734,
      "step": 6653
    },
    {
      "epoch": 0.06654,
      "grad_norm": 1.1809835092262086,
      "learning_rate": 0.003,
      "loss": 4.0424,
      "step": 6654
    },
    {
      "epoch": 0.06655,
      "grad_norm": 0.9734402094947942,
      "learning_rate": 0.003,
      "loss": 4.0535,
      "step": 6655
    },
    {
      "epoch": 0.06656,
      "grad_norm": 1.1076276318277252,
      "learning_rate": 0.003,
      "loss": 4.0381,
      "step": 6656
    },
    {
      "epoch": 0.06657,
      "grad_norm": 0.977985457890705,
      "learning_rate": 0.003,
      "loss": 4.0594,
      "step": 6657
    },
    {
      "epoch": 0.06658,
      "grad_norm": 0.9396247788800478,
      "learning_rate": 0.003,
      "loss": 4.0496,
      "step": 6658
    },
    {
      "epoch": 0.06659,
      "grad_norm": 1.0889586998264722,
      "learning_rate": 0.003,
      "loss": 4.0686,
      "step": 6659
    },
    {
      "epoch": 0.0666,
      "grad_norm": 1.0252758829952033,
      "learning_rate": 0.003,
      "loss": 4.0535,
      "step": 6660
    },
    {
      "epoch": 0.06661,
      "grad_norm": 1.0468601933896104,
      "learning_rate": 0.003,
      "loss": 4.0506,
      "step": 6661
    },
    {
      "epoch": 0.06662,
      "grad_norm": 1.1000476753001847,
      "learning_rate": 0.003,
      "loss": 4.0454,
      "step": 6662
    },
    {
      "epoch": 0.06663,
      "grad_norm": 1.1085660118615401,
      "learning_rate": 0.003,
      "loss": 4.1162,
      "step": 6663
    },
    {
      "epoch": 0.06664,
      "grad_norm": 0.9874276413847503,
      "learning_rate": 0.003,
      "loss": 4.071,
      "step": 6664
    },
    {
      "epoch": 0.06665,
      "grad_norm": 1.0456032019182708,
      "learning_rate": 0.003,
      "loss": 4.1,
      "step": 6665
    },
    {
      "epoch": 0.06666,
      "grad_norm": 1.0094947821010092,
      "learning_rate": 0.003,
      "loss": 4.0315,
      "step": 6666
    },
    {
      "epoch": 0.06667,
      "grad_norm": 1.109076869997086,
      "learning_rate": 0.003,
      "loss": 4.0554,
      "step": 6667
    },
    {
      "epoch": 0.06668,
      "grad_norm": 1.0088933647503096,
      "learning_rate": 0.003,
      "loss": 4.0536,
      "step": 6668
    },
    {
      "epoch": 0.06669,
      "grad_norm": 1.1968841584443815,
      "learning_rate": 0.003,
      "loss": 4.0625,
      "step": 6669
    },
    {
      "epoch": 0.0667,
      "grad_norm": 1.1241320277325377,
      "learning_rate": 0.003,
      "loss": 4.0753,
      "step": 6670
    },
    {
      "epoch": 0.06671,
      "grad_norm": 1.141904583401614,
      "learning_rate": 0.003,
      "loss": 4.0694,
      "step": 6671
    },
    {
      "epoch": 0.06672,
      "grad_norm": 1.062629879867344,
      "learning_rate": 0.003,
      "loss": 4.0501,
      "step": 6672
    },
    {
      "epoch": 0.06673,
      "grad_norm": 0.905571170038888,
      "learning_rate": 0.003,
      "loss": 4.0438,
      "step": 6673
    },
    {
      "epoch": 0.06674,
      "grad_norm": 0.9626950086254233,
      "learning_rate": 0.003,
      "loss": 4.0725,
      "step": 6674
    },
    {
      "epoch": 0.06675,
      "grad_norm": 1.0975398719716734,
      "learning_rate": 0.003,
      "loss": 4.0188,
      "step": 6675
    },
    {
      "epoch": 0.06676,
      "grad_norm": 1.3284543966412756,
      "learning_rate": 0.003,
      "loss": 4.0755,
      "step": 6676
    },
    {
      "epoch": 0.06677,
      "grad_norm": 0.8172955955655188,
      "learning_rate": 0.003,
      "loss": 4.0737,
      "step": 6677
    },
    {
      "epoch": 0.06678,
      "grad_norm": 0.9850016002008366,
      "learning_rate": 0.003,
      "loss": 4.0845,
      "step": 6678
    },
    {
      "epoch": 0.06679,
      "grad_norm": 1.0932275223248946,
      "learning_rate": 0.003,
      "loss": 4.0605,
      "step": 6679
    },
    {
      "epoch": 0.0668,
      "grad_norm": 1.148365923927585,
      "learning_rate": 0.003,
      "loss": 4.0949,
      "step": 6680
    },
    {
      "epoch": 0.06681,
      "grad_norm": 1.0338287959641415,
      "learning_rate": 0.003,
      "loss": 4.0461,
      "step": 6681
    },
    {
      "epoch": 0.06682,
      "grad_norm": 1.179731822743392,
      "learning_rate": 0.003,
      "loss": 4.0778,
      "step": 6682
    },
    {
      "epoch": 0.06683,
      "grad_norm": 0.8843991533234951,
      "learning_rate": 0.003,
      "loss": 4.0609,
      "step": 6683
    },
    {
      "epoch": 0.06684,
      "grad_norm": 0.9108762252979913,
      "learning_rate": 0.003,
      "loss": 4.0613,
      "step": 6684
    },
    {
      "epoch": 0.06685,
      "grad_norm": 1.0436969205506577,
      "learning_rate": 0.003,
      "loss": 4.0553,
      "step": 6685
    },
    {
      "epoch": 0.06686,
      "grad_norm": 1.0267003835359743,
      "learning_rate": 0.003,
      "loss": 4.0408,
      "step": 6686
    },
    {
      "epoch": 0.06687,
      "grad_norm": 0.9909041191744213,
      "learning_rate": 0.003,
      "loss": 4.03,
      "step": 6687
    },
    {
      "epoch": 0.06688,
      "grad_norm": 1.0186735696395426,
      "learning_rate": 0.003,
      "loss": 4.0453,
      "step": 6688
    },
    {
      "epoch": 0.06689,
      "grad_norm": 1.0837124330823527,
      "learning_rate": 0.003,
      "loss": 4.0853,
      "step": 6689
    },
    {
      "epoch": 0.0669,
      "grad_norm": 0.9374724932078298,
      "learning_rate": 0.003,
      "loss": 4.0444,
      "step": 6690
    },
    {
      "epoch": 0.06691,
      "grad_norm": 0.9609019141278521,
      "learning_rate": 0.003,
      "loss": 4.0589,
      "step": 6691
    },
    {
      "epoch": 0.06692,
      "grad_norm": 0.9532183639646817,
      "learning_rate": 0.003,
      "loss": 4.0347,
      "step": 6692
    },
    {
      "epoch": 0.06693,
      "grad_norm": 0.9948461347977686,
      "learning_rate": 0.003,
      "loss": 4.0517,
      "step": 6693
    },
    {
      "epoch": 0.06694,
      "grad_norm": 0.927563382693705,
      "learning_rate": 0.003,
      "loss": 4.0461,
      "step": 6694
    },
    {
      "epoch": 0.06695,
      "grad_norm": 0.8983506006216038,
      "learning_rate": 0.003,
      "loss": 4.0479,
      "step": 6695
    },
    {
      "epoch": 0.06696,
      "grad_norm": 1.0195737792831439,
      "learning_rate": 0.003,
      "loss": 4.0536,
      "step": 6696
    },
    {
      "epoch": 0.06697,
      "grad_norm": 1.0461424467719074,
      "learning_rate": 0.003,
      "loss": 4.0638,
      "step": 6697
    },
    {
      "epoch": 0.06698,
      "grad_norm": 1.1896402560053234,
      "learning_rate": 0.003,
      "loss": 4.0544,
      "step": 6698
    },
    {
      "epoch": 0.06699,
      "grad_norm": 1.0331768787858662,
      "learning_rate": 0.003,
      "loss": 4.0533,
      "step": 6699
    },
    {
      "epoch": 0.067,
      "grad_norm": 1.0760979816453098,
      "learning_rate": 0.003,
      "loss": 4.0713,
      "step": 6700
    },
    {
      "epoch": 0.06701,
      "grad_norm": 0.9030506574904259,
      "learning_rate": 0.003,
      "loss": 4.0689,
      "step": 6701
    },
    {
      "epoch": 0.06702,
      "grad_norm": 0.9590465759481133,
      "learning_rate": 0.003,
      "loss": 4.056,
      "step": 6702
    },
    {
      "epoch": 0.06703,
      "grad_norm": 1.0377738190882686,
      "learning_rate": 0.003,
      "loss": 4.0731,
      "step": 6703
    },
    {
      "epoch": 0.06704,
      "grad_norm": 0.9878474206212229,
      "learning_rate": 0.003,
      "loss": 4.0415,
      "step": 6704
    },
    {
      "epoch": 0.06705,
      "grad_norm": 1.266667575265577,
      "learning_rate": 0.003,
      "loss": 4.0564,
      "step": 6705
    },
    {
      "epoch": 0.06706,
      "grad_norm": 0.8290481300791042,
      "learning_rate": 0.003,
      "loss": 4.0916,
      "step": 6706
    },
    {
      "epoch": 0.06707,
      "grad_norm": 0.8267971520194893,
      "learning_rate": 0.003,
      "loss": 4.0267,
      "step": 6707
    },
    {
      "epoch": 0.06708,
      "grad_norm": 0.7961467186413875,
      "learning_rate": 0.003,
      "loss": 4.0421,
      "step": 6708
    },
    {
      "epoch": 0.06709,
      "grad_norm": 0.9824457399373988,
      "learning_rate": 0.003,
      "loss": 4.0429,
      "step": 6709
    },
    {
      "epoch": 0.0671,
      "grad_norm": 1.1961534967718657,
      "learning_rate": 0.003,
      "loss": 4.078,
      "step": 6710
    },
    {
      "epoch": 0.06711,
      "grad_norm": 0.9265435402662816,
      "learning_rate": 0.003,
      "loss": 4.0779,
      "step": 6711
    },
    {
      "epoch": 0.06712,
      "grad_norm": 1.0157357369850826,
      "learning_rate": 0.003,
      "loss": 4.0332,
      "step": 6712
    },
    {
      "epoch": 0.06713,
      "grad_norm": 0.9690324224443148,
      "learning_rate": 0.003,
      "loss": 4.0463,
      "step": 6713
    },
    {
      "epoch": 0.06714,
      "grad_norm": 0.897252797835452,
      "learning_rate": 0.003,
      "loss": 4.0479,
      "step": 6714
    },
    {
      "epoch": 0.06715,
      "grad_norm": 1.0170717220350938,
      "learning_rate": 0.003,
      "loss": 4.0526,
      "step": 6715
    },
    {
      "epoch": 0.06716,
      "grad_norm": 1.0531655759972556,
      "learning_rate": 0.003,
      "loss": 4.0599,
      "step": 6716
    },
    {
      "epoch": 0.06717,
      "grad_norm": 1.083960596464857,
      "learning_rate": 0.003,
      "loss": 4.0828,
      "step": 6717
    },
    {
      "epoch": 0.06718,
      "grad_norm": 0.9551481210518388,
      "learning_rate": 0.003,
      "loss": 4.0399,
      "step": 6718
    },
    {
      "epoch": 0.06719,
      "grad_norm": 0.935024318023197,
      "learning_rate": 0.003,
      "loss": 4.0446,
      "step": 6719
    },
    {
      "epoch": 0.0672,
      "grad_norm": 0.9498980025125131,
      "learning_rate": 0.003,
      "loss": 4.0574,
      "step": 6720
    },
    {
      "epoch": 0.06721,
      "grad_norm": 1.2015504874657223,
      "learning_rate": 0.003,
      "loss": 4.0779,
      "step": 6721
    },
    {
      "epoch": 0.06722,
      "grad_norm": 0.9347001938513728,
      "learning_rate": 0.003,
      "loss": 4.0776,
      "step": 6722
    },
    {
      "epoch": 0.06723,
      "grad_norm": 1.018756831416971,
      "learning_rate": 0.003,
      "loss": 4.07,
      "step": 6723
    },
    {
      "epoch": 0.06724,
      "grad_norm": 1.0987676870246745,
      "learning_rate": 0.003,
      "loss": 4.0345,
      "step": 6724
    },
    {
      "epoch": 0.06725,
      "grad_norm": 1.0112178470596225,
      "learning_rate": 0.003,
      "loss": 4.0651,
      "step": 6725
    },
    {
      "epoch": 0.06726,
      "grad_norm": 1.112944901032879,
      "learning_rate": 0.003,
      "loss": 4.0933,
      "step": 6726
    },
    {
      "epoch": 0.06727,
      "grad_norm": 1.163602379560122,
      "learning_rate": 0.003,
      "loss": 4.0595,
      "step": 6727
    },
    {
      "epoch": 0.06728,
      "grad_norm": 0.9360909552819368,
      "learning_rate": 0.003,
      "loss": 4.026,
      "step": 6728
    },
    {
      "epoch": 0.06729,
      "grad_norm": 1.0605315281502332,
      "learning_rate": 0.003,
      "loss": 4.0485,
      "step": 6729
    },
    {
      "epoch": 0.0673,
      "grad_norm": 1.0349633126182776,
      "learning_rate": 0.003,
      "loss": 4.0591,
      "step": 6730
    },
    {
      "epoch": 0.06731,
      "grad_norm": 1.2616197215123148,
      "learning_rate": 0.003,
      "loss": 4.0356,
      "step": 6731
    },
    {
      "epoch": 0.06732,
      "grad_norm": 1.0836805120817457,
      "learning_rate": 0.003,
      "loss": 4.0542,
      "step": 6732
    },
    {
      "epoch": 0.06733,
      "grad_norm": 0.9975329556874161,
      "learning_rate": 0.003,
      "loss": 4.066,
      "step": 6733
    },
    {
      "epoch": 0.06734,
      "grad_norm": 1.0166554668806964,
      "learning_rate": 0.003,
      "loss": 4.063,
      "step": 6734
    },
    {
      "epoch": 0.06735,
      "grad_norm": 0.9860686766932066,
      "learning_rate": 0.003,
      "loss": 4.0374,
      "step": 6735
    },
    {
      "epoch": 0.06736,
      "grad_norm": 1.1900474439099948,
      "learning_rate": 0.003,
      "loss": 4.0457,
      "step": 6736
    },
    {
      "epoch": 0.06737,
      "grad_norm": 0.8977382894817897,
      "learning_rate": 0.003,
      "loss": 4.0514,
      "step": 6737
    },
    {
      "epoch": 0.06738,
      "grad_norm": 0.8986515344833599,
      "learning_rate": 0.003,
      "loss": 4.0445,
      "step": 6738
    },
    {
      "epoch": 0.06739,
      "grad_norm": 1.0321723301672268,
      "learning_rate": 0.003,
      "loss": 4.0536,
      "step": 6739
    },
    {
      "epoch": 0.0674,
      "grad_norm": 1.1818737667535166,
      "learning_rate": 0.003,
      "loss": 4.0502,
      "step": 6740
    },
    {
      "epoch": 0.06741,
      "grad_norm": 0.7306130026600984,
      "learning_rate": 0.003,
      "loss": 4.0318,
      "step": 6741
    },
    {
      "epoch": 0.06742,
      "grad_norm": 0.671364374797458,
      "learning_rate": 0.003,
      "loss": 4.0552,
      "step": 6742
    },
    {
      "epoch": 0.06743,
      "grad_norm": 0.7657736082400693,
      "learning_rate": 0.003,
      "loss": 4.0243,
      "step": 6743
    },
    {
      "epoch": 0.06744,
      "grad_norm": 0.8251136224218921,
      "learning_rate": 0.003,
      "loss": 4.0626,
      "step": 6744
    },
    {
      "epoch": 0.06745,
      "grad_norm": 0.7693016914924291,
      "learning_rate": 0.003,
      "loss": 4.0335,
      "step": 6745
    },
    {
      "epoch": 0.06746,
      "grad_norm": 0.8468866611491973,
      "learning_rate": 0.003,
      "loss": 4.0394,
      "step": 6746
    },
    {
      "epoch": 0.06747,
      "grad_norm": 1.0246297168064884,
      "learning_rate": 0.003,
      "loss": 4.0477,
      "step": 6747
    },
    {
      "epoch": 0.06748,
      "grad_norm": 1.1135872084646647,
      "learning_rate": 0.003,
      "loss": 4.0558,
      "step": 6748
    },
    {
      "epoch": 0.06749,
      "grad_norm": 0.8843107432077526,
      "learning_rate": 0.003,
      "loss": 4.0447,
      "step": 6749
    },
    {
      "epoch": 0.0675,
      "grad_norm": 0.9912073777138863,
      "learning_rate": 0.003,
      "loss": 4.0344,
      "step": 6750
    },
    {
      "epoch": 0.06751,
      "grad_norm": 1.2591178077510798,
      "learning_rate": 0.003,
      "loss": 4.08,
      "step": 6751
    },
    {
      "epoch": 0.06752,
      "grad_norm": 0.8082384535393943,
      "learning_rate": 0.003,
      "loss": 4.065,
      "step": 6752
    },
    {
      "epoch": 0.06753,
      "grad_norm": 0.7511358793452372,
      "learning_rate": 0.003,
      "loss": 4.0259,
      "step": 6753
    },
    {
      "epoch": 0.06754,
      "grad_norm": 0.867990155758509,
      "learning_rate": 0.003,
      "loss": 4.0551,
      "step": 6754
    },
    {
      "epoch": 0.06755,
      "grad_norm": 0.9955414763715997,
      "learning_rate": 0.003,
      "loss": 4.0421,
      "step": 6755
    },
    {
      "epoch": 0.06756,
      "grad_norm": 1.2073694787838065,
      "learning_rate": 0.003,
      "loss": 4.0637,
      "step": 6756
    },
    {
      "epoch": 0.06757,
      "grad_norm": 1.0707480255468453,
      "learning_rate": 0.003,
      "loss": 4.0647,
      "step": 6757
    },
    {
      "epoch": 0.06758,
      "grad_norm": 1.1122443531121051,
      "learning_rate": 0.003,
      "loss": 4.0549,
      "step": 6758
    },
    {
      "epoch": 0.06759,
      "grad_norm": 0.9489611527342554,
      "learning_rate": 0.003,
      "loss": 4.0803,
      "step": 6759
    },
    {
      "epoch": 0.0676,
      "grad_norm": 1.0041886074885658,
      "learning_rate": 0.003,
      "loss": 4.0671,
      "step": 6760
    },
    {
      "epoch": 0.06761,
      "grad_norm": 1.355937161252965,
      "learning_rate": 0.003,
      "loss": 4.0569,
      "step": 6761
    },
    {
      "epoch": 0.06762,
      "grad_norm": 0.9894898503815498,
      "learning_rate": 0.003,
      "loss": 4.0571,
      "step": 6762
    },
    {
      "epoch": 0.06763,
      "grad_norm": 1.050921117069599,
      "learning_rate": 0.003,
      "loss": 4.0595,
      "step": 6763
    },
    {
      "epoch": 0.06764,
      "grad_norm": 1.067765228974404,
      "learning_rate": 0.003,
      "loss": 4.0435,
      "step": 6764
    },
    {
      "epoch": 0.06765,
      "grad_norm": 0.9545145035844784,
      "learning_rate": 0.003,
      "loss": 4.0543,
      "step": 6765
    },
    {
      "epoch": 0.06766,
      "grad_norm": 1.0023904128414394,
      "learning_rate": 0.003,
      "loss": 4.0698,
      "step": 6766
    },
    {
      "epoch": 0.06767,
      "grad_norm": 0.9593720771665164,
      "learning_rate": 0.003,
      "loss": 4.0679,
      "step": 6767
    },
    {
      "epoch": 0.06768,
      "grad_norm": 1.1273025407257062,
      "learning_rate": 0.003,
      "loss": 4.0341,
      "step": 6768
    },
    {
      "epoch": 0.06769,
      "grad_norm": 1.087476946360311,
      "learning_rate": 0.003,
      "loss": 4.0532,
      "step": 6769
    },
    {
      "epoch": 0.0677,
      "grad_norm": 1.1529163947763248,
      "learning_rate": 0.003,
      "loss": 4.0672,
      "step": 6770
    },
    {
      "epoch": 0.06771,
      "grad_norm": 0.7314271498801576,
      "learning_rate": 0.003,
      "loss": 4.0448,
      "step": 6771
    },
    {
      "epoch": 0.06772,
      "grad_norm": 0.7723436133611571,
      "learning_rate": 0.003,
      "loss": 4.052,
      "step": 6772
    },
    {
      "epoch": 0.06773,
      "grad_norm": 0.9678136783714796,
      "learning_rate": 0.003,
      "loss": 4.0706,
      "step": 6773
    },
    {
      "epoch": 0.06774,
      "grad_norm": 1.2406355654915857,
      "learning_rate": 0.003,
      "loss": 4.0136,
      "step": 6774
    },
    {
      "epoch": 0.06775,
      "grad_norm": 0.8156597708290578,
      "learning_rate": 0.003,
      "loss": 4.0551,
      "step": 6775
    },
    {
      "epoch": 0.06776,
      "grad_norm": 0.9530712850122685,
      "learning_rate": 0.003,
      "loss": 4.0865,
      "step": 6776
    },
    {
      "epoch": 0.06777,
      "grad_norm": 1.081652928525485,
      "learning_rate": 0.003,
      "loss": 4.0735,
      "step": 6777
    },
    {
      "epoch": 0.06778,
      "grad_norm": 0.9997360227306185,
      "learning_rate": 0.003,
      "loss": 4.014,
      "step": 6778
    },
    {
      "epoch": 0.06779,
      "grad_norm": 0.9050972292743207,
      "learning_rate": 0.003,
      "loss": 4.0605,
      "step": 6779
    },
    {
      "epoch": 0.0678,
      "grad_norm": 0.899616606188489,
      "learning_rate": 0.003,
      "loss": 4.0218,
      "step": 6780
    },
    {
      "epoch": 0.06781,
      "grad_norm": 1.035269328464622,
      "learning_rate": 0.003,
      "loss": 4.0934,
      "step": 6781
    },
    {
      "epoch": 0.06782,
      "grad_norm": 1.204532202978388,
      "learning_rate": 0.003,
      "loss": 4.0741,
      "step": 6782
    },
    {
      "epoch": 0.06783,
      "grad_norm": 0.9655678343763224,
      "learning_rate": 0.003,
      "loss": 4.0772,
      "step": 6783
    },
    {
      "epoch": 0.06784,
      "grad_norm": 0.900065330135723,
      "learning_rate": 0.003,
      "loss": 4.0477,
      "step": 6784
    },
    {
      "epoch": 0.06785,
      "grad_norm": 0.9737356124056987,
      "learning_rate": 0.003,
      "loss": 4.0711,
      "step": 6785
    },
    {
      "epoch": 0.06786,
      "grad_norm": 0.9974183872061985,
      "learning_rate": 0.003,
      "loss": 4.0404,
      "step": 6786
    },
    {
      "epoch": 0.06787,
      "grad_norm": 1.041494125314001,
      "learning_rate": 0.003,
      "loss": 4.0279,
      "step": 6787
    },
    {
      "epoch": 0.06788,
      "grad_norm": 1.083171903315331,
      "learning_rate": 0.003,
      "loss": 4.0753,
      "step": 6788
    },
    {
      "epoch": 0.06789,
      "grad_norm": 1.3039936977749973,
      "learning_rate": 0.003,
      "loss": 4.0455,
      "step": 6789
    },
    {
      "epoch": 0.0679,
      "grad_norm": 0.9733172088754207,
      "learning_rate": 0.003,
      "loss": 4.0678,
      "step": 6790
    },
    {
      "epoch": 0.06791,
      "grad_norm": 0.8961119962612736,
      "learning_rate": 0.003,
      "loss": 4.0487,
      "step": 6791
    },
    {
      "epoch": 0.06792,
      "grad_norm": 0.9367283411485818,
      "learning_rate": 0.003,
      "loss": 4.0677,
      "step": 6792
    },
    {
      "epoch": 0.06793,
      "grad_norm": 1.2662871097017818,
      "learning_rate": 0.003,
      "loss": 4.0711,
      "step": 6793
    },
    {
      "epoch": 0.06794,
      "grad_norm": 0.9235411725065059,
      "learning_rate": 0.003,
      "loss": 4.0799,
      "step": 6794
    },
    {
      "epoch": 0.06795,
      "grad_norm": 1.0564098546065013,
      "learning_rate": 0.003,
      "loss": 4.0331,
      "step": 6795
    },
    {
      "epoch": 0.06796,
      "grad_norm": 1.024685829459076,
      "learning_rate": 0.003,
      "loss": 4.0689,
      "step": 6796
    },
    {
      "epoch": 0.06797,
      "grad_norm": 1.120134124815487,
      "learning_rate": 0.003,
      "loss": 4.0852,
      "step": 6797
    },
    {
      "epoch": 0.06798,
      "grad_norm": 0.9517110352700969,
      "learning_rate": 0.003,
      "loss": 4.0493,
      "step": 6798
    },
    {
      "epoch": 0.06799,
      "grad_norm": 0.9692745939925019,
      "learning_rate": 0.003,
      "loss": 4.0861,
      "step": 6799
    },
    {
      "epoch": 0.068,
      "grad_norm": 1.0264417600972477,
      "learning_rate": 0.003,
      "loss": 4.0594,
      "step": 6800
    },
    {
      "epoch": 0.06801,
      "grad_norm": 1.2302920860016315,
      "learning_rate": 0.003,
      "loss": 4.0393,
      "step": 6801
    },
    {
      "epoch": 0.06802,
      "grad_norm": 1.0369213141884206,
      "learning_rate": 0.003,
      "loss": 4.0543,
      "step": 6802
    },
    {
      "epoch": 0.06803,
      "grad_norm": 1.0524636853985736,
      "learning_rate": 0.003,
      "loss": 4.0596,
      "step": 6803
    },
    {
      "epoch": 0.06804,
      "grad_norm": 1.0387365140475044,
      "learning_rate": 0.003,
      "loss": 4.0736,
      "step": 6804
    },
    {
      "epoch": 0.06805,
      "grad_norm": 1.2150483769149856,
      "learning_rate": 0.003,
      "loss": 4.0535,
      "step": 6805
    },
    {
      "epoch": 0.06806,
      "grad_norm": 0.8807324303080953,
      "learning_rate": 0.003,
      "loss": 4.0473,
      "step": 6806
    },
    {
      "epoch": 0.06807,
      "grad_norm": 0.8373757284517774,
      "learning_rate": 0.003,
      "loss": 4.0471,
      "step": 6807
    },
    {
      "epoch": 0.06808,
      "grad_norm": 0.8400667752675757,
      "learning_rate": 0.003,
      "loss": 4.0541,
      "step": 6808
    },
    {
      "epoch": 0.06809,
      "grad_norm": 0.8369993186398413,
      "learning_rate": 0.003,
      "loss": 4.0271,
      "step": 6809
    },
    {
      "epoch": 0.0681,
      "grad_norm": 0.9006507393196466,
      "learning_rate": 0.003,
      "loss": 4.0526,
      "step": 6810
    },
    {
      "epoch": 0.06811,
      "grad_norm": 1.0077180114145714,
      "learning_rate": 0.003,
      "loss": 4.0836,
      "step": 6811
    },
    {
      "epoch": 0.06812,
      "grad_norm": 1.1521055645070202,
      "learning_rate": 0.003,
      "loss": 4.0528,
      "step": 6812
    },
    {
      "epoch": 0.06813,
      "grad_norm": 1.0625428176141143,
      "learning_rate": 0.003,
      "loss": 4.0286,
      "step": 6813
    },
    {
      "epoch": 0.06814,
      "grad_norm": 1.178996563449822,
      "learning_rate": 0.003,
      "loss": 4.0658,
      "step": 6814
    },
    {
      "epoch": 0.06815,
      "grad_norm": 0.8797766928663225,
      "learning_rate": 0.003,
      "loss": 4.0709,
      "step": 6815
    },
    {
      "epoch": 0.06816,
      "grad_norm": 0.8289246518934655,
      "learning_rate": 0.003,
      "loss": 4.0688,
      "step": 6816
    },
    {
      "epoch": 0.06817,
      "grad_norm": 0.8654988446799349,
      "learning_rate": 0.003,
      "loss": 4.0388,
      "step": 6817
    },
    {
      "epoch": 0.06818,
      "grad_norm": 1.0798387368738107,
      "learning_rate": 0.003,
      "loss": 4.0627,
      "step": 6818
    },
    {
      "epoch": 0.06819,
      "grad_norm": 1.1105690600865603,
      "learning_rate": 0.003,
      "loss": 4.0581,
      "step": 6819
    },
    {
      "epoch": 0.0682,
      "grad_norm": 1.0813449551126753,
      "learning_rate": 0.003,
      "loss": 4.0843,
      "step": 6820
    },
    {
      "epoch": 0.06821,
      "grad_norm": 0.96077795414745,
      "learning_rate": 0.003,
      "loss": 4.0757,
      "step": 6821
    },
    {
      "epoch": 0.06822,
      "grad_norm": 0.9659380763604074,
      "learning_rate": 0.003,
      "loss": 4.0354,
      "step": 6822
    },
    {
      "epoch": 0.06823,
      "grad_norm": 1.10215696178397,
      "learning_rate": 0.003,
      "loss": 4.0554,
      "step": 6823
    },
    {
      "epoch": 0.06824,
      "grad_norm": 1.034364867848419,
      "learning_rate": 0.003,
      "loss": 4.0327,
      "step": 6824
    },
    {
      "epoch": 0.06825,
      "grad_norm": 0.9937444135494871,
      "learning_rate": 0.003,
      "loss": 4.0489,
      "step": 6825
    },
    {
      "epoch": 0.06826,
      "grad_norm": 0.9764377761142184,
      "learning_rate": 0.003,
      "loss": 4.0402,
      "step": 6826
    },
    {
      "epoch": 0.06827,
      "grad_norm": 0.969374372811426,
      "learning_rate": 0.003,
      "loss": 4.0308,
      "step": 6827
    },
    {
      "epoch": 0.06828,
      "grad_norm": 1.0350502895060627,
      "learning_rate": 0.003,
      "loss": 4.0583,
      "step": 6828
    },
    {
      "epoch": 0.06829,
      "grad_norm": 1.241961679572367,
      "learning_rate": 0.003,
      "loss": 4.0667,
      "step": 6829
    },
    {
      "epoch": 0.0683,
      "grad_norm": 1.025448801706437,
      "learning_rate": 0.003,
      "loss": 4.0532,
      "step": 6830
    },
    {
      "epoch": 0.06831,
      "grad_norm": 0.9950647336149414,
      "learning_rate": 0.003,
      "loss": 4.0784,
      "step": 6831
    },
    {
      "epoch": 0.06832,
      "grad_norm": 1.1012052576359934,
      "learning_rate": 0.003,
      "loss": 4.0455,
      "step": 6832
    },
    {
      "epoch": 0.06833,
      "grad_norm": 0.8956721313879287,
      "learning_rate": 0.003,
      "loss": 4.0279,
      "step": 6833
    },
    {
      "epoch": 0.06834,
      "grad_norm": 1.1464044341999087,
      "learning_rate": 0.003,
      "loss": 4.0643,
      "step": 6834
    },
    {
      "epoch": 0.06835,
      "grad_norm": 1.09845989400467,
      "learning_rate": 0.003,
      "loss": 4.0633,
      "step": 6835
    },
    {
      "epoch": 0.06836,
      "grad_norm": 1.1577607159035546,
      "learning_rate": 0.003,
      "loss": 4.0883,
      "step": 6836
    },
    {
      "epoch": 0.06837,
      "grad_norm": 0.9624906863376643,
      "learning_rate": 0.003,
      "loss": 4.0376,
      "step": 6837
    },
    {
      "epoch": 0.06838,
      "grad_norm": 1.0526594999167607,
      "learning_rate": 0.003,
      "loss": 4.048,
      "step": 6838
    },
    {
      "epoch": 0.06839,
      "grad_norm": 1.0673019415333493,
      "learning_rate": 0.003,
      "loss": 4.0393,
      "step": 6839
    },
    {
      "epoch": 0.0684,
      "grad_norm": 1.012103935387283,
      "learning_rate": 0.003,
      "loss": 4.0636,
      "step": 6840
    },
    {
      "epoch": 0.06841,
      "grad_norm": 0.9147560220218148,
      "learning_rate": 0.003,
      "loss": 4.0587,
      "step": 6841
    },
    {
      "epoch": 0.06842,
      "grad_norm": 0.9643937321782413,
      "learning_rate": 0.003,
      "loss": 4.0581,
      "step": 6842
    },
    {
      "epoch": 0.06843,
      "grad_norm": 1.0039478407079416,
      "learning_rate": 0.003,
      "loss": 4.053,
      "step": 6843
    },
    {
      "epoch": 0.06844,
      "grad_norm": 1.2025234350492864,
      "learning_rate": 0.003,
      "loss": 4.0541,
      "step": 6844
    },
    {
      "epoch": 0.06845,
      "grad_norm": 1.0176494517204697,
      "learning_rate": 0.003,
      "loss": 4.0685,
      "step": 6845
    },
    {
      "epoch": 0.06846,
      "grad_norm": 0.9635201111106092,
      "learning_rate": 0.003,
      "loss": 4.0551,
      "step": 6846
    },
    {
      "epoch": 0.06847,
      "grad_norm": 0.9695541827730285,
      "learning_rate": 0.003,
      "loss": 4.0432,
      "step": 6847
    },
    {
      "epoch": 0.06848,
      "grad_norm": 1.055028109497596,
      "learning_rate": 0.003,
      "loss": 4.058,
      "step": 6848
    },
    {
      "epoch": 0.06849,
      "grad_norm": 1.2623333403844699,
      "learning_rate": 0.003,
      "loss": 4.0513,
      "step": 6849
    },
    {
      "epoch": 0.0685,
      "grad_norm": 1.0979408883303405,
      "learning_rate": 0.003,
      "loss": 4.0501,
      "step": 6850
    },
    {
      "epoch": 0.06851,
      "grad_norm": 1.2299990074107543,
      "learning_rate": 0.003,
      "loss": 4.0341,
      "step": 6851
    },
    {
      "epoch": 0.06852,
      "grad_norm": 0.8622471399851679,
      "learning_rate": 0.003,
      "loss": 4.038,
      "step": 6852
    },
    {
      "epoch": 0.06853,
      "grad_norm": 0.797053506469073,
      "learning_rate": 0.003,
      "loss": 4.031,
      "step": 6853
    },
    {
      "epoch": 0.06854,
      "grad_norm": 0.7428713139049907,
      "learning_rate": 0.003,
      "loss": 4.0641,
      "step": 6854
    },
    {
      "epoch": 0.06855,
      "grad_norm": 0.8702478930012675,
      "learning_rate": 0.003,
      "loss": 4.0767,
      "step": 6855
    },
    {
      "epoch": 0.06856,
      "grad_norm": 0.9565155211806974,
      "learning_rate": 0.003,
      "loss": 4.0766,
      "step": 6856
    },
    {
      "epoch": 0.06857,
      "grad_norm": 0.9401346796927762,
      "learning_rate": 0.003,
      "loss": 4.0578,
      "step": 6857
    },
    {
      "epoch": 0.06858,
      "grad_norm": 1.046352232064668,
      "learning_rate": 0.003,
      "loss": 4.0231,
      "step": 6858
    },
    {
      "epoch": 0.06859,
      "grad_norm": 1.1113366890222538,
      "learning_rate": 0.003,
      "loss": 4.0401,
      "step": 6859
    },
    {
      "epoch": 0.0686,
      "grad_norm": 1.0332225044816656,
      "learning_rate": 0.003,
      "loss": 4.0488,
      "step": 6860
    },
    {
      "epoch": 0.06861,
      "grad_norm": 1.037641447920837,
      "learning_rate": 0.003,
      "loss": 4.0426,
      "step": 6861
    },
    {
      "epoch": 0.06862,
      "grad_norm": 1.0707793888395856,
      "learning_rate": 0.003,
      "loss": 4.0386,
      "step": 6862
    },
    {
      "epoch": 0.06863,
      "grad_norm": 1.3025261367467222,
      "learning_rate": 0.003,
      "loss": 4.0539,
      "step": 6863
    },
    {
      "epoch": 0.06864,
      "grad_norm": 0.7117943067157152,
      "learning_rate": 0.003,
      "loss": 4.0517,
      "step": 6864
    },
    {
      "epoch": 0.06865,
      "grad_norm": 0.8149012494620742,
      "learning_rate": 0.003,
      "loss": 4.0406,
      "step": 6865
    },
    {
      "epoch": 0.06866,
      "grad_norm": 0.9768992618939744,
      "learning_rate": 0.003,
      "loss": 4.061,
      "step": 6866
    },
    {
      "epoch": 0.06867,
      "grad_norm": 1.2876703502703644,
      "learning_rate": 0.003,
      "loss": 4.0825,
      "step": 6867
    },
    {
      "epoch": 0.06868,
      "grad_norm": 0.8939557694074772,
      "learning_rate": 0.003,
      "loss": 4.0351,
      "step": 6868
    },
    {
      "epoch": 0.06869,
      "grad_norm": 0.9739018190603045,
      "learning_rate": 0.003,
      "loss": 4.0648,
      "step": 6869
    },
    {
      "epoch": 0.0687,
      "grad_norm": 1.0808672350561943,
      "learning_rate": 0.003,
      "loss": 4.0094,
      "step": 6870
    },
    {
      "epoch": 0.06871,
      "grad_norm": 0.9732612594475262,
      "learning_rate": 0.003,
      "loss": 4.0323,
      "step": 6871
    },
    {
      "epoch": 0.06872,
      "grad_norm": 0.9231941574627566,
      "learning_rate": 0.003,
      "loss": 4.0347,
      "step": 6872
    },
    {
      "epoch": 0.06873,
      "grad_norm": 1.0038158255901826,
      "learning_rate": 0.003,
      "loss": 4.0429,
      "step": 6873
    },
    {
      "epoch": 0.06874,
      "grad_norm": 1.1994966137942642,
      "learning_rate": 0.003,
      "loss": 4.064,
      "step": 6874
    },
    {
      "epoch": 0.06875,
      "grad_norm": 0.9012267255930811,
      "learning_rate": 0.003,
      "loss": 4.0605,
      "step": 6875
    },
    {
      "epoch": 0.06876,
      "grad_norm": 0.958415530478731,
      "learning_rate": 0.003,
      "loss": 4.0705,
      "step": 6876
    },
    {
      "epoch": 0.06877,
      "grad_norm": 0.9667858571717155,
      "learning_rate": 0.003,
      "loss": 4.038,
      "step": 6877
    },
    {
      "epoch": 0.06878,
      "grad_norm": 1.2429535644925198,
      "learning_rate": 0.003,
      "loss": 4.0391,
      "step": 6878
    },
    {
      "epoch": 0.06879,
      "grad_norm": 0.8678125841491381,
      "learning_rate": 0.003,
      "loss": 4.0425,
      "step": 6879
    },
    {
      "epoch": 0.0688,
      "grad_norm": 0.9483241231979963,
      "learning_rate": 0.003,
      "loss": 4.0528,
      "step": 6880
    },
    {
      "epoch": 0.06881,
      "grad_norm": 1.1081275778671038,
      "learning_rate": 0.003,
      "loss": 4.0476,
      "step": 6881
    },
    {
      "epoch": 0.06882,
      "grad_norm": 0.8941631616443152,
      "learning_rate": 0.003,
      "loss": 4.0456,
      "step": 6882
    },
    {
      "epoch": 0.06883,
      "grad_norm": 1.0008866943662982,
      "learning_rate": 0.003,
      "loss": 4.0715,
      "step": 6883
    },
    {
      "epoch": 0.06884,
      "grad_norm": 1.1763212407023795,
      "learning_rate": 0.003,
      "loss": 4.0428,
      "step": 6884
    },
    {
      "epoch": 0.06885,
      "grad_norm": 0.947660863840069,
      "learning_rate": 0.003,
      "loss": 4.0572,
      "step": 6885
    },
    {
      "epoch": 0.06886,
      "grad_norm": 1.0445333316933652,
      "learning_rate": 0.003,
      "loss": 4.0425,
      "step": 6886
    },
    {
      "epoch": 0.06887,
      "grad_norm": 1.2206210118596559,
      "learning_rate": 0.003,
      "loss": 4.0683,
      "step": 6887
    },
    {
      "epoch": 0.06888,
      "grad_norm": 1.0692166428843015,
      "learning_rate": 0.003,
      "loss": 4.0418,
      "step": 6888
    },
    {
      "epoch": 0.06889,
      "grad_norm": 0.9355380989508778,
      "learning_rate": 0.003,
      "loss": 4.05,
      "step": 6889
    },
    {
      "epoch": 0.0689,
      "grad_norm": 1.158851574547141,
      "learning_rate": 0.003,
      "loss": 4.033,
      "step": 6890
    },
    {
      "epoch": 0.06891,
      "grad_norm": 0.8775458730749034,
      "learning_rate": 0.003,
      "loss": 4.0639,
      "step": 6891
    },
    {
      "epoch": 0.06892,
      "grad_norm": 1.0821142493586515,
      "learning_rate": 0.003,
      "loss": 4.0442,
      "step": 6892
    },
    {
      "epoch": 0.06893,
      "grad_norm": 1.0013455149294792,
      "learning_rate": 0.003,
      "loss": 4.054,
      "step": 6893
    },
    {
      "epoch": 0.06894,
      "grad_norm": 1.1973645911348245,
      "learning_rate": 0.003,
      "loss": 4.0446,
      "step": 6894
    },
    {
      "epoch": 0.06895,
      "grad_norm": 0.9524668934091803,
      "learning_rate": 0.003,
      "loss": 4.0379,
      "step": 6895
    },
    {
      "epoch": 0.06896,
      "grad_norm": 1.042772383295928,
      "learning_rate": 0.003,
      "loss": 4.0356,
      "step": 6896
    },
    {
      "epoch": 0.06897,
      "grad_norm": 1.0973998064718429,
      "learning_rate": 0.003,
      "loss": 4.0829,
      "step": 6897
    },
    {
      "epoch": 0.06898,
      "grad_norm": 0.941011645940574,
      "learning_rate": 0.003,
      "loss": 4.0016,
      "step": 6898
    },
    {
      "epoch": 0.06899,
      "grad_norm": 0.9259127376564472,
      "learning_rate": 0.003,
      "loss": 4.0801,
      "step": 6899
    },
    {
      "epoch": 0.069,
      "grad_norm": 1.0042825475944963,
      "learning_rate": 0.003,
      "loss": 4.0468,
      "step": 6900
    },
    {
      "epoch": 0.06901,
      "grad_norm": 1.0504783164304958,
      "learning_rate": 0.003,
      "loss": 4.0241,
      "step": 6901
    },
    {
      "epoch": 0.06902,
      "grad_norm": 1.001395184859859,
      "learning_rate": 0.003,
      "loss": 4.0375,
      "step": 6902
    },
    {
      "epoch": 0.06903,
      "grad_norm": 0.9533095870741758,
      "learning_rate": 0.003,
      "loss": 4.0419,
      "step": 6903
    },
    {
      "epoch": 0.06904,
      "grad_norm": 0.8946382452071323,
      "learning_rate": 0.003,
      "loss": 4.0414,
      "step": 6904
    },
    {
      "epoch": 0.06905,
      "grad_norm": 0.9637064412868039,
      "learning_rate": 0.003,
      "loss": 4.0721,
      "step": 6905
    },
    {
      "epoch": 0.06906,
      "grad_norm": 1.0077282899066335,
      "learning_rate": 0.003,
      "loss": 4.023,
      "step": 6906
    },
    {
      "epoch": 0.06907,
      "grad_norm": 1.1560211480862905,
      "learning_rate": 0.003,
      "loss": 4.0466,
      "step": 6907
    },
    {
      "epoch": 0.06908,
      "grad_norm": 0.9093203754997685,
      "learning_rate": 0.003,
      "loss": 4.0219,
      "step": 6908
    },
    {
      "epoch": 0.06909,
      "grad_norm": 0.9106719670133724,
      "learning_rate": 0.003,
      "loss": 4.0556,
      "step": 6909
    },
    {
      "epoch": 0.0691,
      "grad_norm": 1.0352808190093583,
      "learning_rate": 0.003,
      "loss": 4.027,
      "step": 6910
    },
    {
      "epoch": 0.06911,
      "grad_norm": 1.172505571950034,
      "learning_rate": 0.003,
      "loss": 4.0749,
      "step": 6911
    },
    {
      "epoch": 0.06912,
      "grad_norm": 1.004717986434819,
      "learning_rate": 0.003,
      "loss": 4.0503,
      "step": 6912
    },
    {
      "epoch": 0.06913,
      "grad_norm": 1.0346811560718048,
      "learning_rate": 0.003,
      "loss": 4.0275,
      "step": 6913
    },
    {
      "epoch": 0.06914,
      "grad_norm": 0.9902540554926873,
      "learning_rate": 0.003,
      "loss": 4.0739,
      "step": 6914
    },
    {
      "epoch": 0.06915,
      "grad_norm": 1.3265154292186019,
      "learning_rate": 0.003,
      "loss": 4.0875,
      "step": 6915
    },
    {
      "epoch": 0.06916,
      "grad_norm": 1.0096731892843975,
      "learning_rate": 0.003,
      "loss": 4.0511,
      "step": 6916
    },
    {
      "epoch": 0.06917,
      "grad_norm": 1.1017730347115613,
      "learning_rate": 0.003,
      "loss": 4.0387,
      "step": 6917
    },
    {
      "epoch": 0.06918,
      "grad_norm": 1.2367019859366197,
      "learning_rate": 0.003,
      "loss": 4.0773,
      "step": 6918
    },
    {
      "epoch": 0.06919,
      "grad_norm": 0.9219679674649628,
      "learning_rate": 0.003,
      "loss": 4.0667,
      "step": 6919
    },
    {
      "epoch": 0.0692,
      "grad_norm": 0.991754900343874,
      "learning_rate": 0.003,
      "loss": 4.0536,
      "step": 6920
    },
    {
      "epoch": 0.06921,
      "grad_norm": 1.2175829497274775,
      "learning_rate": 0.003,
      "loss": 4.0674,
      "step": 6921
    },
    {
      "epoch": 0.06922,
      "grad_norm": 0.9154029403754875,
      "learning_rate": 0.003,
      "loss": 4.0578,
      "step": 6922
    },
    {
      "epoch": 0.06923,
      "grad_norm": 1.0695987825330893,
      "learning_rate": 0.003,
      "loss": 4.0428,
      "step": 6923
    },
    {
      "epoch": 0.06924,
      "grad_norm": 0.940173575072378,
      "learning_rate": 0.003,
      "loss": 4.0814,
      "step": 6924
    },
    {
      "epoch": 0.06925,
      "grad_norm": 1.1072649608493197,
      "learning_rate": 0.003,
      "loss": 4.0783,
      "step": 6925
    },
    {
      "epoch": 0.06926,
      "grad_norm": 1.0433608354839305,
      "learning_rate": 0.003,
      "loss": 4.0736,
      "step": 6926
    },
    {
      "epoch": 0.06927,
      "grad_norm": 1.0081972527274148,
      "learning_rate": 0.003,
      "loss": 4.0648,
      "step": 6927
    },
    {
      "epoch": 0.06928,
      "grad_norm": 1.1110960603111513,
      "learning_rate": 0.003,
      "loss": 4.0465,
      "step": 6928
    },
    {
      "epoch": 0.06929,
      "grad_norm": 1.020106821680874,
      "learning_rate": 0.003,
      "loss": 4.0522,
      "step": 6929
    },
    {
      "epoch": 0.0693,
      "grad_norm": 1.1534854540437196,
      "learning_rate": 0.003,
      "loss": 4.1001,
      "step": 6930
    },
    {
      "epoch": 0.06931,
      "grad_norm": 0.9801283370812117,
      "learning_rate": 0.003,
      "loss": 4.0648,
      "step": 6931
    },
    {
      "epoch": 0.06932,
      "grad_norm": 1.0759745476144513,
      "learning_rate": 0.003,
      "loss": 4.0404,
      "step": 6932
    },
    {
      "epoch": 0.06933,
      "grad_norm": 1.1318320501940358,
      "learning_rate": 0.003,
      "loss": 4.0827,
      "step": 6933
    },
    {
      "epoch": 0.06934,
      "grad_norm": 0.9953954741024389,
      "learning_rate": 0.003,
      "loss": 4.0428,
      "step": 6934
    },
    {
      "epoch": 0.06935,
      "grad_norm": 1.0404905470053114,
      "learning_rate": 0.003,
      "loss": 4.0649,
      "step": 6935
    },
    {
      "epoch": 0.06936,
      "grad_norm": 1.0468597561286765,
      "learning_rate": 0.003,
      "loss": 4.0148,
      "step": 6936
    },
    {
      "epoch": 0.06937,
      "grad_norm": 1.0201183676680066,
      "learning_rate": 0.003,
      "loss": 4.0475,
      "step": 6937
    },
    {
      "epoch": 0.06938,
      "grad_norm": 1.0787460833199358,
      "learning_rate": 0.003,
      "loss": 4.0282,
      "step": 6938
    },
    {
      "epoch": 0.06939,
      "grad_norm": 1.0901798974191894,
      "learning_rate": 0.003,
      "loss": 4.059,
      "step": 6939
    },
    {
      "epoch": 0.0694,
      "grad_norm": 0.9385122020121525,
      "learning_rate": 0.003,
      "loss": 4.0391,
      "step": 6940
    },
    {
      "epoch": 0.06941,
      "grad_norm": 1.09751042724724,
      "learning_rate": 0.003,
      "loss": 4.0641,
      "step": 6941
    },
    {
      "epoch": 0.06942,
      "grad_norm": 1.0152502004249075,
      "learning_rate": 0.003,
      "loss": 4.0407,
      "step": 6942
    },
    {
      "epoch": 0.06943,
      "grad_norm": 1.1106778728034443,
      "learning_rate": 0.003,
      "loss": 4.0735,
      "step": 6943
    },
    {
      "epoch": 0.06944,
      "grad_norm": 0.9402245095674768,
      "learning_rate": 0.003,
      "loss": 4.0511,
      "step": 6944
    },
    {
      "epoch": 0.06945,
      "grad_norm": 1.0498365010646888,
      "learning_rate": 0.003,
      "loss": 4.0794,
      "step": 6945
    },
    {
      "epoch": 0.06946,
      "grad_norm": 1.04938260769981,
      "learning_rate": 0.003,
      "loss": 4.0559,
      "step": 6946
    },
    {
      "epoch": 0.06947,
      "grad_norm": 1.2040313407275116,
      "learning_rate": 0.003,
      "loss": 4.0464,
      "step": 6947
    },
    {
      "epoch": 0.06948,
      "grad_norm": 0.9174434519705801,
      "learning_rate": 0.003,
      "loss": 4.0552,
      "step": 6948
    },
    {
      "epoch": 0.06949,
      "grad_norm": 1.056561705670037,
      "learning_rate": 0.003,
      "loss": 4.0642,
      "step": 6949
    },
    {
      "epoch": 0.0695,
      "grad_norm": 1.1160879395367673,
      "learning_rate": 0.003,
      "loss": 4.0454,
      "step": 6950
    },
    {
      "epoch": 0.06951,
      "grad_norm": 0.9311634834480271,
      "learning_rate": 0.003,
      "loss": 4.0707,
      "step": 6951
    },
    {
      "epoch": 0.06952,
      "grad_norm": 1.0397971869811435,
      "learning_rate": 0.003,
      "loss": 4.0764,
      "step": 6952
    },
    {
      "epoch": 0.06953,
      "grad_norm": 1.1245034234997564,
      "learning_rate": 0.003,
      "loss": 4.0744,
      "step": 6953
    },
    {
      "epoch": 0.06954,
      "grad_norm": 1.0386924889956406,
      "learning_rate": 0.003,
      "loss": 4.0215,
      "step": 6954
    },
    {
      "epoch": 0.06955,
      "grad_norm": 0.9732927657279983,
      "learning_rate": 0.003,
      "loss": 4.064,
      "step": 6955
    },
    {
      "epoch": 0.06956,
      "grad_norm": 0.9919815526911104,
      "learning_rate": 0.003,
      "loss": 4.0669,
      "step": 6956
    },
    {
      "epoch": 0.06957,
      "grad_norm": 0.9511426911729295,
      "learning_rate": 0.003,
      "loss": 4.0454,
      "step": 6957
    },
    {
      "epoch": 0.06958,
      "grad_norm": 0.936900952232894,
      "learning_rate": 0.003,
      "loss": 4.0634,
      "step": 6958
    },
    {
      "epoch": 0.06959,
      "grad_norm": 0.9506093199052728,
      "learning_rate": 0.003,
      "loss": 4.0596,
      "step": 6959
    },
    {
      "epoch": 0.0696,
      "grad_norm": 0.894303598657002,
      "learning_rate": 0.003,
      "loss": 4.0687,
      "step": 6960
    },
    {
      "epoch": 0.06961,
      "grad_norm": 0.860669748877687,
      "learning_rate": 0.003,
      "loss": 4.0637,
      "step": 6961
    },
    {
      "epoch": 0.06962,
      "grad_norm": 0.9634371666233146,
      "learning_rate": 0.003,
      "loss": 4.0588,
      "step": 6962
    },
    {
      "epoch": 0.06963,
      "grad_norm": 1.0960373688281182,
      "learning_rate": 0.003,
      "loss": 4.064,
      "step": 6963
    },
    {
      "epoch": 0.06964,
      "grad_norm": 1.1321187322512247,
      "learning_rate": 0.003,
      "loss": 4.0626,
      "step": 6964
    },
    {
      "epoch": 0.06965,
      "grad_norm": 1.0060414603670687,
      "learning_rate": 0.003,
      "loss": 4.043,
      "step": 6965
    },
    {
      "epoch": 0.06966,
      "grad_norm": 1.0380239906833486,
      "learning_rate": 0.003,
      "loss": 4.066,
      "step": 6966
    },
    {
      "epoch": 0.06967,
      "grad_norm": 1.1084883931180407,
      "learning_rate": 0.003,
      "loss": 4.0497,
      "step": 6967
    },
    {
      "epoch": 0.06968,
      "grad_norm": 0.9467387430672358,
      "learning_rate": 0.003,
      "loss": 4.0522,
      "step": 6968
    },
    {
      "epoch": 0.06969,
      "grad_norm": 0.9622808523217883,
      "learning_rate": 0.003,
      "loss": 4.0689,
      "step": 6969
    },
    {
      "epoch": 0.0697,
      "grad_norm": 1.2314020219415485,
      "learning_rate": 0.003,
      "loss": 4.0411,
      "step": 6970
    },
    {
      "epoch": 0.06971,
      "grad_norm": 1.1260021071930193,
      "learning_rate": 0.003,
      "loss": 4.0491,
      "step": 6971
    },
    {
      "epoch": 0.06972,
      "grad_norm": 0.9154963484107886,
      "learning_rate": 0.003,
      "loss": 4.0089,
      "step": 6972
    },
    {
      "epoch": 0.06973,
      "grad_norm": 0.8840670259700667,
      "learning_rate": 0.003,
      "loss": 4.0558,
      "step": 6973
    },
    {
      "epoch": 0.06974,
      "grad_norm": 0.8851178357236568,
      "learning_rate": 0.003,
      "loss": 4.0454,
      "step": 6974
    },
    {
      "epoch": 0.06975,
      "grad_norm": 0.9524411300067905,
      "learning_rate": 0.003,
      "loss": 4.0527,
      "step": 6975
    },
    {
      "epoch": 0.06976,
      "grad_norm": 1.088636962287439,
      "learning_rate": 0.003,
      "loss": 4.023,
      "step": 6976
    },
    {
      "epoch": 0.06977,
      "grad_norm": 1.1071976636641974,
      "learning_rate": 0.003,
      "loss": 4.0944,
      "step": 6977
    },
    {
      "epoch": 0.06978,
      "grad_norm": 0.9798522761711383,
      "learning_rate": 0.003,
      "loss": 4.025,
      "step": 6978
    },
    {
      "epoch": 0.06979,
      "grad_norm": 0.807513012542661,
      "learning_rate": 0.003,
      "loss": 4.0675,
      "step": 6979
    },
    {
      "epoch": 0.0698,
      "grad_norm": 1.0085245208426825,
      "learning_rate": 0.003,
      "loss": 4.0573,
      "step": 6980
    },
    {
      "epoch": 0.06981,
      "grad_norm": 1.0861815134074257,
      "learning_rate": 0.003,
      "loss": 4.0327,
      "step": 6981
    },
    {
      "epoch": 0.06982,
      "grad_norm": 0.7980450645757602,
      "learning_rate": 0.003,
      "loss": 4.0413,
      "step": 6982
    },
    {
      "epoch": 0.06983,
      "grad_norm": 0.9321566297048406,
      "learning_rate": 0.003,
      "loss": 4.0268,
      "step": 6983
    },
    {
      "epoch": 0.06984,
      "grad_norm": 1.0721971827592451,
      "learning_rate": 0.003,
      "loss": 4.0335,
      "step": 6984
    },
    {
      "epoch": 0.06985,
      "grad_norm": 1.0237038863583252,
      "learning_rate": 0.003,
      "loss": 4.0676,
      "step": 6985
    },
    {
      "epoch": 0.06986,
      "grad_norm": 1.0546187122638413,
      "learning_rate": 0.003,
      "loss": 4.0611,
      "step": 6986
    },
    {
      "epoch": 0.06987,
      "grad_norm": 1.1037883958736543,
      "learning_rate": 0.003,
      "loss": 4.0658,
      "step": 6987
    },
    {
      "epoch": 0.06988,
      "grad_norm": 1.0507061025832318,
      "learning_rate": 0.003,
      "loss": 4.0466,
      "step": 6988
    },
    {
      "epoch": 0.06989,
      "grad_norm": 1.0236366890111934,
      "learning_rate": 0.003,
      "loss": 4.0332,
      "step": 6989
    },
    {
      "epoch": 0.0699,
      "grad_norm": 1.133782459769843,
      "learning_rate": 0.003,
      "loss": 4.035,
      "step": 6990
    },
    {
      "epoch": 0.06991,
      "grad_norm": 0.9260651932828348,
      "learning_rate": 0.003,
      "loss": 4.0627,
      "step": 6991
    },
    {
      "epoch": 0.06992,
      "grad_norm": 0.9925580541567944,
      "learning_rate": 0.003,
      "loss": 4.031,
      "step": 6992
    },
    {
      "epoch": 0.06993,
      "grad_norm": 1.067990124386425,
      "learning_rate": 0.003,
      "loss": 4.0351,
      "step": 6993
    },
    {
      "epoch": 0.06994,
      "grad_norm": 0.9360919285448381,
      "learning_rate": 0.003,
      "loss": 4.0384,
      "step": 6994
    },
    {
      "epoch": 0.06995,
      "grad_norm": 0.96458377970943,
      "learning_rate": 0.003,
      "loss": 4.0647,
      "step": 6995
    },
    {
      "epoch": 0.06996,
      "grad_norm": 1.148084887236895,
      "learning_rate": 0.003,
      "loss": 4.0534,
      "step": 6996
    },
    {
      "epoch": 0.06997,
      "grad_norm": 0.9046547601765281,
      "learning_rate": 0.003,
      "loss": 4.0265,
      "step": 6997
    },
    {
      "epoch": 0.06998,
      "grad_norm": 0.8772615969985239,
      "learning_rate": 0.003,
      "loss": 4.0435,
      "step": 6998
    },
    {
      "epoch": 0.06999,
      "grad_norm": 0.9340057257128744,
      "learning_rate": 0.003,
      "loss": 4.0316,
      "step": 6999
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0565666461050007,
      "learning_rate": 0.003,
      "loss": 4.039,
      "step": 7000
    },
    {
      "epoch": 0.07001,
      "grad_norm": 0.9426298349737786,
      "learning_rate": 0.003,
      "loss": 4.0934,
      "step": 7001
    },
    {
      "epoch": 0.07002,
      "grad_norm": 1.0664130531690763,
      "learning_rate": 0.003,
      "loss": 4.048,
      "step": 7002
    },
    {
      "epoch": 0.07003,
      "grad_norm": 1.1291863119100505,
      "learning_rate": 0.003,
      "loss": 4.0402,
      "step": 7003
    },
    {
      "epoch": 0.07004,
      "grad_norm": 1.0642470026156752,
      "learning_rate": 0.003,
      "loss": 4.0238,
      "step": 7004
    },
    {
      "epoch": 0.07005,
      "grad_norm": 1.2390880199207346,
      "learning_rate": 0.003,
      "loss": 4.0584,
      "step": 7005
    },
    {
      "epoch": 0.07006,
      "grad_norm": 0.9062982273502466,
      "learning_rate": 0.003,
      "loss": 4.0494,
      "step": 7006
    },
    {
      "epoch": 0.07007,
      "grad_norm": 1.1259354802693449,
      "learning_rate": 0.003,
      "loss": 4.0533,
      "step": 7007
    },
    {
      "epoch": 0.07008,
      "grad_norm": 1.0839899240911712,
      "learning_rate": 0.003,
      "loss": 4.051,
      "step": 7008
    },
    {
      "epoch": 0.07009,
      "grad_norm": 1.068714363123779,
      "learning_rate": 0.003,
      "loss": 4.0649,
      "step": 7009
    },
    {
      "epoch": 0.0701,
      "grad_norm": 0.9452433028423766,
      "learning_rate": 0.003,
      "loss": 4.0363,
      "step": 7010
    },
    {
      "epoch": 0.07011,
      "grad_norm": 0.9919155661649216,
      "learning_rate": 0.003,
      "loss": 4.0326,
      "step": 7011
    },
    {
      "epoch": 0.07012,
      "grad_norm": 1.2609870971082537,
      "learning_rate": 0.003,
      "loss": 4.0711,
      "step": 7012
    },
    {
      "epoch": 0.07013,
      "grad_norm": 0.9993261593044893,
      "learning_rate": 0.003,
      "loss": 4.0402,
      "step": 7013
    },
    {
      "epoch": 0.07014,
      "grad_norm": 1.0685461199627173,
      "learning_rate": 0.003,
      "loss": 4.0339,
      "step": 7014
    },
    {
      "epoch": 0.07015,
      "grad_norm": 0.8992285999432262,
      "learning_rate": 0.003,
      "loss": 4.0385,
      "step": 7015
    },
    {
      "epoch": 0.07016,
      "grad_norm": 0.8983621405017727,
      "learning_rate": 0.003,
      "loss": 4.0425,
      "step": 7016
    },
    {
      "epoch": 0.07017,
      "grad_norm": 0.9901752576746331,
      "learning_rate": 0.003,
      "loss": 4.029,
      "step": 7017
    },
    {
      "epoch": 0.07018,
      "grad_norm": 1.1074224986234265,
      "learning_rate": 0.003,
      "loss": 4.0629,
      "step": 7018
    },
    {
      "epoch": 0.07019,
      "grad_norm": 1.057295544596109,
      "learning_rate": 0.003,
      "loss": 4.0506,
      "step": 7019
    },
    {
      "epoch": 0.0702,
      "grad_norm": 1.0481341550110743,
      "learning_rate": 0.003,
      "loss": 4.0436,
      "step": 7020
    },
    {
      "epoch": 0.07021,
      "grad_norm": 1.0407732639272513,
      "learning_rate": 0.003,
      "loss": 4.0595,
      "step": 7021
    },
    {
      "epoch": 0.07022,
      "grad_norm": 1.1448172616705823,
      "learning_rate": 0.003,
      "loss": 4.066,
      "step": 7022
    },
    {
      "epoch": 0.07023,
      "grad_norm": 1.0313350981765694,
      "learning_rate": 0.003,
      "loss": 4.0582,
      "step": 7023
    },
    {
      "epoch": 0.07024,
      "grad_norm": 1.0020252731504273,
      "learning_rate": 0.003,
      "loss": 4.0375,
      "step": 7024
    },
    {
      "epoch": 0.07025,
      "grad_norm": 1.1359222787122534,
      "learning_rate": 0.003,
      "loss": 4.0677,
      "step": 7025
    },
    {
      "epoch": 0.07026,
      "grad_norm": 1.1041895748549755,
      "learning_rate": 0.003,
      "loss": 4.0374,
      "step": 7026
    },
    {
      "epoch": 0.07027,
      "grad_norm": 0.9841999651428623,
      "learning_rate": 0.003,
      "loss": 4.0435,
      "step": 7027
    },
    {
      "epoch": 0.07028,
      "grad_norm": 1.025891345196206,
      "learning_rate": 0.003,
      "loss": 4.0471,
      "step": 7028
    },
    {
      "epoch": 0.07029,
      "grad_norm": 0.9948854364695006,
      "learning_rate": 0.003,
      "loss": 4.037,
      "step": 7029
    },
    {
      "epoch": 0.0703,
      "grad_norm": 0.9668935895325885,
      "learning_rate": 0.003,
      "loss": 4.0687,
      "step": 7030
    },
    {
      "epoch": 0.07031,
      "grad_norm": 1.0575663087737428,
      "learning_rate": 0.003,
      "loss": 4.0254,
      "step": 7031
    },
    {
      "epoch": 0.07032,
      "grad_norm": 0.9164666976168099,
      "learning_rate": 0.003,
      "loss": 4.0563,
      "step": 7032
    },
    {
      "epoch": 0.07033,
      "grad_norm": 0.9214454265285555,
      "learning_rate": 0.003,
      "loss": 4.0558,
      "step": 7033
    },
    {
      "epoch": 0.07034,
      "grad_norm": 0.9583267727565041,
      "learning_rate": 0.003,
      "loss": 4.0464,
      "step": 7034
    },
    {
      "epoch": 0.07035,
      "grad_norm": 1.3383936324024557,
      "learning_rate": 0.003,
      "loss": 4.0601,
      "step": 7035
    },
    {
      "epoch": 0.07036,
      "grad_norm": 0.8658946616429294,
      "learning_rate": 0.003,
      "loss": 4.0432,
      "step": 7036
    },
    {
      "epoch": 0.07037,
      "grad_norm": 0.9475554646881245,
      "learning_rate": 0.003,
      "loss": 4.0378,
      "step": 7037
    },
    {
      "epoch": 0.07038,
      "grad_norm": 0.8924840709336554,
      "learning_rate": 0.003,
      "loss": 4.0497,
      "step": 7038
    },
    {
      "epoch": 0.07039,
      "grad_norm": 0.8951723916341889,
      "learning_rate": 0.003,
      "loss": 4.0443,
      "step": 7039
    },
    {
      "epoch": 0.0704,
      "grad_norm": 0.854416819628114,
      "learning_rate": 0.003,
      "loss": 4.066,
      "step": 7040
    },
    {
      "epoch": 0.07041,
      "grad_norm": 0.9333352868640866,
      "learning_rate": 0.003,
      "loss": 4.0463,
      "step": 7041
    },
    {
      "epoch": 0.07042,
      "grad_norm": 1.094975909783682,
      "learning_rate": 0.003,
      "loss": 4.0334,
      "step": 7042
    },
    {
      "epoch": 0.07043,
      "grad_norm": 1.111332862841996,
      "learning_rate": 0.003,
      "loss": 4.0733,
      "step": 7043
    },
    {
      "epoch": 0.07044,
      "grad_norm": 1.0657438894896911,
      "learning_rate": 0.003,
      "loss": 4.0692,
      "step": 7044
    },
    {
      "epoch": 0.07045,
      "grad_norm": 1.214943958168645,
      "learning_rate": 0.003,
      "loss": 4.0585,
      "step": 7045
    },
    {
      "epoch": 0.07046,
      "grad_norm": 1.0052618021881827,
      "learning_rate": 0.003,
      "loss": 4.061,
      "step": 7046
    },
    {
      "epoch": 0.07047,
      "grad_norm": 1.2069549174110699,
      "learning_rate": 0.003,
      "loss": 4.0628,
      "step": 7047
    },
    {
      "epoch": 0.07048,
      "grad_norm": 0.7843964117630299,
      "learning_rate": 0.003,
      "loss": 4.0488,
      "step": 7048
    },
    {
      "epoch": 0.07049,
      "grad_norm": 0.691374439878814,
      "learning_rate": 0.003,
      "loss": 4.0418,
      "step": 7049
    },
    {
      "epoch": 0.0705,
      "grad_norm": 0.7306826895178101,
      "learning_rate": 0.003,
      "loss": 4.0606,
      "step": 7050
    },
    {
      "epoch": 0.07051,
      "grad_norm": 0.8933100091919502,
      "learning_rate": 0.003,
      "loss": 4.0689,
      "step": 7051
    },
    {
      "epoch": 0.07052,
      "grad_norm": 1.0871930655736466,
      "learning_rate": 0.003,
      "loss": 4.0279,
      "step": 7052
    },
    {
      "epoch": 0.07053,
      "grad_norm": 1.306112767575483,
      "learning_rate": 0.003,
      "loss": 4.0455,
      "step": 7053
    },
    {
      "epoch": 0.07054,
      "grad_norm": 0.7906641478755512,
      "learning_rate": 0.003,
      "loss": 4.041,
      "step": 7054
    },
    {
      "epoch": 0.07055,
      "grad_norm": 0.8522746819689732,
      "learning_rate": 0.003,
      "loss": 4.0477,
      "step": 7055
    },
    {
      "epoch": 0.07056,
      "grad_norm": 0.9004086354520517,
      "learning_rate": 0.003,
      "loss": 4.0366,
      "step": 7056
    },
    {
      "epoch": 0.07057,
      "grad_norm": 0.9624496801222439,
      "learning_rate": 0.003,
      "loss": 4.0564,
      "step": 7057
    },
    {
      "epoch": 0.07058,
      "grad_norm": 0.8808033290776586,
      "learning_rate": 0.003,
      "loss": 4.0396,
      "step": 7058
    },
    {
      "epoch": 0.07059,
      "grad_norm": 1.0148842321503468,
      "learning_rate": 0.003,
      "loss": 4.0502,
      "step": 7059
    },
    {
      "epoch": 0.0706,
      "grad_norm": 1.0661395469335966,
      "learning_rate": 0.003,
      "loss": 4.0509,
      "step": 7060
    },
    {
      "epoch": 0.07061,
      "grad_norm": 1.033267111562011,
      "learning_rate": 0.003,
      "loss": 4.0434,
      "step": 7061
    },
    {
      "epoch": 0.07062,
      "grad_norm": 1.1023501283703616,
      "learning_rate": 0.003,
      "loss": 4.0808,
      "step": 7062
    },
    {
      "epoch": 0.07063,
      "grad_norm": 1.1371048285194043,
      "learning_rate": 0.003,
      "loss": 4.0571,
      "step": 7063
    },
    {
      "epoch": 0.07064,
      "grad_norm": 1.0600698489467515,
      "learning_rate": 0.003,
      "loss": 4.0677,
      "step": 7064
    },
    {
      "epoch": 0.07065,
      "grad_norm": 1.0845674242904877,
      "learning_rate": 0.003,
      "loss": 4.0576,
      "step": 7065
    },
    {
      "epoch": 0.07066,
      "grad_norm": 1.0117517612735492,
      "learning_rate": 0.003,
      "loss": 4.0618,
      "step": 7066
    },
    {
      "epoch": 0.07067,
      "grad_norm": 1.1834369979286008,
      "learning_rate": 0.003,
      "loss": 4.1004,
      "step": 7067
    },
    {
      "epoch": 0.07068,
      "grad_norm": 0.9912589707934546,
      "learning_rate": 0.003,
      "loss": 4.0291,
      "step": 7068
    },
    {
      "epoch": 0.07069,
      "grad_norm": 1.008241016661584,
      "learning_rate": 0.003,
      "loss": 4.0341,
      "step": 7069
    },
    {
      "epoch": 0.0707,
      "grad_norm": 1.1771304332271513,
      "learning_rate": 0.003,
      "loss": 4.0636,
      "step": 7070
    },
    {
      "epoch": 0.07071,
      "grad_norm": 1.1143211837122156,
      "learning_rate": 0.003,
      "loss": 4.0396,
      "step": 7071
    },
    {
      "epoch": 0.07072,
      "grad_norm": 0.9342779082515223,
      "learning_rate": 0.003,
      "loss": 4.0422,
      "step": 7072
    },
    {
      "epoch": 0.07073,
      "grad_norm": 0.8837711443464364,
      "learning_rate": 0.003,
      "loss": 4.0554,
      "step": 7073
    },
    {
      "epoch": 0.07074,
      "grad_norm": 1.0273879438054683,
      "learning_rate": 0.003,
      "loss": 4.0435,
      "step": 7074
    },
    {
      "epoch": 0.07075,
      "grad_norm": 1.1016591310376989,
      "learning_rate": 0.003,
      "loss": 4.0838,
      "step": 7075
    },
    {
      "epoch": 0.07076,
      "grad_norm": 1.105334861387564,
      "learning_rate": 0.003,
      "loss": 4.0301,
      "step": 7076
    },
    {
      "epoch": 0.07077,
      "grad_norm": 0.9146056746290705,
      "learning_rate": 0.003,
      "loss": 4.0398,
      "step": 7077
    },
    {
      "epoch": 0.07078,
      "grad_norm": 0.8766079673528181,
      "learning_rate": 0.003,
      "loss": 4.0667,
      "step": 7078
    },
    {
      "epoch": 0.07079,
      "grad_norm": 0.9316844058335465,
      "learning_rate": 0.003,
      "loss": 4.0688,
      "step": 7079
    },
    {
      "epoch": 0.0708,
      "grad_norm": 1.0866064222511407,
      "learning_rate": 0.003,
      "loss": 4.0199,
      "step": 7080
    },
    {
      "epoch": 0.07081,
      "grad_norm": 0.9789936615170471,
      "learning_rate": 0.003,
      "loss": 4.0552,
      "step": 7081
    },
    {
      "epoch": 0.07082,
      "grad_norm": 1.1021827553494439,
      "learning_rate": 0.003,
      "loss": 4.0507,
      "step": 7082
    },
    {
      "epoch": 0.07083,
      "grad_norm": 1.0245533209037434,
      "learning_rate": 0.003,
      "loss": 4.0342,
      "step": 7083
    },
    {
      "epoch": 0.07084,
      "grad_norm": 1.0621477148568337,
      "learning_rate": 0.003,
      "loss": 4.0503,
      "step": 7084
    },
    {
      "epoch": 0.07085,
      "grad_norm": 1.1601015540652249,
      "learning_rate": 0.003,
      "loss": 4.0714,
      "step": 7085
    },
    {
      "epoch": 0.07086,
      "grad_norm": 1.0854553734016084,
      "learning_rate": 0.003,
      "loss": 4.0494,
      "step": 7086
    },
    {
      "epoch": 0.07087,
      "grad_norm": 1.3922698043387116,
      "learning_rate": 0.003,
      "loss": 4.0545,
      "step": 7087
    },
    {
      "epoch": 0.07088,
      "grad_norm": 0.9399003500801127,
      "learning_rate": 0.003,
      "loss": 4.0201,
      "step": 7088
    },
    {
      "epoch": 0.07089,
      "grad_norm": 1.106468816582972,
      "learning_rate": 0.003,
      "loss": 4.047,
      "step": 7089
    },
    {
      "epoch": 0.0709,
      "grad_norm": 1.1027532636474677,
      "learning_rate": 0.003,
      "loss": 4.0502,
      "step": 7090
    },
    {
      "epoch": 0.07091,
      "grad_norm": 1.0374251515271373,
      "learning_rate": 0.003,
      "loss": 4.0383,
      "step": 7091
    },
    {
      "epoch": 0.07092,
      "grad_norm": 1.1299226085447214,
      "learning_rate": 0.003,
      "loss": 4.0453,
      "step": 7092
    },
    {
      "epoch": 0.07093,
      "grad_norm": 0.9253623127929507,
      "learning_rate": 0.003,
      "loss": 4.064,
      "step": 7093
    },
    {
      "epoch": 0.07094,
      "grad_norm": 1.1587332040574374,
      "learning_rate": 0.003,
      "loss": 4.0891,
      "step": 7094
    },
    {
      "epoch": 0.07095,
      "grad_norm": 0.7769235402798084,
      "learning_rate": 0.003,
      "loss": 4.0661,
      "step": 7095
    },
    {
      "epoch": 0.07096,
      "grad_norm": 0.8053305671402123,
      "learning_rate": 0.003,
      "loss": 4.0326,
      "step": 7096
    },
    {
      "epoch": 0.07097,
      "grad_norm": 0.9209272441259044,
      "learning_rate": 0.003,
      "loss": 4.0543,
      "step": 7097
    },
    {
      "epoch": 0.07098,
      "grad_norm": 1.0625889845082845,
      "learning_rate": 0.003,
      "loss": 4.0653,
      "step": 7098
    },
    {
      "epoch": 0.07099,
      "grad_norm": 1.0498047751329527,
      "learning_rate": 0.003,
      "loss": 4.0621,
      "step": 7099
    },
    {
      "epoch": 0.071,
      "grad_norm": 1.224649756474013,
      "learning_rate": 0.003,
      "loss": 4.0633,
      "step": 7100
    },
    {
      "epoch": 0.07101,
      "grad_norm": 1.008032355385532,
      "learning_rate": 0.003,
      "loss": 4.0372,
      "step": 7101
    },
    {
      "epoch": 0.07102,
      "grad_norm": 1.0752805417902511,
      "learning_rate": 0.003,
      "loss": 4.0555,
      "step": 7102
    },
    {
      "epoch": 0.07103,
      "grad_norm": 1.0092223551265274,
      "learning_rate": 0.003,
      "loss": 4.0679,
      "step": 7103
    },
    {
      "epoch": 0.07104,
      "grad_norm": 1.072487221815657,
      "learning_rate": 0.003,
      "loss": 4.0594,
      "step": 7104
    },
    {
      "epoch": 0.07105,
      "grad_norm": 1.1260402490166252,
      "learning_rate": 0.003,
      "loss": 4.0548,
      "step": 7105
    },
    {
      "epoch": 0.07106,
      "grad_norm": 0.9259910792008968,
      "learning_rate": 0.003,
      "loss": 4.0583,
      "step": 7106
    },
    {
      "epoch": 0.07107,
      "grad_norm": 0.9680226789669217,
      "learning_rate": 0.003,
      "loss": 4.0597,
      "step": 7107
    },
    {
      "epoch": 0.07108,
      "grad_norm": 0.935158998841827,
      "learning_rate": 0.003,
      "loss": 4.0664,
      "step": 7108
    },
    {
      "epoch": 0.07109,
      "grad_norm": 1.0085487694764077,
      "learning_rate": 0.003,
      "loss": 4.0539,
      "step": 7109
    },
    {
      "epoch": 0.0711,
      "grad_norm": 1.196671503733759,
      "learning_rate": 0.003,
      "loss": 4.0339,
      "step": 7110
    },
    {
      "epoch": 0.07111,
      "grad_norm": 0.9584229952834501,
      "learning_rate": 0.003,
      "loss": 4.0487,
      "step": 7111
    },
    {
      "epoch": 0.07112,
      "grad_norm": 0.9641919680290156,
      "learning_rate": 0.003,
      "loss": 4.0518,
      "step": 7112
    },
    {
      "epoch": 0.07113,
      "grad_norm": 0.9993706432398796,
      "learning_rate": 0.003,
      "loss": 4.0858,
      "step": 7113
    },
    {
      "epoch": 0.07114,
      "grad_norm": 1.1499489634058697,
      "learning_rate": 0.003,
      "loss": 4.0654,
      "step": 7114
    },
    {
      "epoch": 0.07115,
      "grad_norm": 1.194464795275503,
      "learning_rate": 0.003,
      "loss": 4.0489,
      "step": 7115
    },
    {
      "epoch": 0.07116,
      "grad_norm": 1.21121303150807,
      "learning_rate": 0.003,
      "loss": 4.0563,
      "step": 7116
    },
    {
      "epoch": 0.07117,
      "grad_norm": 1.0049044123637882,
      "learning_rate": 0.003,
      "loss": 4.0678,
      "step": 7117
    },
    {
      "epoch": 0.07118,
      "grad_norm": 1.2298880131028436,
      "learning_rate": 0.003,
      "loss": 4.0782,
      "step": 7118
    },
    {
      "epoch": 0.07119,
      "grad_norm": 0.8611868402134719,
      "learning_rate": 0.003,
      "loss": 4.0491,
      "step": 7119
    },
    {
      "epoch": 0.0712,
      "grad_norm": 0.7702510545372283,
      "learning_rate": 0.003,
      "loss": 4.0433,
      "step": 7120
    },
    {
      "epoch": 0.07121,
      "grad_norm": 0.8954788767538316,
      "learning_rate": 0.003,
      "loss": 4.0493,
      "step": 7121
    },
    {
      "epoch": 0.07122,
      "grad_norm": 1.0842710437040461,
      "learning_rate": 0.003,
      "loss": 4.0426,
      "step": 7122
    },
    {
      "epoch": 0.07123,
      "grad_norm": 0.9923425116430409,
      "learning_rate": 0.003,
      "loss": 4.0362,
      "step": 7123
    },
    {
      "epoch": 0.07124,
      "grad_norm": 1.1813157355705963,
      "learning_rate": 0.003,
      "loss": 4.0353,
      "step": 7124
    },
    {
      "epoch": 0.07125,
      "grad_norm": 1.2554701276201077,
      "learning_rate": 0.003,
      "loss": 4.0429,
      "step": 7125
    },
    {
      "epoch": 0.07126,
      "grad_norm": 0.9773001931060757,
      "learning_rate": 0.003,
      "loss": 4.074,
      "step": 7126
    },
    {
      "epoch": 0.07127,
      "grad_norm": 1.0712269928450333,
      "learning_rate": 0.003,
      "loss": 4.0441,
      "step": 7127
    },
    {
      "epoch": 0.07128,
      "grad_norm": 1.0739153312552485,
      "learning_rate": 0.003,
      "loss": 4.0296,
      "step": 7128
    },
    {
      "epoch": 0.07129,
      "grad_norm": 1.041760051339172,
      "learning_rate": 0.003,
      "loss": 4.0615,
      "step": 7129
    },
    {
      "epoch": 0.0713,
      "grad_norm": 1.079087703256973,
      "learning_rate": 0.003,
      "loss": 4.0517,
      "step": 7130
    },
    {
      "epoch": 0.07131,
      "grad_norm": 0.9759469792704608,
      "learning_rate": 0.003,
      "loss": 4.0355,
      "step": 7131
    },
    {
      "epoch": 0.07132,
      "grad_norm": 1.048658582228863,
      "learning_rate": 0.003,
      "loss": 4.0798,
      "step": 7132
    },
    {
      "epoch": 0.07133,
      "grad_norm": 1.0618378394098185,
      "learning_rate": 0.003,
      "loss": 4.0869,
      "step": 7133
    },
    {
      "epoch": 0.07134,
      "grad_norm": 1.1513266730887497,
      "learning_rate": 0.003,
      "loss": 4.0299,
      "step": 7134
    },
    {
      "epoch": 0.07135,
      "grad_norm": 0.9259298591427148,
      "learning_rate": 0.003,
      "loss": 4.0516,
      "step": 7135
    },
    {
      "epoch": 0.07136,
      "grad_norm": 1.0755737256450422,
      "learning_rate": 0.003,
      "loss": 4.0939,
      "step": 7136
    },
    {
      "epoch": 0.07137,
      "grad_norm": 1.177551226290782,
      "learning_rate": 0.003,
      "loss": 4.0576,
      "step": 7137
    },
    {
      "epoch": 0.07138,
      "grad_norm": 1.22091533451439,
      "learning_rate": 0.003,
      "loss": 4.0551,
      "step": 7138
    },
    {
      "epoch": 0.07139,
      "grad_norm": 0.7759575681946775,
      "learning_rate": 0.003,
      "loss": 4.0466,
      "step": 7139
    },
    {
      "epoch": 0.0714,
      "grad_norm": 0.7408826660171673,
      "learning_rate": 0.003,
      "loss": 4.0559,
      "step": 7140
    },
    {
      "epoch": 0.07141,
      "grad_norm": 0.7146551856791451,
      "learning_rate": 0.003,
      "loss": 4.031,
      "step": 7141
    },
    {
      "epoch": 0.07142,
      "grad_norm": 0.9506570889697072,
      "learning_rate": 0.003,
      "loss": 4.0227,
      "step": 7142
    },
    {
      "epoch": 0.07143,
      "grad_norm": 1.30251677781438,
      "learning_rate": 0.003,
      "loss": 4.0224,
      "step": 7143
    },
    {
      "epoch": 0.07144,
      "grad_norm": 0.8991561050670873,
      "learning_rate": 0.003,
      "loss": 4.0271,
      "step": 7144
    },
    {
      "epoch": 0.07145,
      "grad_norm": 1.0074973465006982,
      "learning_rate": 0.003,
      "loss": 4.0506,
      "step": 7145
    },
    {
      "epoch": 0.07146,
      "grad_norm": 1.0771529601001637,
      "learning_rate": 0.003,
      "loss": 4.0552,
      "step": 7146
    },
    {
      "epoch": 0.07147,
      "grad_norm": 0.8612872764436263,
      "learning_rate": 0.003,
      "loss": 4.0547,
      "step": 7147
    },
    {
      "epoch": 0.07148,
      "grad_norm": 0.9341113795199162,
      "learning_rate": 0.003,
      "loss": 4.0578,
      "step": 7148
    },
    {
      "epoch": 0.07149,
      "grad_norm": 1.0062478097452772,
      "learning_rate": 0.003,
      "loss": 4.0659,
      "step": 7149
    },
    {
      "epoch": 0.0715,
      "grad_norm": 1.0268598684975616,
      "learning_rate": 0.003,
      "loss": 4.0507,
      "step": 7150
    },
    {
      "epoch": 0.07151,
      "grad_norm": 1.0224439130582545,
      "learning_rate": 0.003,
      "loss": 4.053,
      "step": 7151
    },
    {
      "epoch": 0.07152,
      "grad_norm": 1.0646975295350594,
      "learning_rate": 0.003,
      "loss": 4.0716,
      "step": 7152
    },
    {
      "epoch": 0.07153,
      "grad_norm": 1.0171664033211174,
      "learning_rate": 0.003,
      "loss": 4.0566,
      "step": 7153
    },
    {
      "epoch": 0.07154,
      "grad_norm": 1.0400329068005092,
      "learning_rate": 0.003,
      "loss": 4.0911,
      "step": 7154
    },
    {
      "epoch": 0.07155,
      "grad_norm": 0.9604129137098717,
      "learning_rate": 0.003,
      "loss": 4.0535,
      "step": 7155
    },
    {
      "epoch": 0.07156,
      "grad_norm": 1.0703502921341488,
      "learning_rate": 0.003,
      "loss": 4.0596,
      "step": 7156
    },
    {
      "epoch": 0.07157,
      "grad_norm": 1.2643947181831285,
      "learning_rate": 0.003,
      "loss": 4.0506,
      "step": 7157
    },
    {
      "epoch": 0.07158,
      "grad_norm": 0.8820140566698819,
      "learning_rate": 0.003,
      "loss": 4.0495,
      "step": 7158
    },
    {
      "epoch": 0.07159,
      "grad_norm": 0.9999756514750661,
      "learning_rate": 0.003,
      "loss": 4.0579,
      "step": 7159
    },
    {
      "epoch": 0.0716,
      "grad_norm": 1.2329285675483432,
      "learning_rate": 0.003,
      "loss": 4.0356,
      "step": 7160
    },
    {
      "epoch": 0.07161,
      "grad_norm": 0.9648860180858708,
      "learning_rate": 0.003,
      "loss": 4.0592,
      "step": 7161
    },
    {
      "epoch": 0.07162,
      "grad_norm": 1.1598981829481805,
      "learning_rate": 0.003,
      "loss": 4.0674,
      "step": 7162
    },
    {
      "epoch": 0.07163,
      "grad_norm": 1.0352892191066683,
      "learning_rate": 0.003,
      "loss": 4.0566,
      "step": 7163
    },
    {
      "epoch": 0.07164,
      "grad_norm": 1.1005133026385345,
      "learning_rate": 0.003,
      "loss": 4.0379,
      "step": 7164
    },
    {
      "epoch": 0.07165,
      "grad_norm": 1.081455462126825,
      "learning_rate": 0.003,
      "loss": 4.0412,
      "step": 7165
    },
    {
      "epoch": 0.07166,
      "grad_norm": 1.2353060960241047,
      "learning_rate": 0.003,
      "loss": 4.0513,
      "step": 7166
    },
    {
      "epoch": 0.07167,
      "grad_norm": 0.9483072899067242,
      "learning_rate": 0.003,
      "loss": 4.0593,
      "step": 7167
    },
    {
      "epoch": 0.07168,
      "grad_norm": 0.9415550285281931,
      "learning_rate": 0.003,
      "loss": 4.0786,
      "step": 7168
    },
    {
      "epoch": 0.07169,
      "grad_norm": 1.0091462587193163,
      "learning_rate": 0.003,
      "loss": 4.0449,
      "step": 7169
    },
    {
      "epoch": 0.0717,
      "grad_norm": 1.0220680948634882,
      "learning_rate": 0.003,
      "loss": 4.0361,
      "step": 7170
    },
    {
      "epoch": 0.07171,
      "grad_norm": 0.9757190374038757,
      "learning_rate": 0.003,
      "loss": 4.0617,
      "step": 7171
    },
    {
      "epoch": 0.07172,
      "grad_norm": 1.1004971741989973,
      "learning_rate": 0.003,
      "loss": 4.0679,
      "step": 7172
    },
    {
      "epoch": 0.07173,
      "grad_norm": 0.9543571710406531,
      "learning_rate": 0.003,
      "loss": 4.0665,
      "step": 7173
    },
    {
      "epoch": 0.07174,
      "grad_norm": 1.087698472674678,
      "learning_rate": 0.003,
      "loss": 4.0389,
      "step": 7174
    },
    {
      "epoch": 0.07175,
      "grad_norm": 1.1109156909053697,
      "learning_rate": 0.003,
      "loss": 4.0795,
      "step": 7175
    },
    {
      "epoch": 0.07176,
      "grad_norm": 0.9503187829288862,
      "learning_rate": 0.003,
      "loss": 4.0493,
      "step": 7176
    },
    {
      "epoch": 0.07177,
      "grad_norm": 0.9936594901006058,
      "learning_rate": 0.003,
      "loss": 4.0577,
      "step": 7177
    },
    {
      "epoch": 0.07178,
      "grad_norm": 1.061316451230859,
      "learning_rate": 0.003,
      "loss": 4.021,
      "step": 7178
    },
    {
      "epoch": 0.07179,
      "grad_norm": 0.9212825081885807,
      "learning_rate": 0.003,
      "loss": 4.053,
      "step": 7179
    },
    {
      "epoch": 0.0718,
      "grad_norm": 1.0771371633803515,
      "learning_rate": 0.003,
      "loss": 4.0641,
      "step": 7180
    },
    {
      "epoch": 0.07181,
      "grad_norm": 1.0622270106395655,
      "learning_rate": 0.003,
      "loss": 4.0534,
      "step": 7181
    },
    {
      "epoch": 0.07182,
      "grad_norm": 1.2387354855038064,
      "learning_rate": 0.003,
      "loss": 4.0676,
      "step": 7182
    },
    {
      "epoch": 0.07183,
      "grad_norm": 0.9639603006489462,
      "learning_rate": 0.003,
      "loss": 4.0592,
      "step": 7183
    },
    {
      "epoch": 0.07184,
      "grad_norm": 1.0093619599090504,
      "learning_rate": 0.003,
      "loss": 4.0353,
      "step": 7184
    },
    {
      "epoch": 0.07185,
      "grad_norm": 0.9714974637831857,
      "learning_rate": 0.003,
      "loss": 4.0413,
      "step": 7185
    },
    {
      "epoch": 0.07186,
      "grad_norm": 0.9153837367744138,
      "learning_rate": 0.003,
      "loss": 4.0584,
      "step": 7186
    },
    {
      "epoch": 0.07187,
      "grad_norm": 0.9316846582768762,
      "learning_rate": 0.003,
      "loss": 4.0457,
      "step": 7187
    },
    {
      "epoch": 0.07188,
      "grad_norm": 1.0594153818209886,
      "learning_rate": 0.003,
      "loss": 4.0168,
      "step": 7188
    },
    {
      "epoch": 0.07189,
      "grad_norm": 0.918217273642505,
      "learning_rate": 0.003,
      "loss": 4.0616,
      "step": 7189
    },
    {
      "epoch": 0.0719,
      "grad_norm": 1.0085899488533316,
      "learning_rate": 0.003,
      "loss": 4.0624,
      "step": 7190
    },
    {
      "epoch": 0.07191,
      "grad_norm": 1.1236120434513654,
      "learning_rate": 0.003,
      "loss": 4.0748,
      "step": 7191
    },
    {
      "epoch": 0.07192,
      "grad_norm": 1.0142883651839383,
      "learning_rate": 0.003,
      "loss": 4.0343,
      "step": 7192
    },
    {
      "epoch": 0.07193,
      "grad_norm": 0.976978007096748,
      "learning_rate": 0.003,
      "loss": 4.0611,
      "step": 7193
    },
    {
      "epoch": 0.07194,
      "grad_norm": 1.1423821853316132,
      "learning_rate": 0.003,
      "loss": 4.0648,
      "step": 7194
    },
    {
      "epoch": 0.07195,
      "grad_norm": 1.105242324637109,
      "learning_rate": 0.003,
      "loss": 4.0393,
      "step": 7195
    },
    {
      "epoch": 0.07196,
      "grad_norm": 1.050425013065668,
      "learning_rate": 0.003,
      "loss": 4.049,
      "step": 7196
    },
    {
      "epoch": 0.07197,
      "grad_norm": 1.1236152449610997,
      "learning_rate": 0.003,
      "loss": 4.0753,
      "step": 7197
    },
    {
      "epoch": 0.07198,
      "grad_norm": 1.0814707926776106,
      "learning_rate": 0.003,
      "loss": 4.0324,
      "step": 7198
    },
    {
      "epoch": 0.07199,
      "grad_norm": 1.1497210191318876,
      "learning_rate": 0.003,
      "loss": 4.0452,
      "step": 7199
    },
    {
      "epoch": 0.072,
      "grad_norm": 0.9814431913033573,
      "learning_rate": 0.003,
      "loss": 4.0747,
      "step": 7200
    },
    {
      "epoch": 0.07201,
      "grad_norm": 0.9399249347861436,
      "learning_rate": 0.003,
      "loss": 4.0392,
      "step": 7201
    },
    {
      "epoch": 0.07202,
      "grad_norm": 1.0283475742305797,
      "learning_rate": 0.003,
      "loss": 4.0697,
      "step": 7202
    },
    {
      "epoch": 0.07203,
      "grad_norm": 1.3436302183163868,
      "learning_rate": 0.003,
      "loss": 4.0433,
      "step": 7203
    },
    {
      "epoch": 0.07204,
      "grad_norm": 0.8118935525393225,
      "learning_rate": 0.003,
      "loss": 4.0741,
      "step": 7204
    },
    {
      "epoch": 0.07205,
      "grad_norm": 0.6513445176109008,
      "learning_rate": 0.003,
      "loss": 4.0651,
      "step": 7205
    },
    {
      "epoch": 0.07206,
      "grad_norm": 0.7054255168362077,
      "learning_rate": 0.003,
      "loss": 4.0412,
      "step": 7206
    },
    {
      "epoch": 0.07207,
      "grad_norm": 0.754052659367641,
      "learning_rate": 0.003,
      "loss": 4.0425,
      "step": 7207
    },
    {
      "epoch": 0.07208,
      "grad_norm": 0.9086098698061754,
      "learning_rate": 0.003,
      "loss": 4.0393,
      "step": 7208
    },
    {
      "epoch": 0.07209,
      "grad_norm": 1.1574952413730935,
      "learning_rate": 0.003,
      "loss": 4.0394,
      "step": 7209
    },
    {
      "epoch": 0.0721,
      "grad_norm": 0.8639251076932414,
      "learning_rate": 0.003,
      "loss": 4.0364,
      "step": 7210
    },
    {
      "epoch": 0.07211,
      "grad_norm": 0.8049066976652748,
      "learning_rate": 0.003,
      "loss": 4.0503,
      "step": 7211
    },
    {
      "epoch": 0.07212,
      "grad_norm": 0.9012892110405708,
      "learning_rate": 0.003,
      "loss": 4.0578,
      "step": 7212
    },
    {
      "epoch": 0.07213,
      "grad_norm": 1.0195248366641139,
      "learning_rate": 0.003,
      "loss": 4.0706,
      "step": 7213
    },
    {
      "epoch": 0.07214,
      "grad_norm": 1.0957107033402924,
      "learning_rate": 0.003,
      "loss": 4.0258,
      "step": 7214
    },
    {
      "epoch": 0.07215,
      "grad_norm": 1.130563317224989,
      "learning_rate": 0.003,
      "loss": 4.0531,
      "step": 7215
    },
    {
      "epoch": 0.07216,
      "grad_norm": 1.0776995902257729,
      "learning_rate": 0.003,
      "loss": 4.062,
      "step": 7216
    },
    {
      "epoch": 0.07217,
      "grad_norm": 0.9762456922279278,
      "learning_rate": 0.003,
      "loss": 4.052,
      "step": 7217
    },
    {
      "epoch": 0.07218,
      "grad_norm": 1.0572760232733118,
      "learning_rate": 0.003,
      "loss": 4.0539,
      "step": 7218
    },
    {
      "epoch": 0.07219,
      "grad_norm": 1.1200235428034362,
      "learning_rate": 0.003,
      "loss": 4.0354,
      "step": 7219
    },
    {
      "epoch": 0.0722,
      "grad_norm": 0.9279710532680243,
      "learning_rate": 0.003,
      "loss": 4.0582,
      "step": 7220
    },
    {
      "epoch": 0.07221,
      "grad_norm": 1.0865227213549595,
      "learning_rate": 0.003,
      "loss": 4.0601,
      "step": 7221
    },
    {
      "epoch": 0.07222,
      "grad_norm": 1.1530440294312476,
      "learning_rate": 0.003,
      "loss": 4.0511,
      "step": 7222
    },
    {
      "epoch": 0.07223,
      "grad_norm": 1.105728877799717,
      "learning_rate": 0.003,
      "loss": 4.065,
      "step": 7223
    },
    {
      "epoch": 0.07224,
      "grad_norm": 1.246082917171359,
      "learning_rate": 0.003,
      "loss": 4.0346,
      "step": 7224
    },
    {
      "epoch": 0.07225,
      "grad_norm": 0.8775383173500949,
      "learning_rate": 0.003,
      "loss": 4.0524,
      "step": 7225
    },
    {
      "epoch": 0.07226,
      "grad_norm": 0.9055827431051069,
      "learning_rate": 0.003,
      "loss": 4.0563,
      "step": 7226
    },
    {
      "epoch": 0.07227,
      "grad_norm": 0.9729711259743795,
      "learning_rate": 0.003,
      "loss": 4.0367,
      "step": 7227
    },
    {
      "epoch": 0.07228,
      "grad_norm": 1.122946918012819,
      "learning_rate": 0.003,
      "loss": 4.0348,
      "step": 7228
    },
    {
      "epoch": 0.07229,
      "grad_norm": 1.145241716228926,
      "learning_rate": 0.003,
      "loss": 4.0529,
      "step": 7229
    },
    {
      "epoch": 0.0723,
      "grad_norm": 1.1809619676313377,
      "learning_rate": 0.003,
      "loss": 4.0291,
      "step": 7230
    },
    {
      "epoch": 0.07231,
      "grad_norm": 1.0638511137158617,
      "learning_rate": 0.003,
      "loss": 4.063,
      "step": 7231
    },
    {
      "epoch": 0.07232,
      "grad_norm": 1.3174222410179524,
      "learning_rate": 0.003,
      "loss": 4.059,
      "step": 7232
    },
    {
      "epoch": 0.07233,
      "grad_norm": 0.7594174026538151,
      "learning_rate": 0.003,
      "loss": 4.0534,
      "step": 7233
    },
    {
      "epoch": 0.07234,
      "grad_norm": 0.7113003269832634,
      "learning_rate": 0.003,
      "loss": 4.0257,
      "step": 7234
    },
    {
      "epoch": 0.07235,
      "grad_norm": 0.7666612536000506,
      "learning_rate": 0.003,
      "loss": 4.0728,
      "step": 7235
    },
    {
      "epoch": 0.07236,
      "grad_norm": 1.0228879699076707,
      "learning_rate": 0.003,
      "loss": 4.0739,
      "step": 7236
    },
    {
      "epoch": 0.07237,
      "grad_norm": 1.0627669746296777,
      "learning_rate": 0.003,
      "loss": 4.0396,
      "step": 7237
    },
    {
      "epoch": 0.07238,
      "grad_norm": 0.9408632718140275,
      "learning_rate": 0.003,
      "loss": 4.0608,
      "step": 7238
    },
    {
      "epoch": 0.07239,
      "grad_norm": 1.07854762597468,
      "learning_rate": 0.003,
      "loss": 4.0625,
      "step": 7239
    },
    {
      "epoch": 0.0724,
      "grad_norm": 1.1116574655009852,
      "learning_rate": 0.003,
      "loss": 4.0232,
      "step": 7240
    },
    {
      "epoch": 0.07241,
      "grad_norm": 1.0625991735895157,
      "learning_rate": 0.003,
      "loss": 4.0655,
      "step": 7241
    },
    {
      "epoch": 0.07242,
      "grad_norm": 1.1130412168878459,
      "learning_rate": 0.003,
      "loss": 4.0264,
      "step": 7242
    },
    {
      "epoch": 0.07243,
      "grad_norm": 1.0523505008259475,
      "learning_rate": 0.003,
      "loss": 4.0593,
      "step": 7243
    },
    {
      "epoch": 0.07244,
      "grad_norm": 1.1814825129506323,
      "learning_rate": 0.003,
      "loss": 4.0461,
      "step": 7244
    },
    {
      "epoch": 0.07245,
      "grad_norm": 0.9545116614676915,
      "learning_rate": 0.003,
      "loss": 4.043,
      "step": 7245
    },
    {
      "epoch": 0.07246,
      "grad_norm": 1.0800472059853927,
      "learning_rate": 0.003,
      "loss": 4.0575,
      "step": 7246
    },
    {
      "epoch": 0.07247,
      "grad_norm": 0.8747307815336857,
      "learning_rate": 0.003,
      "loss": 4.0701,
      "step": 7247
    },
    {
      "epoch": 0.07248,
      "grad_norm": 0.9457171551359618,
      "learning_rate": 0.003,
      "loss": 4.0345,
      "step": 7248
    },
    {
      "epoch": 0.07249,
      "grad_norm": 0.9746074762050819,
      "learning_rate": 0.003,
      "loss": 4.0186,
      "step": 7249
    },
    {
      "epoch": 0.0725,
      "grad_norm": 1.0830316386031944,
      "learning_rate": 0.003,
      "loss": 4.0355,
      "step": 7250
    },
    {
      "epoch": 0.07251,
      "grad_norm": 1.1295155297093291,
      "learning_rate": 0.003,
      "loss": 4.0535,
      "step": 7251
    },
    {
      "epoch": 0.07252,
      "grad_norm": 1.0574374191488838,
      "learning_rate": 0.003,
      "loss": 3.993,
      "step": 7252
    },
    {
      "epoch": 0.07253,
      "grad_norm": 1.1371667240401926,
      "learning_rate": 0.003,
      "loss": 4.0246,
      "step": 7253
    },
    {
      "epoch": 0.07254,
      "grad_norm": 0.8575120967224161,
      "learning_rate": 0.003,
      "loss": 4.0656,
      "step": 7254
    },
    {
      "epoch": 0.07255,
      "grad_norm": 1.2040896120213669,
      "learning_rate": 0.003,
      "loss": 4.0694,
      "step": 7255
    },
    {
      "epoch": 0.07256,
      "grad_norm": 1.0282986308414142,
      "learning_rate": 0.003,
      "loss": 4.0674,
      "step": 7256
    },
    {
      "epoch": 0.07257,
      "grad_norm": 1.1867368710695958,
      "learning_rate": 0.003,
      "loss": 4.0569,
      "step": 7257
    },
    {
      "epoch": 0.07258,
      "grad_norm": 0.8751280922843936,
      "learning_rate": 0.003,
      "loss": 4.0207,
      "step": 7258
    },
    {
      "epoch": 0.07259,
      "grad_norm": 0.9468806915219505,
      "learning_rate": 0.003,
      "loss": 4.0472,
      "step": 7259
    },
    {
      "epoch": 0.0726,
      "grad_norm": 1.13462604181537,
      "learning_rate": 0.003,
      "loss": 4.0325,
      "step": 7260
    },
    {
      "epoch": 0.07261,
      "grad_norm": 0.9480119807528755,
      "learning_rate": 0.003,
      "loss": 4.053,
      "step": 7261
    },
    {
      "epoch": 0.07262,
      "grad_norm": 1.0086785799416162,
      "learning_rate": 0.003,
      "loss": 4.0463,
      "step": 7262
    },
    {
      "epoch": 0.07263,
      "grad_norm": 0.9776635908283169,
      "learning_rate": 0.003,
      "loss": 4.0577,
      "step": 7263
    },
    {
      "epoch": 0.07264,
      "grad_norm": 0.9280201688532035,
      "learning_rate": 0.003,
      "loss": 4.0058,
      "step": 7264
    },
    {
      "epoch": 0.07265,
      "grad_norm": 1.146251437770367,
      "learning_rate": 0.003,
      "loss": 4.0549,
      "step": 7265
    },
    {
      "epoch": 0.07266,
      "grad_norm": 0.9536640102408181,
      "learning_rate": 0.003,
      "loss": 4.0676,
      "step": 7266
    },
    {
      "epoch": 0.07267,
      "grad_norm": 1.1810792017759155,
      "learning_rate": 0.003,
      "loss": 4.0606,
      "step": 7267
    },
    {
      "epoch": 0.07268,
      "grad_norm": 1.0667635725102285,
      "learning_rate": 0.003,
      "loss": 4.0683,
      "step": 7268
    },
    {
      "epoch": 0.07269,
      "grad_norm": 1.0933436942514816,
      "learning_rate": 0.003,
      "loss": 4.0543,
      "step": 7269
    },
    {
      "epoch": 0.0727,
      "grad_norm": 0.9075857622412058,
      "learning_rate": 0.003,
      "loss": 4.0349,
      "step": 7270
    },
    {
      "epoch": 0.07271,
      "grad_norm": 0.9549614482171724,
      "learning_rate": 0.003,
      "loss": 4.0456,
      "step": 7271
    },
    {
      "epoch": 0.07272,
      "grad_norm": 0.8614083119020385,
      "learning_rate": 0.003,
      "loss": 4.037,
      "step": 7272
    },
    {
      "epoch": 0.07273,
      "grad_norm": 0.8876688164940039,
      "learning_rate": 0.003,
      "loss": 4.0542,
      "step": 7273
    },
    {
      "epoch": 0.07274,
      "grad_norm": 0.9248498248735642,
      "learning_rate": 0.003,
      "loss": 4.0189,
      "step": 7274
    },
    {
      "epoch": 0.07275,
      "grad_norm": 0.9301192282673498,
      "learning_rate": 0.003,
      "loss": 4.0327,
      "step": 7275
    },
    {
      "epoch": 0.07276,
      "grad_norm": 1.0236905978389461,
      "learning_rate": 0.003,
      "loss": 4.0139,
      "step": 7276
    },
    {
      "epoch": 0.07277,
      "grad_norm": 1.2555861212594497,
      "learning_rate": 0.003,
      "loss": 4.0531,
      "step": 7277
    },
    {
      "epoch": 0.07278,
      "grad_norm": 1.0168850674837941,
      "learning_rate": 0.003,
      "loss": 4.0565,
      "step": 7278
    },
    {
      "epoch": 0.07279,
      "grad_norm": 1.3248139439976,
      "learning_rate": 0.003,
      "loss": 4.0437,
      "step": 7279
    },
    {
      "epoch": 0.0728,
      "grad_norm": 0.9735763353453082,
      "learning_rate": 0.003,
      "loss": 4.022,
      "step": 7280
    },
    {
      "epoch": 0.07281,
      "grad_norm": 1.1238701240024835,
      "learning_rate": 0.003,
      "loss": 4.0443,
      "step": 7281
    },
    {
      "epoch": 0.07282,
      "grad_norm": 1.075085316193029,
      "learning_rate": 0.003,
      "loss": 4.0613,
      "step": 7282
    },
    {
      "epoch": 0.07283,
      "grad_norm": 1.1149909879984707,
      "learning_rate": 0.003,
      "loss": 4.0766,
      "step": 7283
    },
    {
      "epoch": 0.07284,
      "grad_norm": 1.060193425500397,
      "learning_rate": 0.003,
      "loss": 4.035,
      "step": 7284
    },
    {
      "epoch": 0.07285,
      "grad_norm": 1.1145512915255615,
      "learning_rate": 0.003,
      "loss": 4.0618,
      "step": 7285
    },
    {
      "epoch": 0.07286,
      "grad_norm": 1.0821681081123302,
      "learning_rate": 0.003,
      "loss": 4.0369,
      "step": 7286
    },
    {
      "epoch": 0.07287,
      "grad_norm": 0.9482005317460671,
      "learning_rate": 0.003,
      "loss": 4.058,
      "step": 7287
    },
    {
      "epoch": 0.07288,
      "grad_norm": 1.1770572136072466,
      "learning_rate": 0.003,
      "loss": 4.0396,
      "step": 7288
    },
    {
      "epoch": 0.07289,
      "grad_norm": 1.0392539942924648,
      "learning_rate": 0.003,
      "loss": 4.0587,
      "step": 7289
    },
    {
      "epoch": 0.0729,
      "grad_norm": 1.093723077978444,
      "learning_rate": 0.003,
      "loss": 4.0786,
      "step": 7290
    },
    {
      "epoch": 0.07291,
      "grad_norm": 1.2345580518798212,
      "learning_rate": 0.003,
      "loss": 4.0487,
      "step": 7291
    },
    {
      "epoch": 0.07292,
      "grad_norm": 1.066201654699835,
      "learning_rate": 0.003,
      "loss": 4.0577,
      "step": 7292
    },
    {
      "epoch": 0.07293,
      "grad_norm": 1.1510430611988984,
      "learning_rate": 0.003,
      "loss": 4.0464,
      "step": 7293
    },
    {
      "epoch": 0.07294,
      "grad_norm": 0.9783586851033524,
      "learning_rate": 0.003,
      "loss": 4.0299,
      "step": 7294
    },
    {
      "epoch": 0.07295,
      "grad_norm": 1.125641508383537,
      "learning_rate": 0.003,
      "loss": 4.0261,
      "step": 7295
    },
    {
      "epoch": 0.07296,
      "grad_norm": 0.8931193549630414,
      "learning_rate": 0.003,
      "loss": 4.0342,
      "step": 7296
    },
    {
      "epoch": 0.07297,
      "grad_norm": 0.9640861362127288,
      "learning_rate": 0.003,
      "loss": 4.0718,
      "step": 7297
    },
    {
      "epoch": 0.07298,
      "grad_norm": 1.1090770552512412,
      "learning_rate": 0.003,
      "loss": 4.021,
      "step": 7298
    },
    {
      "epoch": 0.07299,
      "grad_norm": 0.9922013741324502,
      "learning_rate": 0.003,
      "loss": 4.0464,
      "step": 7299
    },
    {
      "epoch": 0.073,
      "grad_norm": 1.0624960804734376,
      "learning_rate": 0.003,
      "loss": 4.0303,
      "step": 7300
    },
    {
      "epoch": 0.07301,
      "grad_norm": 1.0296175568733714,
      "learning_rate": 0.003,
      "loss": 4.0147,
      "step": 7301
    },
    {
      "epoch": 0.07302,
      "grad_norm": 0.877285101418735,
      "learning_rate": 0.003,
      "loss": 4.0235,
      "step": 7302
    },
    {
      "epoch": 0.07303,
      "grad_norm": 0.8404046231538534,
      "learning_rate": 0.003,
      "loss": 4.0532,
      "step": 7303
    },
    {
      "epoch": 0.07304,
      "grad_norm": 0.7275922913634015,
      "learning_rate": 0.003,
      "loss": 4.0323,
      "step": 7304
    },
    {
      "epoch": 0.07305,
      "grad_norm": 0.7396081307701995,
      "learning_rate": 0.003,
      "loss": 4.0243,
      "step": 7305
    },
    {
      "epoch": 0.07306,
      "grad_norm": 0.8600590405301811,
      "learning_rate": 0.003,
      "loss": 4.0615,
      "step": 7306
    },
    {
      "epoch": 0.07307,
      "grad_norm": 1.2179767907272885,
      "learning_rate": 0.003,
      "loss": 4.078,
      "step": 7307
    },
    {
      "epoch": 0.07308,
      "grad_norm": 1.2171863102376617,
      "learning_rate": 0.003,
      "loss": 4.051,
      "step": 7308
    },
    {
      "epoch": 0.07309,
      "grad_norm": 0.7498319891103764,
      "learning_rate": 0.003,
      "loss": 4.0465,
      "step": 7309
    },
    {
      "epoch": 0.0731,
      "grad_norm": 0.7626246621724885,
      "learning_rate": 0.003,
      "loss": 4.0045,
      "step": 7310
    },
    {
      "epoch": 0.07311,
      "grad_norm": 0.9806079646889024,
      "learning_rate": 0.003,
      "loss": 4.0491,
      "step": 7311
    },
    {
      "epoch": 0.07312,
      "grad_norm": 1.2649393161573326,
      "learning_rate": 0.003,
      "loss": 4.0695,
      "step": 7312
    },
    {
      "epoch": 0.07313,
      "grad_norm": 1.0944111078253658,
      "learning_rate": 0.003,
      "loss": 4.0517,
      "step": 7313
    },
    {
      "epoch": 0.07314,
      "grad_norm": 1.0625503664966063,
      "learning_rate": 0.003,
      "loss": 4.0646,
      "step": 7314
    },
    {
      "epoch": 0.07315,
      "grad_norm": 0.8920266443424979,
      "learning_rate": 0.003,
      "loss": 4.0468,
      "step": 7315
    },
    {
      "epoch": 0.07316,
      "grad_norm": 0.9411350308589397,
      "learning_rate": 0.003,
      "loss": 4.0684,
      "step": 7316
    },
    {
      "epoch": 0.07317,
      "grad_norm": 1.0177785717592334,
      "learning_rate": 0.003,
      "loss": 4.0509,
      "step": 7317
    },
    {
      "epoch": 0.07318,
      "grad_norm": 1.0697061033409336,
      "learning_rate": 0.003,
      "loss": 4.0369,
      "step": 7318
    },
    {
      "epoch": 0.07319,
      "grad_norm": 1.1560468412807143,
      "learning_rate": 0.003,
      "loss": 4.0614,
      "step": 7319
    },
    {
      "epoch": 0.0732,
      "grad_norm": 0.9848533350053253,
      "learning_rate": 0.003,
      "loss": 4.0363,
      "step": 7320
    },
    {
      "epoch": 0.07321,
      "grad_norm": 1.406641344651784,
      "learning_rate": 0.003,
      "loss": 4.0569,
      "step": 7321
    },
    {
      "epoch": 0.07322,
      "grad_norm": 0.8388161556157484,
      "learning_rate": 0.003,
      "loss": 4.0625,
      "step": 7322
    },
    {
      "epoch": 0.07323,
      "grad_norm": 0.8844904618419197,
      "learning_rate": 0.003,
      "loss": 4.0487,
      "step": 7323
    },
    {
      "epoch": 0.07324,
      "grad_norm": 0.9189035571192447,
      "learning_rate": 0.003,
      "loss": 4.0469,
      "step": 7324
    },
    {
      "epoch": 0.07325,
      "grad_norm": 1.1269111817150788,
      "learning_rate": 0.003,
      "loss": 4.0271,
      "step": 7325
    },
    {
      "epoch": 0.07326,
      "grad_norm": 1.3034147784515224,
      "learning_rate": 0.003,
      "loss": 4.0738,
      "step": 7326
    },
    {
      "epoch": 0.07327,
      "grad_norm": 0.9635421663578897,
      "learning_rate": 0.003,
      "loss": 4.0684,
      "step": 7327
    },
    {
      "epoch": 0.07328,
      "grad_norm": 1.0710552918832759,
      "learning_rate": 0.003,
      "loss": 4.0279,
      "step": 7328
    },
    {
      "epoch": 0.07329,
      "grad_norm": 0.9097624953128945,
      "learning_rate": 0.003,
      "loss": 4.0477,
      "step": 7329
    },
    {
      "epoch": 0.0733,
      "grad_norm": 1.0784018937777076,
      "learning_rate": 0.003,
      "loss": 4.0393,
      "step": 7330
    },
    {
      "epoch": 0.07331,
      "grad_norm": 1.0908726771554587,
      "learning_rate": 0.003,
      "loss": 4.0394,
      "step": 7331
    },
    {
      "epoch": 0.07332,
      "grad_norm": 0.9641065596068242,
      "learning_rate": 0.003,
      "loss": 4.062,
      "step": 7332
    },
    {
      "epoch": 0.07333,
      "grad_norm": 1.056358933350289,
      "learning_rate": 0.003,
      "loss": 4.0562,
      "step": 7333
    },
    {
      "epoch": 0.07334,
      "grad_norm": 1.0825403734635632,
      "learning_rate": 0.003,
      "loss": 4.0489,
      "step": 7334
    },
    {
      "epoch": 0.07335,
      "grad_norm": 0.996440123195999,
      "learning_rate": 0.003,
      "loss": 4.0242,
      "step": 7335
    },
    {
      "epoch": 0.07336,
      "grad_norm": 1.194382823583778,
      "learning_rate": 0.003,
      "loss": 4.075,
      "step": 7336
    },
    {
      "epoch": 0.07337,
      "grad_norm": 1.0010816804287952,
      "learning_rate": 0.003,
      "loss": 4.0607,
      "step": 7337
    },
    {
      "epoch": 0.07338,
      "grad_norm": 1.1654307114738909,
      "learning_rate": 0.003,
      "loss": 4.0505,
      "step": 7338
    },
    {
      "epoch": 0.07339,
      "grad_norm": 1.1220075968794316,
      "learning_rate": 0.003,
      "loss": 4.0822,
      "step": 7339
    },
    {
      "epoch": 0.0734,
      "grad_norm": 1.131259039339433,
      "learning_rate": 0.003,
      "loss": 4.0373,
      "step": 7340
    },
    {
      "epoch": 0.07341,
      "grad_norm": 1.175969663125831,
      "learning_rate": 0.003,
      "loss": 4.0619,
      "step": 7341
    },
    {
      "epoch": 0.07342,
      "grad_norm": 0.9696219162610268,
      "learning_rate": 0.003,
      "loss": 4.0739,
      "step": 7342
    },
    {
      "epoch": 0.07343,
      "grad_norm": 0.9225181238457246,
      "learning_rate": 0.003,
      "loss": 4.059,
      "step": 7343
    },
    {
      "epoch": 0.07344,
      "grad_norm": 0.9472118247787676,
      "learning_rate": 0.003,
      "loss": 4.0586,
      "step": 7344
    },
    {
      "epoch": 0.07345,
      "grad_norm": 1.0731847840368303,
      "learning_rate": 0.003,
      "loss": 4.0336,
      "step": 7345
    },
    {
      "epoch": 0.07346,
      "grad_norm": 1.1511686026889996,
      "learning_rate": 0.003,
      "loss": 4.023,
      "step": 7346
    },
    {
      "epoch": 0.07347,
      "grad_norm": 0.9402065576595197,
      "learning_rate": 0.003,
      "loss": 4.042,
      "step": 7347
    },
    {
      "epoch": 0.07348,
      "grad_norm": 0.9633307339305025,
      "learning_rate": 0.003,
      "loss": 4.0433,
      "step": 7348
    },
    {
      "epoch": 0.07349,
      "grad_norm": 1.0646374377407537,
      "learning_rate": 0.003,
      "loss": 4.0494,
      "step": 7349
    },
    {
      "epoch": 0.0735,
      "grad_norm": 1.2040620732152902,
      "learning_rate": 0.003,
      "loss": 4.0381,
      "step": 7350
    },
    {
      "epoch": 0.07351,
      "grad_norm": 0.904348905876666,
      "learning_rate": 0.003,
      "loss": 4.0296,
      "step": 7351
    },
    {
      "epoch": 0.07352,
      "grad_norm": 1.1063199597987452,
      "learning_rate": 0.003,
      "loss": 4.0536,
      "step": 7352
    },
    {
      "epoch": 0.07353,
      "grad_norm": 1.027927831165055,
      "learning_rate": 0.003,
      "loss": 4.0715,
      "step": 7353
    },
    {
      "epoch": 0.07354,
      "grad_norm": 0.9388874030805077,
      "learning_rate": 0.003,
      "loss": 4.0304,
      "step": 7354
    },
    {
      "epoch": 0.07355,
      "grad_norm": 0.9846783510750604,
      "learning_rate": 0.003,
      "loss": 4.0538,
      "step": 7355
    },
    {
      "epoch": 0.07356,
      "grad_norm": 0.9691856508427281,
      "learning_rate": 0.003,
      "loss": 4.0562,
      "step": 7356
    },
    {
      "epoch": 0.07357,
      "grad_norm": 1.0360248831901298,
      "learning_rate": 0.003,
      "loss": 4.0415,
      "step": 7357
    },
    {
      "epoch": 0.07358,
      "grad_norm": 0.9624035012433214,
      "learning_rate": 0.003,
      "loss": 4.0556,
      "step": 7358
    },
    {
      "epoch": 0.07359,
      "grad_norm": 1.2494789612717871,
      "learning_rate": 0.003,
      "loss": 4.0634,
      "step": 7359
    },
    {
      "epoch": 0.0736,
      "grad_norm": 1.1524537864722764,
      "learning_rate": 0.003,
      "loss": 4.071,
      "step": 7360
    },
    {
      "epoch": 0.07361,
      "grad_norm": 1.0340592810304519,
      "learning_rate": 0.003,
      "loss": 4.0436,
      "step": 7361
    },
    {
      "epoch": 0.07362,
      "grad_norm": 0.8881109632693943,
      "learning_rate": 0.003,
      "loss": 4.0651,
      "step": 7362
    },
    {
      "epoch": 0.07363,
      "grad_norm": 1.0007719234565362,
      "learning_rate": 0.003,
      "loss": 4.0338,
      "step": 7363
    },
    {
      "epoch": 0.07364,
      "grad_norm": 1.1774673252784194,
      "learning_rate": 0.003,
      "loss": 4.0724,
      "step": 7364
    },
    {
      "epoch": 0.07365,
      "grad_norm": 1.0952657927274845,
      "learning_rate": 0.003,
      "loss": 4.0212,
      "step": 7365
    },
    {
      "epoch": 0.07366,
      "grad_norm": 1.1457353470828615,
      "learning_rate": 0.003,
      "loss": 4.0563,
      "step": 7366
    },
    {
      "epoch": 0.07367,
      "grad_norm": 1.068208594063675,
      "learning_rate": 0.003,
      "loss": 4.0657,
      "step": 7367
    },
    {
      "epoch": 0.07368,
      "grad_norm": 1.1209873829364174,
      "learning_rate": 0.003,
      "loss": 4.0838,
      "step": 7368
    },
    {
      "epoch": 0.07369,
      "grad_norm": 0.9611042252278179,
      "learning_rate": 0.003,
      "loss": 4.0288,
      "step": 7369
    },
    {
      "epoch": 0.0737,
      "grad_norm": 1.1681489840918342,
      "learning_rate": 0.003,
      "loss": 4.0607,
      "step": 7370
    },
    {
      "epoch": 0.07371,
      "grad_norm": 1.1438687370427696,
      "learning_rate": 0.003,
      "loss": 4.0825,
      "step": 7371
    },
    {
      "epoch": 0.07372,
      "grad_norm": 1.1163570815698207,
      "learning_rate": 0.003,
      "loss": 4.062,
      "step": 7372
    },
    {
      "epoch": 0.07373,
      "grad_norm": 0.994703138600529,
      "learning_rate": 0.003,
      "loss": 4.0623,
      "step": 7373
    },
    {
      "epoch": 0.07374,
      "grad_norm": 1.0487042099617259,
      "learning_rate": 0.003,
      "loss": 4.0504,
      "step": 7374
    },
    {
      "epoch": 0.07375,
      "grad_norm": 0.9349117742647232,
      "learning_rate": 0.003,
      "loss": 4.054,
      "step": 7375
    },
    {
      "epoch": 0.07376,
      "grad_norm": 0.8520163703396579,
      "learning_rate": 0.003,
      "loss": 4.0571,
      "step": 7376
    },
    {
      "epoch": 0.07377,
      "grad_norm": 1.0456006728388636,
      "learning_rate": 0.003,
      "loss": 4.0573,
      "step": 7377
    },
    {
      "epoch": 0.07378,
      "grad_norm": 1.1182333493411414,
      "learning_rate": 0.003,
      "loss": 4.0673,
      "step": 7378
    },
    {
      "epoch": 0.07379,
      "grad_norm": 1.055202759953318,
      "learning_rate": 0.003,
      "loss": 4.036,
      "step": 7379
    },
    {
      "epoch": 0.0738,
      "grad_norm": 0.9844943498588187,
      "learning_rate": 0.003,
      "loss": 4.023,
      "step": 7380
    },
    {
      "epoch": 0.07381,
      "grad_norm": 1.0556758756646913,
      "learning_rate": 0.003,
      "loss": 4.0368,
      "step": 7381
    },
    {
      "epoch": 0.07382,
      "grad_norm": 0.939993493011405,
      "learning_rate": 0.003,
      "loss": 4.0813,
      "step": 7382
    },
    {
      "epoch": 0.07383,
      "grad_norm": 0.8597491485943202,
      "learning_rate": 0.003,
      "loss": 4.0557,
      "step": 7383
    },
    {
      "epoch": 0.07384,
      "grad_norm": 0.921647665125338,
      "learning_rate": 0.003,
      "loss": 4.0309,
      "step": 7384
    },
    {
      "epoch": 0.07385,
      "grad_norm": 1.2812955999677622,
      "learning_rate": 0.003,
      "loss": 4.0472,
      "step": 7385
    },
    {
      "epoch": 0.07386,
      "grad_norm": 0.993685614599893,
      "learning_rate": 0.003,
      "loss": 4.0608,
      "step": 7386
    },
    {
      "epoch": 0.07387,
      "grad_norm": 1.027709481484533,
      "learning_rate": 0.003,
      "loss": 4.0353,
      "step": 7387
    },
    {
      "epoch": 0.07388,
      "grad_norm": 0.9013082495547424,
      "learning_rate": 0.003,
      "loss": 4.0561,
      "step": 7388
    },
    {
      "epoch": 0.07389,
      "grad_norm": 0.9109951838256976,
      "learning_rate": 0.003,
      "loss": 4.0411,
      "step": 7389
    },
    {
      "epoch": 0.0739,
      "grad_norm": 1.2343749977944833,
      "learning_rate": 0.003,
      "loss": 4.0412,
      "step": 7390
    },
    {
      "epoch": 0.07391,
      "grad_norm": 0.9658322691066191,
      "learning_rate": 0.003,
      "loss": 4.0626,
      "step": 7391
    },
    {
      "epoch": 0.07392,
      "grad_norm": 1.1277581857498538,
      "learning_rate": 0.003,
      "loss": 4.0232,
      "step": 7392
    },
    {
      "epoch": 0.07393,
      "grad_norm": 1.0979017569203031,
      "learning_rate": 0.003,
      "loss": 4.0593,
      "step": 7393
    },
    {
      "epoch": 0.07394,
      "grad_norm": 1.1170278163443845,
      "learning_rate": 0.003,
      "loss": 4.0507,
      "step": 7394
    },
    {
      "epoch": 0.07395,
      "grad_norm": 1.0993990391688724,
      "learning_rate": 0.003,
      "loss": 4.0331,
      "step": 7395
    },
    {
      "epoch": 0.07396,
      "grad_norm": 1.0848153750483966,
      "learning_rate": 0.003,
      "loss": 4.0485,
      "step": 7396
    },
    {
      "epoch": 0.07397,
      "grad_norm": 1.1759764800565051,
      "learning_rate": 0.003,
      "loss": 4.0497,
      "step": 7397
    },
    {
      "epoch": 0.07398,
      "grad_norm": 0.9099184176925739,
      "learning_rate": 0.003,
      "loss": 4.0428,
      "step": 7398
    },
    {
      "epoch": 0.07399,
      "grad_norm": 0.9851023987142208,
      "learning_rate": 0.003,
      "loss": 4.0199,
      "step": 7399
    },
    {
      "epoch": 0.074,
      "grad_norm": 1.216378322428975,
      "learning_rate": 0.003,
      "loss": 4.0669,
      "step": 7400
    },
    {
      "epoch": 0.07401,
      "grad_norm": 0.880119789550399,
      "learning_rate": 0.003,
      "loss": 4.0247,
      "step": 7401
    },
    {
      "epoch": 0.07402,
      "grad_norm": 0.9513679960235595,
      "learning_rate": 0.003,
      "loss": 4.0461,
      "step": 7402
    },
    {
      "epoch": 0.07403,
      "grad_norm": 1.1554462432688264,
      "learning_rate": 0.003,
      "loss": 4.0625,
      "step": 7403
    },
    {
      "epoch": 0.07404,
      "grad_norm": 1.1015044417016977,
      "learning_rate": 0.003,
      "loss": 4.0921,
      "step": 7404
    },
    {
      "epoch": 0.07405,
      "grad_norm": 1.3070390701211214,
      "learning_rate": 0.003,
      "loss": 4.0515,
      "step": 7405
    },
    {
      "epoch": 0.07406,
      "grad_norm": 0.9272149699930327,
      "learning_rate": 0.003,
      "loss": 4.0469,
      "step": 7406
    },
    {
      "epoch": 0.07407,
      "grad_norm": 1.0566499622445449,
      "learning_rate": 0.003,
      "loss": 4.0654,
      "step": 7407
    },
    {
      "epoch": 0.07408,
      "grad_norm": 0.9492068898970054,
      "learning_rate": 0.003,
      "loss": 4.0569,
      "step": 7408
    },
    {
      "epoch": 0.07409,
      "grad_norm": 0.9649105766310203,
      "learning_rate": 0.003,
      "loss": 4.0446,
      "step": 7409
    },
    {
      "epoch": 0.0741,
      "grad_norm": 1.1043798243043246,
      "learning_rate": 0.003,
      "loss": 4.0682,
      "step": 7410
    },
    {
      "epoch": 0.07411,
      "grad_norm": 1.1607113803566775,
      "learning_rate": 0.003,
      "loss": 4.0634,
      "step": 7411
    },
    {
      "epoch": 0.07412,
      "grad_norm": 0.9327351376075126,
      "learning_rate": 0.003,
      "loss": 4.0449,
      "step": 7412
    },
    {
      "epoch": 0.07413,
      "grad_norm": 1.0657239310703015,
      "learning_rate": 0.003,
      "loss": 4.0474,
      "step": 7413
    },
    {
      "epoch": 0.07414,
      "grad_norm": 1.0370445327105178,
      "learning_rate": 0.003,
      "loss": 4.049,
      "step": 7414
    },
    {
      "epoch": 0.07415,
      "grad_norm": 1.1291501330975042,
      "learning_rate": 0.003,
      "loss": 4.0509,
      "step": 7415
    },
    {
      "epoch": 0.07416,
      "grad_norm": 1.0563748188952578,
      "learning_rate": 0.003,
      "loss": 4.0344,
      "step": 7416
    },
    {
      "epoch": 0.07417,
      "grad_norm": 0.9426313292975299,
      "learning_rate": 0.003,
      "loss": 4.0499,
      "step": 7417
    },
    {
      "epoch": 0.07418,
      "grad_norm": 1.138322949259601,
      "learning_rate": 0.003,
      "loss": 4.0377,
      "step": 7418
    },
    {
      "epoch": 0.07419,
      "grad_norm": 1.213845231951027,
      "learning_rate": 0.003,
      "loss": 4.0208,
      "step": 7419
    },
    {
      "epoch": 0.0742,
      "grad_norm": 0.9160208863216656,
      "learning_rate": 0.003,
      "loss": 4.0611,
      "step": 7420
    },
    {
      "epoch": 0.07421,
      "grad_norm": 0.9459537396238892,
      "learning_rate": 0.003,
      "loss": 4.0334,
      "step": 7421
    },
    {
      "epoch": 0.07422,
      "grad_norm": 0.9896087479545597,
      "learning_rate": 0.003,
      "loss": 4.0559,
      "step": 7422
    },
    {
      "epoch": 0.07423,
      "grad_norm": 1.029001637107005,
      "learning_rate": 0.003,
      "loss": 4.032,
      "step": 7423
    },
    {
      "epoch": 0.07424,
      "grad_norm": 0.966524850109166,
      "learning_rate": 0.003,
      "loss": 4.0588,
      "step": 7424
    },
    {
      "epoch": 0.07425,
      "grad_norm": 1.0858257368928335,
      "learning_rate": 0.003,
      "loss": 4.0237,
      "step": 7425
    },
    {
      "epoch": 0.07426,
      "grad_norm": 0.9810888238304923,
      "learning_rate": 0.003,
      "loss": 4.023,
      "step": 7426
    },
    {
      "epoch": 0.07427,
      "grad_norm": 1.1559445528412031,
      "learning_rate": 0.003,
      "loss": 4.0307,
      "step": 7427
    },
    {
      "epoch": 0.07428,
      "grad_norm": 1.1236073459063627,
      "learning_rate": 0.003,
      "loss": 4.0835,
      "step": 7428
    },
    {
      "epoch": 0.07429,
      "grad_norm": 1.096676807770383,
      "learning_rate": 0.003,
      "loss": 4.055,
      "step": 7429
    },
    {
      "epoch": 0.0743,
      "grad_norm": 1.0713521198381804,
      "learning_rate": 0.003,
      "loss": 4.0454,
      "step": 7430
    },
    {
      "epoch": 0.07431,
      "grad_norm": 1.0003185699822545,
      "learning_rate": 0.003,
      "loss": 4.0357,
      "step": 7431
    },
    {
      "epoch": 0.07432,
      "grad_norm": 1.242346832405972,
      "learning_rate": 0.003,
      "loss": 4.0622,
      "step": 7432
    },
    {
      "epoch": 0.07433,
      "grad_norm": 1.0630507457181497,
      "learning_rate": 0.003,
      "loss": 4.0427,
      "step": 7433
    },
    {
      "epoch": 0.07434,
      "grad_norm": 1.1654066987813088,
      "learning_rate": 0.003,
      "loss": 4.0178,
      "step": 7434
    },
    {
      "epoch": 0.07435,
      "grad_norm": 1.2683692736396797,
      "learning_rate": 0.003,
      "loss": 4.0578,
      "step": 7435
    },
    {
      "epoch": 0.07436,
      "grad_norm": 0.945218194969709,
      "learning_rate": 0.003,
      "loss": 4.0732,
      "step": 7436
    },
    {
      "epoch": 0.07437,
      "grad_norm": 1.097019618654518,
      "learning_rate": 0.003,
      "loss": 4.0088,
      "step": 7437
    },
    {
      "epoch": 0.07438,
      "grad_norm": 1.2074577366475394,
      "learning_rate": 0.003,
      "loss": 4.0596,
      "step": 7438
    },
    {
      "epoch": 0.07439,
      "grad_norm": 0.9431501487823388,
      "learning_rate": 0.003,
      "loss": 4.0427,
      "step": 7439
    },
    {
      "epoch": 0.0744,
      "grad_norm": 1.1411961430256892,
      "learning_rate": 0.003,
      "loss": 4.0521,
      "step": 7440
    },
    {
      "epoch": 0.07441,
      "grad_norm": 0.945382643731931,
      "learning_rate": 0.003,
      "loss": 4.0836,
      "step": 7441
    },
    {
      "epoch": 0.07442,
      "grad_norm": 0.9584211239670143,
      "learning_rate": 0.003,
      "loss": 4.0537,
      "step": 7442
    },
    {
      "epoch": 0.07443,
      "grad_norm": 0.8936679140385173,
      "learning_rate": 0.003,
      "loss": 4.0381,
      "step": 7443
    },
    {
      "epoch": 0.07444,
      "grad_norm": 0.8616182361179869,
      "learning_rate": 0.003,
      "loss": 4.0568,
      "step": 7444
    },
    {
      "epoch": 0.07445,
      "grad_norm": 1.112391031977807,
      "learning_rate": 0.003,
      "loss": 4.0433,
      "step": 7445
    },
    {
      "epoch": 0.07446,
      "grad_norm": 1.260904358901806,
      "learning_rate": 0.003,
      "loss": 4.0334,
      "step": 7446
    },
    {
      "epoch": 0.07447,
      "grad_norm": 0.9143344109603971,
      "learning_rate": 0.003,
      "loss": 4.0322,
      "step": 7447
    },
    {
      "epoch": 0.07448,
      "grad_norm": 0.9777877903133408,
      "learning_rate": 0.003,
      "loss": 4.0309,
      "step": 7448
    },
    {
      "epoch": 0.07449,
      "grad_norm": 1.0092330290166718,
      "learning_rate": 0.003,
      "loss": 4.0509,
      "step": 7449
    },
    {
      "epoch": 0.0745,
      "grad_norm": 1.116824586523927,
      "learning_rate": 0.003,
      "loss": 4.0581,
      "step": 7450
    },
    {
      "epoch": 0.07451,
      "grad_norm": 1.0158873706838591,
      "learning_rate": 0.003,
      "loss": 4.0513,
      "step": 7451
    },
    {
      "epoch": 0.07452,
      "grad_norm": 1.0683848996605232,
      "learning_rate": 0.003,
      "loss": 4.023,
      "step": 7452
    },
    {
      "epoch": 0.07453,
      "grad_norm": 1.0005773823548063,
      "learning_rate": 0.003,
      "loss": 4.054,
      "step": 7453
    },
    {
      "epoch": 0.07454,
      "grad_norm": 1.1094763576693252,
      "learning_rate": 0.003,
      "loss": 4.0352,
      "step": 7454
    },
    {
      "epoch": 0.07455,
      "grad_norm": 1.0977415415623035,
      "learning_rate": 0.003,
      "loss": 4.0475,
      "step": 7455
    },
    {
      "epoch": 0.07456,
      "grad_norm": 1.2070613083727133,
      "learning_rate": 0.003,
      "loss": 4.0542,
      "step": 7456
    },
    {
      "epoch": 0.07457,
      "grad_norm": 1.033669789154854,
      "learning_rate": 0.003,
      "loss": 4.0444,
      "step": 7457
    },
    {
      "epoch": 0.07458,
      "grad_norm": 0.9974734919174871,
      "learning_rate": 0.003,
      "loss": 4.0561,
      "step": 7458
    },
    {
      "epoch": 0.07459,
      "grad_norm": 1.1596224019873076,
      "learning_rate": 0.003,
      "loss": 4.0398,
      "step": 7459
    },
    {
      "epoch": 0.0746,
      "grad_norm": 1.0558735224515694,
      "learning_rate": 0.003,
      "loss": 4.0502,
      "step": 7460
    },
    {
      "epoch": 0.07461,
      "grad_norm": 1.1013658395166883,
      "learning_rate": 0.003,
      "loss": 4.0589,
      "step": 7461
    },
    {
      "epoch": 0.07462,
      "grad_norm": 1.099153298296693,
      "learning_rate": 0.003,
      "loss": 4.0217,
      "step": 7462
    },
    {
      "epoch": 0.07463,
      "grad_norm": 0.9155991610369465,
      "learning_rate": 0.003,
      "loss": 4.0778,
      "step": 7463
    },
    {
      "epoch": 0.07464,
      "grad_norm": 0.9379208043653106,
      "learning_rate": 0.003,
      "loss": 4.0253,
      "step": 7464
    },
    {
      "epoch": 0.07465,
      "grad_norm": 1.0372587578155243,
      "learning_rate": 0.003,
      "loss": 4.0506,
      "step": 7465
    },
    {
      "epoch": 0.07466,
      "grad_norm": 1.3132084128992856,
      "learning_rate": 0.003,
      "loss": 4.0717,
      "step": 7466
    },
    {
      "epoch": 0.07467,
      "grad_norm": 0.933995203349761,
      "learning_rate": 0.003,
      "loss": 4.0335,
      "step": 7467
    },
    {
      "epoch": 0.07468,
      "grad_norm": 1.0967227098443246,
      "learning_rate": 0.003,
      "loss": 4.0603,
      "step": 7468
    },
    {
      "epoch": 0.07469,
      "grad_norm": 0.8994513242913562,
      "learning_rate": 0.003,
      "loss": 4.018,
      "step": 7469
    },
    {
      "epoch": 0.0747,
      "grad_norm": 0.8582057247678115,
      "learning_rate": 0.003,
      "loss": 4.0644,
      "step": 7470
    },
    {
      "epoch": 0.07471,
      "grad_norm": 0.9750254675432543,
      "learning_rate": 0.003,
      "loss": 4.0503,
      "step": 7471
    },
    {
      "epoch": 0.07472,
      "grad_norm": 1.0758291745603603,
      "learning_rate": 0.003,
      "loss": 4.0576,
      "step": 7472
    },
    {
      "epoch": 0.07473,
      "grad_norm": 1.2870897431186328,
      "learning_rate": 0.003,
      "loss": 4.0637,
      "step": 7473
    },
    {
      "epoch": 0.07474,
      "grad_norm": 0.8579070231808928,
      "learning_rate": 0.003,
      "loss": 4.0391,
      "step": 7474
    },
    {
      "epoch": 0.07475,
      "grad_norm": 0.9738156550036234,
      "learning_rate": 0.003,
      "loss": 4.0557,
      "step": 7475
    },
    {
      "epoch": 0.07476,
      "grad_norm": 1.045108915912123,
      "learning_rate": 0.003,
      "loss": 4.0258,
      "step": 7476
    },
    {
      "epoch": 0.07477,
      "grad_norm": 1.094542247261385,
      "learning_rate": 0.003,
      "loss": 4.0652,
      "step": 7477
    },
    {
      "epoch": 0.07478,
      "grad_norm": 0.8629572990188785,
      "learning_rate": 0.003,
      "loss": 4.0008,
      "step": 7478
    },
    {
      "epoch": 0.07479,
      "grad_norm": 0.9292854361264181,
      "learning_rate": 0.003,
      "loss": 4.0246,
      "step": 7479
    },
    {
      "epoch": 0.0748,
      "grad_norm": 1.0705526760945827,
      "learning_rate": 0.003,
      "loss": 4.0456,
      "step": 7480
    },
    {
      "epoch": 0.07481,
      "grad_norm": 1.0359796271584742,
      "learning_rate": 0.003,
      "loss": 4.0525,
      "step": 7481
    },
    {
      "epoch": 0.07482,
      "grad_norm": 1.232770848270821,
      "learning_rate": 0.003,
      "loss": 4.0597,
      "step": 7482
    },
    {
      "epoch": 0.07483,
      "grad_norm": 1.0817607398369629,
      "learning_rate": 0.003,
      "loss": 4.0749,
      "step": 7483
    },
    {
      "epoch": 0.07484,
      "grad_norm": 1.2858180984611243,
      "learning_rate": 0.003,
      "loss": 4.0636,
      "step": 7484
    },
    {
      "epoch": 0.07485,
      "grad_norm": 0.9867732155173473,
      "learning_rate": 0.003,
      "loss": 4.0503,
      "step": 7485
    },
    {
      "epoch": 0.07486,
      "grad_norm": 1.0647719783345513,
      "learning_rate": 0.003,
      "loss": 4.0382,
      "step": 7486
    },
    {
      "epoch": 0.07487,
      "grad_norm": 0.9563519763714589,
      "learning_rate": 0.003,
      "loss": 4.051,
      "step": 7487
    },
    {
      "epoch": 0.07488,
      "grad_norm": 1.081168855348119,
      "learning_rate": 0.003,
      "loss": 4.058,
      "step": 7488
    },
    {
      "epoch": 0.07489,
      "grad_norm": 0.9057881307635063,
      "learning_rate": 0.003,
      "loss": 4.0612,
      "step": 7489
    },
    {
      "epoch": 0.0749,
      "grad_norm": 0.941257228213405,
      "learning_rate": 0.003,
      "loss": 4.0356,
      "step": 7490
    },
    {
      "epoch": 0.07491,
      "grad_norm": 1.023677883109361,
      "learning_rate": 0.003,
      "loss": 4.0427,
      "step": 7491
    },
    {
      "epoch": 0.07492,
      "grad_norm": 1.0985843828113449,
      "learning_rate": 0.003,
      "loss": 4.0665,
      "step": 7492
    },
    {
      "epoch": 0.07493,
      "grad_norm": 1.1291894542659202,
      "learning_rate": 0.003,
      "loss": 4.0581,
      "step": 7493
    },
    {
      "epoch": 0.07494,
      "grad_norm": 1.046782915344204,
      "learning_rate": 0.003,
      "loss": 4.0712,
      "step": 7494
    },
    {
      "epoch": 0.07495,
      "grad_norm": 1.2154732983788594,
      "learning_rate": 0.003,
      "loss": 4.0489,
      "step": 7495
    },
    {
      "epoch": 0.07496,
      "grad_norm": 1.1000284508004632,
      "learning_rate": 0.003,
      "loss": 4.0409,
      "step": 7496
    },
    {
      "epoch": 0.07497,
      "grad_norm": 1.1040574207112384,
      "learning_rate": 0.003,
      "loss": 4.0549,
      "step": 7497
    },
    {
      "epoch": 0.07498,
      "grad_norm": 0.8948022242164695,
      "learning_rate": 0.003,
      "loss": 4.0429,
      "step": 7498
    },
    {
      "epoch": 0.07499,
      "grad_norm": 1.0258546162106021,
      "learning_rate": 0.003,
      "loss": 4.0439,
      "step": 7499
    },
    {
      "epoch": 0.075,
      "grad_norm": 1.1158118517557325,
      "learning_rate": 0.003,
      "loss": 4.0659,
      "step": 7500
    },
    {
      "epoch": 0.07501,
      "grad_norm": 1.0543780189380676,
      "learning_rate": 0.003,
      "loss": 4.0421,
      "step": 7501
    },
    {
      "epoch": 0.07502,
      "grad_norm": 1.1398746708324812,
      "learning_rate": 0.003,
      "loss": 4.0397,
      "step": 7502
    },
    {
      "epoch": 0.07503,
      "grad_norm": 1.17233757471888,
      "learning_rate": 0.003,
      "loss": 4.0462,
      "step": 7503
    },
    {
      "epoch": 0.07504,
      "grad_norm": 1.1747348217017677,
      "learning_rate": 0.003,
      "loss": 4.0445,
      "step": 7504
    },
    {
      "epoch": 0.07505,
      "grad_norm": 0.8995993583616543,
      "learning_rate": 0.003,
      "loss": 4.0257,
      "step": 7505
    },
    {
      "epoch": 0.07506,
      "grad_norm": 0.9337832043650974,
      "learning_rate": 0.003,
      "loss": 4.038,
      "step": 7506
    },
    {
      "epoch": 0.07507,
      "grad_norm": 0.9695999584916757,
      "learning_rate": 0.003,
      "loss": 4.0259,
      "step": 7507
    },
    {
      "epoch": 0.07508,
      "grad_norm": 1.1638077160586862,
      "learning_rate": 0.003,
      "loss": 4.0683,
      "step": 7508
    },
    {
      "epoch": 0.07509,
      "grad_norm": 0.9874287876840504,
      "learning_rate": 0.003,
      "loss": 4.0471,
      "step": 7509
    },
    {
      "epoch": 0.0751,
      "grad_norm": 0.9776311060010476,
      "learning_rate": 0.003,
      "loss": 4.0449,
      "step": 7510
    },
    {
      "epoch": 0.07511,
      "grad_norm": 1.1033486233571166,
      "learning_rate": 0.003,
      "loss": 4.0377,
      "step": 7511
    },
    {
      "epoch": 0.07512,
      "grad_norm": 0.8907632335623773,
      "learning_rate": 0.003,
      "loss": 4.0696,
      "step": 7512
    },
    {
      "epoch": 0.07513,
      "grad_norm": 0.9988954487137791,
      "learning_rate": 0.003,
      "loss": 4.0554,
      "step": 7513
    },
    {
      "epoch": 0.07514,
      "grad_norm": 1.194395962502461,
      "learning_rate": 0.003,
      "loss": 4.0197,
      "step": 7514
    },
    {
      "epoch": 0.07515,
      "grad_norm": 1.156162880522299,
      "learning_rate": 0.003,
      "loss": 4.0773,
      "step": 7515
    },
    {
      "epoch": 0.07516,
      "grad_norm": 1.2727170425623708,
      "learning_rate": 0.003,
      "loss": 4.0316,
      "step": 7516
    },
    {
      "epoch": 0.07517,
      "grad_norm": 1.1565889833590228,
      "learning_rate": 0.003,
      "loss": 4.0575,
      "step": 7517
    },
    {
      "epoch": 0.07518,
      "grad_norm": 0.8708468314319205,
      "learning_rate": 0.003,
      "loss": 4.0349,
      "step": 7518
    },
    {
      "epoch": 0.07519,
      "grad_norm": 0.9953433252661223,
      "learning_rate": 0.003,
      "loss": 4.0459,
      "step": 7519
    },
    {
      "epoch": 0.0752,
      "grad_norm": 1.0940459643263882,
      "learning_rate": 0.003,
      "loss": 4.0488,
      "step": 7520
    },
    {
      "epoch": 0.07521,
      "grad_norm": 0.928716112619678,
      "learning_rate": 0.003,
      "loss": 4.0484,
      "step": 7521
    },
    {
      "epoch": 0.07522,
      "grad_norm": 0.9818555888838302,
      "learning_rate": 0.003,
      "loss": 4.0357,
      "step": 7522
    },
    {
      "epoch": 0.07523,
      "grad_norm": 1.2068474109803409,
      "learning_rate": 0.003,
      "loss": 4.0384,
      "step": 7523
    },
    {
      "epoch": 0.07524,
      "grad_norm": 1.01597504610466,
      "learning_rate": 0.003,
      "loss": 4.0628,
      "step": 7524
    },
    {
      "epoch": 0.07525,
      "grad_norm": 1.2715734649300354,
      "learning_rate": 0.003,
      "loss": 4.0756,
      "step": 7525
    },
    {
      "epoch": 0.07526,
      "grad_norm": 0.7870088972562932,
      "learning_rate": 0.003,
      "loss": 4.064,
      "step": 7526
    },
    {
      "epoch": 0.07527,
      "grad_norm": 0.9106993971561816,
      "learning_rate": 0.003,
      "loss": 4.0652,
      "step": 7527
    },
    {
      "epoch": 0.07528,
      "grad_norm": 0.8636142458933644,
      "learning_rate": 0.003,
      "loss": 4.0654,
      "step": 7528
    },
    {
      "epoch": 0.07529,
      "grad_norm": 0.9616268692995935,
      "learning_rate": 0.003,
      "loss": 4.0369,
      "step": 7529
    },
    {
      "epoch": 0.0753,
      "grad_norm": 1.114538018638255,
      "learning_rate": 0.003,
      "loss": 4.0376,
      "step": 7530
    },
    {
      "epoch": 0.07531,
      "grad_norm": 0.9879931527488544,
      "learning_rate": 0.003,
      "loss": 4.0674,
      "step": 7531
    },
    {
      "epoch": 0.07532,
      "grad_norm": 1.0655313216663054,
      "learning_rate": 0.003,
      "loss": 4.056,
      "step": 7532
    },
    {
      "epoch": 0.07533,
      "grad_norm": 0.8780149908265403,
      "learning_rate": 0.003,
      "loss": 4.0381,
      "step": 7533
    },
    {
      "epoch": 0.07534,
      "grad_norm": 0.8784040460131582,
      "learning_rate": 0.003,
      "loss": 4.0405,
      "step": 7534
    },
    {
      "epoch": 0.07535,
      "grad_norm": 0.8518380464505314,
      "learning_rate": 0.003,
      "loss": 4.0526,
      "step": 7535
    },
    {
      "epoch": 0.07536,
      "grad_norm": 0.9098902162790059,
      "learning_rate": 0.003,
      "loss": 4.0353,
      "step": 7536
    },
    {
      "epoch": 0.07537,
      "grad_norm": 1.143562497559037,
      "learning_rate": 0.003,
      "loss": 4.0325,
      "step": 7537
    },
    {
      "epoch": 0.07538,
      "grad_norm": 1.2254729496457664,
      "learning_rate": 0.003,
      "loss": 4.0551,
      "step": 7538
    },
    {
      "epoch": 0.07539,
      "grad_norm": 1.043842367086055,
      "learning_rate": 0.003,
      "loss": 4.0215,
      "step": 7539
    },
    {
      "epoch": 0.0754,
      "grad_norm": 1.21725440776156,
      "learning_rate": 0.003,
      "loss": 4.0496,
      "step": 7540
    },
    {
      "epoch": 0.07541,
      "grad_norm": 0.9295788359152493,
      "learning_rate": 0.003,
      "loss": 4.0219,
      "step": 7541
    },
    {
      "epoch": 0.07542,
      "grad_norm": 1.130473398219038,
      "learning_rate": 0.003,
      "loss": 4.0444,
      "step": 7542
    },
    {
      "epoch": 0.07543,
      "grad_norm": 0.9937582722105303,
      "learning_rate": 0.003,
      "loss": 4.065,
      "step": 7543
    },
    {
      "epoch": 0.07544,
      "grad_norm": 1.2560834000797938,
      "learning_rate": 0.003,
      "loss": 4.0612,
      "step": 7544
    },
    {
      "epoch": 0.07545,
      "grad_norm": 1.0050728570569878,
      "learning_rate": 0.003,
      "loss": 4.0808,
      "step": 7545
    },
    {
      "epoch": 0.07546,
      "grad_norm": 1.0720793211640665,
      "learning_rate": 0.003,
      "loss": 4.0401,
      "step": 7546
    },
    {
      "epoch": 0.07547,
      "grad_norm": 0.9113278300880503,
      "learning_rate": 0.003,
      "loss": 4.0609,
      "step": 7547
    },
    {
      "epoch": 0.07548,
      "grad_norm": 0.7765738432773017,
      "learning_rate": 0.003,
      "loss": 4.0671,
      "step": 7548
    },
    {
      "epoch": 0.07549,
      "grad_norm": 0.912954245340519,
      "learning_rate": 0.003,
      "loss": 4.0715,
      "step": 7549
    },
    {
      "epoch": 0.0755,
      "grad_norm": 1.1357200728898464,
      "learning_rate": 0.003,
      "loss": 4.0709,
      "step": 7550
    },
    {
      "epoch": 0.07551,
      "grad_norm": 1.1645892480065512,
      "learning_rate": 0.003,
      "loss": 4.06,
      "step": 7551
    },
    {
      "epoch": 0.07552,
      "grad_norm": 1.0759538967371787,
      "learning_rate": 0.003,
      "loss": 4.061,
      "step": 7552
    },
    {
      "epoch": 0.07553,
      "grad_norm": 1.113298651168989,
      "learning_rate": 0.003,
      "loss": 4.0659,
      "step": 7553
    },
    {
      "epoch": 0.07554,
      "grad_norm": 1.3129530629673276,
      "learning_rate": 0.003,
      "loss": 4.0665,
      "step": 7554
    },
    {
      "epoch": 0.07555,
      "grad_norm": 1.0195391886187795,
      "learning_rate": 0.003,
      "loss": 4.0502,
      "step": 7555
    },
    {
      "epoch": 0.07556,
      "grad_norm": 1.1261424419589618,
      "learning_rate": 0.003,
      "loss": 4.0715,
      "step": 7556
    },
    {
      "epoch": 0.07557,
      "grad_norm": 0.9078457564330181,
      "learning_rate": 0.003,
      "loss": 4.0407,
      "step": 7557
    },
    {
      "epoch": 0.07558,
      "grad_norm": 1.1105037206557775,
      "learning_rate": 0.003,
      "loss": 4.0446,
      "step": 7558
    },
    {
      "epoch": 0.07559,
      "grad_norm": 1.084878893896586,
      "learning_rate": 0.003,
      "loss": 4.0159,
      "step": 7559
    },
    {
      "epoch": 0.0756,
      "grad_norm": 0.9422492493376851,
      "learning_rate": 0.003,
      "loss": 4.0546,
      "step": 7560
    },
    {
      "epoch": 0.07561,
      "grad_norm": 0.9472473971962481,
      "learning_rate": 0.003,
      "loss": 4.038,
      "step": 7561
    },
    {
      "epoch": 0.07562,
      "grad_norm": 1.0141863361647219,
      "learning_rate": 0.003,
      "loss": 4.0163,
      "step": 7562
    },
    {
      "epoch": 0.07563,
      "grad_norm": 1.209943999383626,
      "learning_rate": 0.003,
      "loss": 4.052,
      "step": 7563
    },
    {
      "epoch": 0.07564,
      "grad_norm": 1.040474950950908,
      "learning_rate": 0.003,
      "loss": 4.0709,
      "step": 7564
    },
    {
      "epoch": 0.07565,
      "grad_norm": 1.041564684801342,
      "learning_rate": 0.003,
      "loss": 4.0515,
      "step": 7565
    },
    {
      "epoch": 0.07566,
      "grad_norm": 0.9211223111374635,
      "learning_rate": 0.003,
      "loss": 4.0462,
      "step": 7566
    },
    {
      "epoch": 0.07567,
      "grad_norm": 0.9557313493057552,
      "learning_rate": 0.003,
      "loss": 4.0609,
      "step": 7567
    },
    {
      "epoch": 0.07568,
      "grad_norm": 1.0157216311700132,
      "learning_rate": 0.003,
      "loss": 4.0558,
      "step": 7568
    },
    {
      "epoch": 0.07569,
      "grad_norm": 1.0891520113381792,
      "learning_rate": 0.003,
      "loss": 4.0408,
      "step": 7569
    },
    {
      "epoch": 0.0757,
      "grad_norm": 1.035599436574519,
      "learning_rate": 0.003,
      "loss": 4.0346,
      "step": 7570
    },
    {
      "epoch": 0.07571,
      "grad_norm": 1.1416705208329894,
      "learning_rate": 0.003,
      "loss": 4.0311,
      "step": 7571
    },
    {
      "epoch": 0.07572,
      "grad_norm": 1.1983465038651324,
      "learning_rate": 0.003,
      "loss": 4.0314,
      "step": 7572
    },
    {
      "epoch": 0.07573,
      "grad_norm": 0.8376450895449468,
      "learning_rate": 0.003,
      "loss": 4.043,
      "step": 7573
    },
    {
      "epoch": 0.07574,
      "grad_norm": 0.7440289348204431,
      "learning_rate": 0.003,
      "loss": 4.057,
      "step": 7574
    },
    {
      "epoch": 0.07575,
      "grad_norm": 0.6603077120225412,
      "learning_rate": 0.003,
      "loss": 4.0328,
      "step": 7575
    },
    {
      "epoch": 0.07576,
      "grad_norm": 0.7920785949672058,
      "learning_rate": 0.003,
      "loss": 4.0291,
      "step": 7576
    },
    {
      "epoch": 0.07577,
      "grad_norm": 1.0268729068383955,
      "learning_rate": 0.003,
      "loss": 4.066,
      "step": 7577
    },
    {
      "epoch": 0.07578,
      "grad_norm": 1.1368625436467688,
      "learning_rate": 0.003,
      "loss": 4.0512,
      "step": 7578
    },
    {
      "epoch": 0.07579,
      "grad_norm": 1.008982576200628,
      "learning_rate": 0.003,
      "loss": 4.0365,
      "step": 7579
    },
    {
      "epoch": 0.0758,
      "grad_norm": 1.3316307903450357,
      "learning_rate": 0.003,
      "loss": 4.0651,
      "step": 7580
    },
    {
      "epoch": 0.07581,
      "grad_norm": 0.9546023266380341,
      "learning_rate": 0.003,
      "loss": 4.0468,
      "step": 7581
    },
    {
      "epoch": 0.07582,
      "grad_norm": 1.028351315894607,
      "learning_rate": 0.003,
      "loss": 4.0439,
      "step": 7582
    },
    {
      "epoch": 0.07583,
      "grad_norm": 1.1084447205745265,
      "learning_rate": 0.003,
      "loss": 4.0487,
      "step": 7583
    },
    {
      "epoch": 0.07584,
      "grad_norm": 1.083029995624086,
      "learning_rate": 0.003,
      "loss": 4.0887,
      "step": 7584
    },
    {
      "epoch": 0.07585,
      "grad_norm": 1.0956918067982808,
      "learning_rate": 0.003,
      "loss": 4.0585,
      "step": 7585
    },
    {
      "epoch": 0.07586,
      "grad_norm": 1.0704482847092711,
      "learning_rate": 0.003,
      "loss": 4.0366,
      "step": 7586
    },
    {
      "epoch": 0.07587,
      "grad_norm": 1.0248155370988479,
      "learning_rate": 0.003,
      "loss": 4.0742,
      "step": 7587
    },
    {
      "epoch": 0.07588,
      "grad_norm": 0.9469007252567827,
      "learning_rate": 0.003,
      "loss": 4.0477,
      "step": 7588
    },
    {
      "epoch": 0.07589,
      "grad_norm": 0.9083602594004783,
      "learning_rate": 0.003,
      "loss": 4.0525,
      "step": 7589
    },
    {
      "epoch": 0.0759,
      "grad_norm": 1.0888081212751122,
      "learning_rate": 0.003,
      "loss": 4.0522,
      "step": 7590
    },
    {
      "epoch": 0.07591,
      "grad_norm": 1.1286869611060864,
      "learning_rate": 0.003,
      "loss": 4.0602,
      "step": 7591
    },
    {
      "epoch": 0.07592,
      "grad_norm": 1.1994073883546317,
      "learning_rate": 0.003,
      "loss": 4.0632,
      "step": 7592
    },
    {
      "epoch": 0.07593,
      "grad_norm": 0.930854156102424,
      "learning_rate": 0.003,
      "loss": 4.0226,
      "step": 7593
    },
    {
      "epoch": 0.07594,
      "grad_norm": 0.9949944707269984,
      "learning_rate": 0.003,
      "loss": 4.0301,
      "step": 7594
    },
    {
      "epoch": 0.07595,
      "grad_norm": 1.0566054746262994,
      "learning_rate": 0.003,
      "loss": 4.0355,
      "step": 7595
    },
    {
      "epoch": 0.07596,
      "grad_norm": 0.9893650668367953,
      "learning_rate": 0.003,
      "loss": 4.022,
      "step": 7596
    },
    {
      "epoch": 0.07597,
      "grad_norm": 1.1346833349811905,
      "learning_rate": 0.003,
      "loss": 4.0686,
      "step": 7597
    },
    {
      "epoch": 0.07598,
      "grad_norm": 1.0292720315909252,
      "learning_rate": 0.003,
      "loss": 4.0629,
      "step": 7598
    },
    {
      "epoch": 0.07599,
      "grad_norm": 1.152689224088955,
      "learning_rate": 0.003,
      "loss": 4.0663,
      "step": 7599
    },
    {
      "epoch": 0.076,
      "grad_norm": 1.0383151228598986,
      "learning_rate": 0.003,
      "loss": 4.0544,
      "step": 7600
    },
    {
      "epoch": 0.07601,
      "grad_norm": 1.047117789348393,
      "learning_rate": 0.003,
      "loss": 4.0714,
      "step": 7601
    },
    {
      "epoch": 0.07602,
      "grad_norm": 1.106052696242171,
      "learning_rate": 0.003,
      "loss": 4.0465,
      "step": 7602
    },
    {
      "epoch": 0.07603,
      "grad_norm": 1.0073145436596014,
      "learning_rate": 0.003,
      "loss": 4.0458,
      "step": 7603
    },
    {
      "epoch": 0.07604,
      "grad_norm": 1.0664230896712803,
      "learning_rate": 0.003,
      "loss": 4.0586,
      "step": 7604
    },
    {
      "epoch": 0.07605,
      "grad_norm": 0.9753888567376868,
      "learning_rate": 0.003,
      "loss": 4.0484,
      "step": 7605
    },
    {
      "epoch": 0.07606,
      "grad_norm": 1.2030307927813533,
      "learning_rate": 0.003,
      "loss": 4.1116,
      "step": 7606
    },
    {
      "epoch": 0.07607,
      "grad_norm": 0.9086575357118534,
      "learning_rate": 0.003,
      "loss": 4.0615,
      "step": 7607
    },
    {
      "epoch": 0.07608,
      "grad_norm": 0.9273700226274223,
      "learning_rate": 0.003,
      "loss": 4.074,
      "step": 7608
    },
    {
      "epoch": 0.07609,
      "grad_norm": 1.003689493135648,
      "learning_rate": 0.003,
      "loss": 4.0503,
      "step": 7609
    },
    {
      "epoch": 0.0761,
      "grad_norm": 1.1377269625068565,
      "learning_rate": 0.003,
      "loss": 4.0461,
      "step": 7610
    },
    {
      "epoch": 0.07611,
      "grad_norm": 0.9384559805000248,
      "learning_rate": 0.003,
      "loss": 4.07,
      "step": 7611
    },
    {
      "epoch": 0.07612,
      "grad_norm": 1.1271298131099274,
      "learning_rate": 0.003,
      "loss": 4.0408,
      "step": 7612
    },
    {
      "epoch": 0.07613,
      "grad_norm": 1.1197631113115567,
      "learning_rate": 0.003,
      "loss": 4.0384,
      "step": 7613
    },
    {
      "epoch": 0.07614,
      "grad_norm": 1.0895475603662517,
      "learning_rate": 0.003,
      "loss": 4.0225,
      "step": 7614
    },
    {
      "epoch": 0.07615,
      "grad_norm": 1.1379640727683638,
      "learning_rate": 0.003,
      "loss": 4.0504,
      "step": 7615
    },
    {
      "epoch": 0.07616,
      "grad_norm": 1.0509591762451105,
      "learning_rate": 0.003,
      "loss": 4.049,
      "step": 7616
    },
    {
      "epoch": 0.07617,
      "grad_norm": 1.0514288577627249,
      "learning_rate": 0.003,
      "loss": 4.0146,
      "step": 7617
    },
    {
      "epoch": 0.07618,
      "grad_norm": 0.9463880932847941,
      "learning_rate": 0.003,
      "loss": 4.0788,
      "step": 7618
    },
    {
      "epoch": 0.07619,
      "grad_norm": 1.0151178915603503,
      "learning_rate": 0.003,
      "loss": 4.0565,
      "step": 7619
    },
    {
      "epoch": 0.0762,
      "grad_norm": 1.2638113293346065,
      "learning_rate": 0.003,
      "loss": 4.0375,
      "step": 7620
    },
    {
      "epoch": 0.07621,
      "grad_norm": 0.8743906611118283,
      "learning_rate": 0.003,
      "loss": 4.0612,
      "step": 7621
    },
    {
      "epoch": 0.07622,
      "grad_norm": 0.9376371308176443,
      "learning_rate": 0.003,
      "loss": 4.0617,
      "step": 7622
    },
    {
      "epoch": 0.07623,
      "grad_norm": 1.2162047119192658,
      "learning_rate": 0.003,
      "loss": 4.0489,
      "step": 7623
    },
    {
      "epoch": 0.07624,
      "grad_norm": 0.8081909472078085,
      "learning_rate": 0.003,
      "loss": 4.0448,
      "step": 7624
    },
    {
      "epoch": 0.07625,
      "grad_norm": 0.9250877782038973,
      "learning_rate": 0.003,
      "loss": 4.0338,
      "step": 7625
    },
    {
      "epoch": 0.07626,
      "grad_norm": 1.105895370815446,
      "learning_rate": 0.003,
      "loss": 4.0535,
      "step": 7626
    },
    {
      "epoch": 0.07627,
      "grad_norm": 1.0603656758381923,
      "learning_rate": 0.003,
      "loss": 4.0562,
      "step": 7627
    },
    {
      "epoch": 0.07628,
      "grad_norm": 1.2118848294290039,
      "learning_rate": 0.003,
      "loss": 4.0308,
      "step": 7628
    },
    {
      "epoch": 0.07629,
      "grad_norm": 0.9491546824024952,
      "learning_rate": 0.003,
      "loss": 4.0436,
      "step": 7629
    },
    {
      "epoch": 0.0763,
      "grad_norm": 1.1470367440434606,
      "learning_rate": 0.003,
      "loss": 4.0348,
      "step": 7630
    },
    {
      "epoch": 0.07631,
      "grad_norm": 0.9750737788207199,
      "learning_rate": 0.003,
      "loss": 4.0384,
      "step": 7631
    },
    {
      "epoch": 0.07632,
      "grad_norm": 0.9223121244351081,
      "learning_rate": 0.003,
      "loss": 4.0283,
      "step": 7632
    },
    {
      "epoch": 0.07633,
      "grad_norm": 0.9447939771060309,
      "learning_rate": 0.003,
      "loss": 4.0354,
      "step": 7633
    },
    {
      "epoch": 0.07634,
      "grad_norm": 1.0656262121811246,
      "learning_rate": 0.003,
      "loss": 4.0271,
      "step": 7634
    },
    {
      "epoch": 0.07635,
      "grad_norm": 1.1293493421594012,
      "learning_rate": 0.003,
      "loss": 4.0509,
      "step": 7635
    },
    {
      "epoch": 0.07636,
      "grad_norm": 1.1936557881689314,
      "learning_rate": 0.003,
      "loss": 4.028,
      "step": 7636
    },
    {
      "epoch": 0.07637,
      "grad_norm": 0.9929307055596185,
      "learning_rate": 0.003,
      "loss": 4.0687,
      "step": 7637
    },
    {
      "epoch": 0.07638,
      "grad_norm": 1.0228636969536977,
      "learning_rate": 0.003,
      "loss": 4.0445,
      "step": 7638
    },
    {
      "epoch": 0.07639,
      "grad_norm": 0.9823441975383835,
      "learning_rate": 0.003,
      "loss": 4.0487,
      "step": 7639
    },
    {
      "epoch": 0.0764,
      "grad_norm": 1.1338997019966783,
      "learning_rate": 0.003,
      "loss": 4.0806,
      "step": 7640
    },
    {
      "epoch": 0.07641,
      "grad_norm": 1.0093913734568616,
      "learning_rate": 0.003,
      "loss": 4.0741,
      "step": 7641
    },
    {
      "epoch": 0.07642,
      "grad_norm": 1.125461302352058,
      "learning_rate": 0.003,
      "loss": 4.0679,
      "step": 7642
    },
    {
      "epoch": 0.07643,
      "grad_norm": 1.0514677154433425,
      "learning_rate": 0.003,
      "loss": 4.0433,
      "step": 7643
    },
    {
      "epoch": 0.07644,
      "grad_norm": 1.0553593637554315,
      "learning_rate": 0.003,
      "loss": 4.0296,
      "step": 7644
    },
    {
      "epoch": 0.07645,
      "grad_norm": 0.9805977399864129,
      "learning_rate": 0.003,
      "loss": 4.0459,
      "step": 7645
    },
    {
      "epoch": 0.07646,
      "grad_norm": 1.0299202945868386,
      "learning_rate": 0.003,
      "loss": 4.0495,
      "step": 7646
    },
    {
      "epoch": 0.07647,
      "grad_norm": 1.3176337185067677,
      "learning_rate": 0.003,
      "loss": 4.0349,
      "step": 7647
    },
    {
      "epoch": 0.07648,
      "grad_norm": 1.1005654564479557,
      "learning_rate": 0.003,
      "loss": 4.0512,
      "step": 7648
    },
    {
      "epoch": 0.07649,
      "grad_norm": 1.0905324219896768,
      "learning_rate": 0.003,
      "loss": 4.0515,
      "step": 7649
    },
    {
      "epoch": 0.0765,
      "grad_norm": 0.8802249156939694,
      "learning_rate": 0.003,
      "loss": 4.031,
      "step": 7650
    },
    {
      "epoch": 0.07651,
      "grad_norm": 0.9072009257031926,
      "learning_rate": 0.003,
      "loss": 4.0419,
      "step": 7651
    },
    {
      "epoch": 0.07652,
      "grad_norm": 0.9346164551384647,
      "learning_rate": 0.003,
      "loss": 4.0399,
      "step": 7652
    },
    {
      "epoch": 0.07653,
      "grad_norm": 0.980298089231196,
      "learning_rate": 0.003,
      "loss": 4.019,
      "step": 7653
    },
    {
      "epoch": 0.07654,
      "grad_norm": 1.0968929008117443,
      "learning_rate": 0.003,
      "loss": 4.0539,
      "step": 7654
    },
    {
      "epoch": 0.07655,
      "grad_norm": 0.9601657405610515,
      "learning_rate": 0.003,
      "loss": 4.0569,
      "step": 7655
    },
    {
      "epoch": 0.07656,
      "grad_norm": 1.258852527855847,
      "learning_rate": 0.003,
      "loss": 4.0714,
      "step": 7656
    },
    {
      "epoch": 0.07657,
      "grad_norm": 0.8924445280055346,
      "learning_rate": 0.003,
      "loss": 4.0637,
      "step": 7657
    },
    {
      "epoch": 0.07658,
      "grad_norm": 1.016577242205408,
      "learning_rate": 0.003,
      "loss": 4.0491,
      "step": 7658
    },
    {
      "epoch": 0.07659,
      "grad_norm": 1.1676547370839572,
      "learning_rate": 0.003,
      "loss": 4.0479,
      "step": 7659
    },
    {
      "epoch": 0.0766,
      "grad_norm": 1.0842253182708932,
      "learning_rate": 0.003,
      "loss": 4.0864,
      "step": 7660
    },
    {
      "epoch": 0.07661,
      "grad_norm": 1.233560763252066,
      "learning_rate": 0.003,
      "loss": 4.0478,
      "step": 7661
    },
    {
      "epoch": 0.07662,
      "grad_norm": 1.0033408895801803,
      "learning_rate": 0.003,
      "loss": 4.0731,
      "step": 7662
    },
    {
      "epoch": 0.07663,
      "grad_norm": 1.2643790027991915,
      "learning_rate": 0.003,
      "loss": 4.0749,
      "step": 7663
    },
    {
      "epoch": 0.07664,
      "grad_norm": 1.023325848782995,
      "learning_rate": 0.003,
      "loss": 4.0613,
      "step": 7664
    },
    {
      "epoch": 0.07665,
      "grad_norm": 1.0799510581720673,
      "learning_rate": 0.003,
      "loss": 4.0334,
      "step": 7665
    },
    {
      "epoch": 0.07666,
      "grad_norm": 1.108909756809187,
      "learning_rate": 0.003,
      "loss": 4.0523,
      "step": 7666
    },
    {
      "epoch": 0.07667,
      "grad_norm": 0.8955114573010688,
      "learning_rate": 0.003,
      "loss": 4.0557,
      "step": 7667
    },
    {
      "epoch": 0.07668,
      "grad_norm": 0.8923297593100042,
      "learning_rate": 0.003,
      "loss": 4.0448,
      "step": 7668
    },
    {
      "epoch": 0.07669,
      "grad_norm": 1.0249046055145887,
      "learning_rate": 0.003,
      "loss": 4.0614,
      "step": 7669
    },
    {
      "epoch": 0.0767,
      "grad_norm": 1.2053116597026634,
      "learning_rate": 0.003,
      "loss": 4.033,
      "step": 7670
    },
    {
      "epoch": 0.07671,
      "grad_norm": 1.0082941639633163,
      "learning_rate": 0.003,
      "loss": 4.0691,
      "step": 7671
    },
    {
      "epoch": 0.07672,
      "grad_norm": 1.2734345360500605,
      "learning_rate": 0.003,
      "loss": 4.0573,
      "step": 7672
    },
    {
      "epoch": 0.07673,
      "grad_norm": 0.9281784475022431,
      "learning_rate": 0.003,
      "loss": 4.0818,
      "step": 7673
    },
    {
      "epoch": 0.07674,
      "grad_norm": 1.0050293522340423,
      "learning_rate": 0.003,
      "loss": 4.0469,
      "step": 7674
    },
    {
      "epoch": 0.07675,
      "grad_norm": 1.1578682951290742,
      "learning_rate": 0.003,
      "loss": 4.047,
      "step": 7675
    },
    {
      "epoch": 0.07676,
      "grad_norm": 1.0107218219879721,
      "learning_rate": 0.003,
      "loss": 4.0378,
      "step": 7676
    },
    {
      "epoch": 0.07677,
      "grad_norm": 1.122086603391903,
      "learning_rate": 0.003,
      "loss": 4.0429,
      "step": 7677
    },
    {
      "epoch": 0.07678,
      "grad_norm": 1.0817490316966227,
      "learning_rate": 0.003,
      "loss": 4.0773,
      "step": 7678
    },
    {
      "epoch": 0.07679,
      "grad_norm": 1.0060234752712105,
      "learning_rate": 0.003,
      "loss": 4.0682,
      "step": 7679
    },
    {
      "epoch": 0.0768,
      "grad_norm": 1.0542727480500718,
      "learning_rate": 0.003,
      "loss": 4.0388,
      "step": 7680
    },
    {
      "epoch": 0.07681,
      "grad_norm": 0.9906102704846765,
      "learning_rate": 0.003,
      "loss": 4.049,
      "step": 7681
    },
    {
      "epoch": 0.07682,
      "grad_norm": 1.1799698353197907,
      "learning_rate": 0.003,
      "loss": 4.0395,
      "step": 7682
    },
    {
      "epoch": 0.07683,
      "grad_norm": 0.9827379813572511,
      "learning_rate": 0.003,
      "loss": 4.048,
      "step": 7683
    },
    {
      "epoch": 0.07684,
      "grad_norm": 1.0627107640985607,
      "learning_rate": 0.003,
      "loss": 4.0678,
      "step": 7684
    },
    {
      "epoch": 0.07685,
      "grad_norm": 0.8775795962977772,
      "learning_rate": 0.003,
      "loss": 4.0394,
      "step": 7685
    },
    {
      "epoch": 0.07686,
      "grad_norm": 0.7639644005133086,
      "learning_rate": 0.003,
      "loss": 4.0388,
      "step": 7686
    },
    {
      "epoch": 0.07687,
      "grad_norm": 0.8605351853455829,
      "learning_rate": 0.003,
      "loss": 4.0551,
      "step": 7687
    },
    {
      "epoch": 0.07688,
      "grad_norm": 0.9162241897881261,
      "learning_rate": 0.003,
      "loss": 4.0492,
      "step": 7688
    },
    {
      "epoch": 0.07689,
      "grad_norm": 1.2441994585763008,
      "learning_rate": 0.003,
      "loss": 4.0393,
      "step": 7689
    },
    {
      "epoch": 0.0769,
      "grad_norm": 1.0783654477764117,
      "learning_rate": 0.003,
      "loss": 4.0636,
      "step": 7690
    },
    {
      "epoch": 0.07691,
      "grad_norm": 0.9813259733202012,
      "learning_rate": 0.003,
      "loss": 4.0562,
      "step": 7691
    },
    {
      "epoch": 0.07692,
      "grad_norm": 1.1132849435616365,
      "learning_rate": 0.003,
      "loss": 4.0676,
      "step": 7692
    },
    {
      "epoch": 0.07693,
      "grad_norm": 0.968186114392162,
      "learning_rate": 0.003,
      "loss": 4.0368,
      "step": 7693
    },
    {
      "epoch": 0.07694,
      "grad_norm": 1.1180368633304685,
      "learning_rate": 0.003,
      "loss": 4.0423,
      "step": 7694
    },
    {
      "epoch": 0.07695,
      "grad_norm": 1.018150504213498,
      "learning_rate": 0.003,
      "loss": 4.0628,
      "step": 7695
    },
    {
      "epoch": 0.07696,
      "grad_norm": 1.1134933391530109,
      "learning_rate": 0.003,
      "loss": 4.0017,
      "step": 7696
    },
    {
      "epoch": 0.07697,
      "grad_norm": 1.112740567601611,
      "learning_rate": 0.003,
      "loss": 4.0846,
      "step": 7697
    },
    {
      "epoch": 0.07698,
      "grad_norm": 1.035646453959349,
      "learning_rate": 0.003,
      "loss": 4.0586,
      "step": 7698
    },
    {
      "epoch": 0.07699,
      "grad_norm": 1.1776754763679083,
      "learning_rate": 0.003,
      "loss": 4.089,
      "step": 7699
    },
    {
      "epoch": 0.077,
      "grad_norm": 1.0846924700885932,
      "learning_rate": 0.003,
      "loss": 4.0574,
      "step": 7700
    },
    {
      "epoch": 0.07701,
      "grad_norm": 1.1355756172933456,
      "learning_rate": 0.003,
      "loss": 4.0374,
      "step": 7701
    },
    {
      "epoch": 0.07702,
      "grad_norm": 1.2445054567640432,
      "learning_rate": 0.003,
      "loss": 4.0441,
      "step": 7702
    },
    {
      "epoch": 0.07703,
      "grad_norm": 0.778195828814493,
      "learning_rate": 0.003,
      "loss": 4.0516,
      "step": 7703
    },
    {
      "epoch": 0.07704,
      "grad_norm": 0.7695651423057185,
      "learning_rate": 0.003,
      "loss": 4.0515,
      "step": 7704
    },
    {
      "epoch": 0.07705,
      "grad_norm": 0.8208723519118769,
      "learning_rate": 0.003,
      "loss": 4.0325,
      "step": 7705
    },
    {
      "epoch": 0.07706,
      "grad_norm": 1.0591846703576349,
      "learning_rate": 0.003,
      "loss": 4.0574,
      "step": 7706
    },
    {
      "epoch": 0.07707,
      "grad_norm": 1.4331501433140448,
      "learning_rate": 0.003,
      "loss": 4.0629,
      "step": 7707
    },
    {
      "epoch": 0.07708,
      "grad_norm": 0.8586416225127098,
      "learning_rate": 0.003,
      "loss": 4.0661,
      "step": 7708
    },
    {
      "epoch": 0.07709,
      "grad_norm": 0.9896715342897399,
      "learning_rate": 0.003,
      "loss": 4.0531,
      "step": 7709
    },
    {
      "epoch": 0.0771,
      "grad_norm": 1.0247102942561306,
      "learning_rate": 0.003,
      "loss": 4.0286,
      "step": 7710
    },
    {
      "epoch": 0.07711,
      "grad_norm": 0.9573824195950065,
      "learning_rate": 0.003,
      "loss": 4.0344,
      "step": 7711
    },
    {
      "epoch": 0.07712,
      "grad_norm": 0.9223979651594529,
      "learning_rate": 0.003,
      "loss": 4.0435,
      "step": 7712
    },
    {
      "epoch": 0.07713,
      "grad_norm": 1.123004149058808,
      "learning_rate": 0.003,
      "loss": 4.0234,
      "step": 7713
    },
    {
      "epoch": 0.07714,
      "grad_norm": 1.1362346630117264,
      "learning_rate": 0.003,
      "loss": 4.0278,
      "step": 7714
    },
    {
      "epoch": 0.07715,
      "grad_norm": 0.9873091975110139,
      "learning_rate": 0.003,
      "loss": 4.0505,
      "step": 7715
    },
    {
      "epoch": 0.07716,
      "grad_norm": 1.1616502010150347,
      "learning_rate": 0.003,
      "loss": 4.0362,
      "step": 7716
    },
    {
      "epoch": 0.07717,
      "grad_norm": 1.1465651911213932,
      "learning_rate": 0.003,
      "loss": 4.0328,
      "step": 7717
    },
    {
      "epoch": 0.07718,
      "grad_norm": 1.07816656816876,
      "learning_rate": 0.003,
      "loss": 4.0732,
      "step": 7718
    },
    {
      "epoch": 0.07719,
      "grad_norm": 0.879915285523726,
      "learning_rate": 0.003,
      "loss": 4.0398,
      "step": 7719
    },
    {
      "epoch": 0.0772,
      "grad_norm": 0.8694587190603413,
      "learning_rate": 0.003,
      "loss": 4.0681,
      "step": 7720
    },
    {
      "epoch": 0.07721,
      "grad_norm": 0.9644244720161652,
      "learning_rate": 0.003,
      "loss": 4.029,
      "step": 7721
    },
    {
      "epoch": 0.07722,
      "grad_norm": 1.0848954562449826,
      "learning_rate": 0.003,
      "loss": 4.0671,
      "step": 7722
    },
    {
      "epoch": 0.07723,
      "grad_norm": 1.2394893053064837,
      "learning_rate": 0.003,
      "loss": 4.0754,
      "step": 7723
    },
    {
      "epoch": 0.07724,
      "grad_norm": 0.9558592147764633,
      "learning_rate": 0.003,
      "loss": 4.0488,
      "step": 7724
    },
    {
      "epoch": 0.07725,
      "grad_norm": 1.1959360495832247,
      "learning_rate": 0.003,
      "loss": 4.0466,
      "step": 7725
    },
    {
      "epoch": 0.07726,
      "grad_norm": 0.9517967220511826,
      "learning_rate": 0.003,
      "loss": 4.0652,
      "step": 7726
    },
    {
      "epoch": 0.07727,
      "grad_norm": 1.1579653372604413,
      "learning_rate": 0.003,
      "loss": 4.0306,
      "step": 7727
    },
    {
      "epoch": 0.07728,
      "grad_norm": 1.1068848248552594,
      "learning_rate": 0.003,
      "loss": 4.0411,
      "step": 7728
    },
    {
      "epoch": 0.07729,
      "grad_norm": 0.9527663111508775,
      "learning_rate": 0.003,
      "loss": 4.0615,
      "step": 7729
    },
    {
      "epoch": 0.0773,
      "grad_norm": 0.82211244826477,
      "learning_rate": 0.003,
      "loss": 4.0496,
      "step": 7730
    },
    {
      "epoch": 0.07731,
      "grad_norm": 0.8528406417127462,
      "learning_rate": 0.003,
      "loss": 4.0378,
      "step": 7731
    },
    {
      "epoch": 0.07732,
      "grad_norm": 0.9703295806343482,
      "learning_rate": 0.003,
      "loss": 4.0531,
      "step": 7732
    },
    {
      "epoch": 0.07733,
      "grad_norm": 1.233288598154536,
      "learning_rate": 0.003,
      "loss": 4.0637,
      "step": 7733
    },
    {
      "epoch": 0.07734,
      "grad_norm": 1.1910360013875985,
      "learning_rate": 0.003,
      "loss": 4.0635,
      "step": 7734
    },
    {
      "epoch": 0.07735,
      "grad_norm": 1.1528280039800138,
      "learning_rate": 0.003,
      "loss": 4.0384,
      "step": 7735
    },
    {
      "epoch": 0.07736,
      "grad_norm": 1.163933383869731,
      "learning_rate": 0.003,
      "loss": 4.0635,
      "step": 7736
    },
    {
      "epoch": 0.07737,
      "grad_norm": 0.9130491112837049,
      "learning_rate": 0.003,
      "loss": 4.0205,
      "step": 7737
    },
    {
      "epoch": 0.07738,
      "grad_norm": 0.9070020333481247,
      "learning_rate": 0.003,
      "loss": 4.0379,
      "step": 7738
    },
    {
      "epoch": 0.07739,
      "grad_norm": 0.9956178440672993,
      "learning_rate": 0.003,
      "loss": 4.0491,
      "step": 7739
    },
    {
      "epoch": 0.0774,
      "grad_norm": 1.2016421176014782,
      "learning_rate": 0.003,
      "loss": 4.0329,
      "step": 7740
    },
    {
      "epoch": 0.07741,
      "grad_norm": 1.0114172630176164,
      "learning_rate": 0.003,
      "loss": 4.0368,
      "step": 7741
    },
    {
      "epoch": 0.07742,
      "grad_norm": 1.0767057084476068,
      "learning_rate": 0.003,
      "loss": 4.0258,
      "step": 7742
    },
    {
      "epoch": 0.07743,
      "grad_norm": 1.140758765621141,
      "learning_rate": 0.003,
      "loss": 4.0487,
      "step": 7743
    },
    {
      "epoch": 0.07744,
      "grad_norm": 0.941795514834158,
      "learning_rate": 0.003,
      "loss": 4.0431,
      "step": 7744
    },
    {
      "epoch": 0.07745,
      "grad_norm": 0.947968983480798,
      "learning_rate": 0.003,
      "loss": 4.0436,
      "step": 7745
    },
    {
      "epoch": 0.07746,
      "grad_norm": 1.1438558351135404,
      "learning_rate": 0.003,
      "loss": 4.0519,
      "step": 7746
    },
    {
      "epoch": 0.07747,
      "grad_norm": 1.1271433369048343,
      "learning_rate": 0.003,
      "loss": 4.055,
      "step": 7747
    },
    {
      "epoch": 0.07748,
      "grad_norm": 1.068011422911948,
      "learning_rate": 0.003,
      "loss": 4.0779,
      "step": 7748
    },
    {
      "epoch": 0.07749,
      "grad_norm": 1.1673301793274091,
      "learning_rate": 0.003,
      "loss": 4.077,
      "step": 7749
    },
    {
      "epoch": 0.0775,
      "grad_norm": 0.9871201697529424,
      "learning_rate": 0.003,
      "loss": 4.0304,
      "step": 7750
    },
    {
      "epoch": 0.07751,
      "grad_norm": 1.1865630409028847,
      "learning_rate": 0.003,
      "loss": 4.0487,
      "step": 7751
    },
    {
      "epoch": 0.07752,
      "grad_norm": 0.9135508252960873,
      "learning_rate": 0.003,
      "loss": 4.0648,
      "step": 7752
    },
    {
      "epoch": 0.07753,
      "grad_norm": 1.0491466617434224,
      "learning_rate": 0.003,
      "loss": 4.0338,
      "step": 7753
    },
    {
      "epoch": 0.07754,
      "grad_norm": 1.0597062148757896,
      "learning_rate": 0.003,
      "loss": 4.0448,
      "step": 7754
    },
    {
      "epoch": 0.07755,
      "grad_norm": 1.0522100404904275,
      "learning_rate": 0.003,
      "loss": 4.0506,
      "step": 7755
    },
    {
      "epoch": 0.07756,
      "grad_norm": 1.1298394191949892,
      "learning_rate": 0.003,
      "loss": 4.0644,
      "step": 7756
    },
    {
      "epoch": 0.07757,
      "grad_norm": 0.9473952380257887,
      "learning_rate": 0.003,
      "loss": 4.0334,
      "step": 7757
    },
    {
      "epoch": 0.07758,
      "grad_norm": 1.0741481447103787,
      "learning_rate": 0.003,
      "loss": 4.0321,
      "step": 7758
    },
    {
      "epoch": 0.07759,
      "grad_norm": 1.1033185053370784,
      "learning_rate": 0.003,
      "loss": 4.0448,
      "step": 7759
    },
    {
      "epoch": 0.0776,
      "grad_norm": 0.946764770560814,
      "learning_rate": 0.003,
      "loss": 4.0215,
      "step": 7760
    },
    {
      "epoch": 0.07761,
      "grad_norm": 1.1024391112725458,
      "learning_rate": 0.003,
      "loss": 4.0647,
      "step": 7761
    },
    {
      "epoch": 0.07762,
      "grad_norm": 1.0452461471198484,
      "learning_rate": 0.003,
      "loss": 4.0462,
      "step": 7762
    },
    {
      "epoch": 0.07763,
      "grad_norm": 1.1390912229610954,
      "learning_rate": 0.003,
      "loss": 4.0643,
      "step": 7763
    },
    {
      "epoch": 0.07764,
      "grad_norm": 1.0090027084277429,
      "learning_rate": 0.003,
      "loss": 4.0693,
      "step": 7764
    },
    {
      "epoch": 0.07765,
      "grad_norm": 1.1375674959522515,
      "learning_rate": 0.003,
      "loss": 4.0537,
      "step": 7765
    },
    {
      "epoch": 0.07766,
      "grad_norm": 1.2062883449499884,
      "learning_rate": 0.003,
      "loss": 4.0355,
      "step": 7766
    },
    {
      "epoch": 0.07767,
      "grad_norm": 1.1804562993590413,
      "learning_rate": 0.003,
      "loss": 4.0355,
      "step": 7767
    },
    {
      "epoch": 0.07768,
      "grad_norm": 1.0684635284018162,
      "learning_rate": 0.003,
      "loss": 4.0469,
      "step": 7768
    },
    {
      "epoch": 0.07769,
      "grad_norm": 1.1159137124746297,
      "learning_rate": 0.003,
      "loss": 4.0537,
      "step": 7769
    },
    {
      "epoch": 0.0777,
      "grad_norm": 1.0678717136438307,
      "learning_rate": 0.003,
      "loss": 4.065,
      "step": 7770
    },
    {
      "epoch": 0.07771,
      "grad_norm": 0.9559701342030749,
      "learning_rate": 0.003,
      "loss": 4.0601,
      "step": 7771
    },
    {
      "epoch": 0.07772,
      "grad_norm": 1.0062287758341482,
      "learning_rate": 0.003,
      "loss": 4.0543,
      "step": 7772
    },
    {
      "epoch": 0.07773,
      "grad_norm": 1.0966855486122142,
      "learning_rate": 0.003,
      "loss": 4.0267,
      "step": 7773
    },
    {
      "epoch": 0.07774,
      "grad_norm": 0.9558412183434721,
      "learning_rate": 0.003,
      "loss": 4.0484,
      "step": 7774
    },
    {
      "epoch": 0.07775,
      "grad_norm": 1.2137613391546822,
      "learning_rate": 0.003,
      "loss": 4.0647,
      "step": 7775
    },
    {
      "epoch": 0.07776,
      "grad_norm": 0.7820218930391715,
      "learning_rate": 0.003,
      "loss": 4.0443,
      "step": 7776
    },
    {
      "epoch": 0.07777,
      "grad_norm": 0.7196952870861936,
      "learning_rate": 0.003,
      "loss": 4.0562,
      "step": 7777
    },
    {
      "epoch": 0.07778,
      "grad_norm": 0.9999098617704734,
      "learning_rate": 0.003,
      "loss": 4.0668,
      "step": 7778
    },
    {
      "epoch": 0.07779,
      "grad_norm": 1.4627741712221116,
      "learning_rate": 0.003,
      "loss": 4.0399,
      "step": 7779
    },
    {
      "epoch": 0.0778,
      "grad_norm": 0.9529840352238272,
      "learning_rate": 0.003,
      "loss": 4.0275,
      "step": 7780
    },
    {
      "epoch": 0.07781,
      "grad_norm": 1.2285518753842994,
      "learning_rate": 0.003,
      "loss": 4.0243,
      "step": 7781
    },
    {
      "epoch": 0.07782,
      "grad_norm": 1.1404528733856336,
      "learning_rate": 0.003,
      "loss": 4.0406,
      "step": 7782
    },
    {
      "epoch": 0.07783,
      "grad_norm": 1.1975303734486777,
      "learning_rate": 0.003,
      "loss": 4.0512,
      "step": 7783
    },
    {
      "epoch": 0.07784,
      "grad_norm": 1.0934483916125426,
      "learning_rate": 0.003,
      "loss": 4.0452,
      "step": 7784
    },
    {
      "epoch": 0.07785,
      "grad_norm": 1.108972454763063,
      "learning_rate": 0.003,
      "loss": 4.0734,
      "step": 7785
    },
    {
      "epoch": 0.07786,
      "grad_norm": 0.9879792571747996,
      "learning_rate": 0.003,
      "loss": 4.0372,
      "step": 7786
    },
    {
      "epoch": 0.07787,
      "grad_norm": 1.2023448684879607,
      "learning_rate": 0.003,
      "loss": 4.0284,
      "step": 7787
    },
    {
      "epoch": 0.07788,
      "grad_norm": 0.9538752828642438,
      "learning_rate": 0.003,
      "loss": 4.0417,
      "step": 7788
    },
    {
      "epoch": 0.07789,
      "grad_norm": 1.0308214934344375,
      "learning_rate": 0.003,
      "loss": 4.0275,
      "step": 7789
    },
    {
      "epoch": 0.0779,
      "grad_norm": 1.0650283574186172,
      "learning_rate": 0.003,
      "loss": 4.0806,
      "step": 7790
    },
    {
      "epoch": 0.07791,
      "grad_norm": 0.9941845094809683,
      "learning_rate": 0.003,
      "loss": 4.0381,
      "step": 7791
    },
    {
      "epoch": 0.07792,
      "grad_norm": 1.0225769158424693,
      "learning_rate": 0.003,
      "loss": 4.024,
      "step": 7792
    },
    {
      "epoch": 0.07793,
      "grad_norm": 0.9877404519817934,
      "learning_rate": 0.003,
      "loss": 4.0231,
      "step": 7793
    },
    {
      "epoch": 0.07794,
      "grad_norm": 1.1151886303616545,
      "learning_rate": 0.003,
      "loss": 4.0809,
      "step": 7794
    },
    {
      "epoch": 0.07795,
      "grad_norm": 0.8354209579395252,
      "learning_rate": 0.003,
      "loss": 4.0156,
      "step": 7795
    },
    {
      "epoch": 0.07796,
      "grad_norm": 0.8544590286302752,
      "learning_rate": 0.003,
      "loss": 4.0347,
      "step": 7796
    },
    {
      "epoch": 0.07797,
      "grad_norm": 0.9428013064341034,
      "learning_rate": 0.003,
      "loss": 4.0335,
      "step": 7797
    },
    {
      "epoch": 0.07798,
      "grad_norm": 1.1393969133912794,
      "learning_rate": 0.003,
      "loss": 4.0745,
      "step": 7798
    },
    {
      "epoch": 0.07799,
      "grad_norm": 1.1641475465913624,
      "learning_rate": 0.003,
      "loss": 4.0806,
      "step": 7799
    },
    {
      "epoch": 0.078,
      "grad_norm": 1.0641337854621304,
      "learning_rate": 0.003,
      "loss": 4.0365,
      "step": 7800
    },
    {
      "epoch": 0.07801,
      "grad_norm": 1.3641996826697704,
      "learning_rate": 0.003,
      "loss": 4.055,
      "step": 7801
    },
    {
      "epoch": 0.07802,
      "grad_norm": 1.1452620077902496,
      "learning_rate": 0.003,
      "loss": 4.0327,
      "step": 7802
    },
    {
      "epoch": 0.07803,
      "grad_norm": 1.1170775188247835,
      "learning_rate": 0.003,
      "loss": 4.0409,
      "step": 7803
    },
    {
      "epoch": 0.07804,
      "grad_norm": 1.296395122652786,
      "learning_rate": 0.003,
      "loss": 4.0696,
      "step": 7804
    },
    {
      "epoch": 0.07805,
      "grad_norm": 0.9159841573401555,
      "learning_rate": 0.003,
      "loss": 4.0505,
      "step": 7805
    },
    {
      "epoch": 0.07806,
      "grad_norm": 1.0076146294344561,
      "learning_rate": 0.003,
      "loss": 4.0657,
      "step": 7806
    },
    {
      "epoch": 0.07807,
      "grad_norm": 1.0288357992024348,
      "learning_rate": 0.003,
      "loss": 4.068,
      "step": 7807
    },
    {
      "epoch": 0.07808,
      "grad_norm": 0.9962519995561984,
      "learning_rate": 0.003,
      "loss": 4.0352,
      "step": 7808
    },
    {
      "epoch": 0.07809,
      "grad_norm": 0.9318878928153395,
      "learning_rate": 0.003,
      "loss": 4.0501,
      "step": 7809
    },
    {
      "epoch": 0.0781,
      "grad_norm": 0.9578002546946285,
      "learning_rate": 0.003,
      "loss": 4.0328,
      "step": 7810
    },
    {
      "epoch": 0.07811,
      "grad_norm": 1.2362712301307206,
      "learning_rate": 0.003,
      "loss": 4.0478,
      "step": 7811
    },
    {
      "epoch": 0.07812,
      "grad_norm": 1.0902092466065818,
      "learning_rate": 0.003,
      "loss": 4.0461,
      "step": 7812
    },
    {
      "epoch": 0.07813,
      "grad_norm": 1.0656770200110066,
      "learning_rate": 0.003,
      "loss": 4.0609,
      "step": 7813
    },
    {
      "epoch": 0.07814,
      "grad_norm": 0.9616877242084323,
      "learning_rate": 0.003,
      "loss": 4.0843,
      "step": 7814
    },
    {
      "epoch": 0.07815,
      "grad_norm": 1.0827894290436153,
      "learning_rate": 0.003,
      "loss": 4.0511,
      "step": 7815
    },
    {
      "epoch": 0.07816,
      "grad_norm": 0.951863534026633,
      "learning_rate": 0.003,
      "loss": 4.0369,
      "step": 7816
    },
    {
      "epoch": 0.07817,
      "grad_norm": 1.0693420699978473,
      "learning_rate": 0.003,
      "loss": 4.0449,
      "step": 7817
    },
    {
      "epoch": 0.07818,
      "grad_norm": 1.1793875665990308,
      "learning_rate": 0.003,
      "loss": 4.0405,
      "step": 7818
    },
    {
      "epoch": 0.07819,
      "grad_norm": 0.9806237203551665,
      "learning_rate": 0.003,
      "loss": 4.0232,
      "step": 7819
    },
    {
      "epoch": 0.0782,
      "grad_norm": 1.074778244039972,
      "learning_rate": 0.003,
      "loss": 4.0707,
      "step": 7820
    },
    {
      "epoch": 0.07821,
      "grad_norm": 0.9930768173537193,
      "learning_rate": 0.003,
      "loss": 4.0324,
      "step": 7821
    },
    {
      "epoch": 0.07822,
      "grad_norm": 1.0685158401451227,
      "learning_rate": 0.003,
      "loss": 4.0251,
      "step": 7822
    },
    {
      "epoch": 0.07823,
      "grad_norm": 1.0871181323072148,
      "learning_rate": 0.003,
      "loss": 4.0412,
      "step": 7823
    },
    {
      "epoch": 0.07824,
      "grad_norm": 1.21966298784846,
      "learning_rate": 0.003,
      "loss": 4.0758,
      "step": 7824
    },
    {
      "epoch": 0.07825,
      "grad_norm": 1.0804803596808354,
      "learning_rate": 0.003,
      "loss": 4.0371,
      "step": 7825
    },
    {
      "epoch": 0.07826,
      "grad_norm": 1.019090987969821,
      "learning_rate": 0.003,
      "loss": 4.0293,
      "step": 7826
    },
    {
      "epoch": 0.07827,
      "grad_norm": 1.0988560306698911,
      "learning_rate": 0.003,
      "loss": 4.0557,
      "step": 7827
    },
    {
      "epoch": 0.07828,
      "grad_norm": 1.0517679422087889,
      "learning_rate": 0.003,
      "loss": 4.0585,
      "step": 7828
    },
    {
      "epoch": 0.07829,
      "grad_norm": 0.9621053073017727,
      "learning_rate": 0.003,
      "loss": 4.0619,
      "step": 7829
    },
    {
      "epoch": 0.0783,
      "grad_norm": 0.9689820333439864,
      "learning_rate": 0.003,
      "loss": 4.0259,
      "step": 7830
    },
    {
      "epoch": 0.07831,
      "grad_norm": 1.023016739445038,
      "learning_rate": 0.003,
      "loss": 4.0384,
      "step": 7831
    },
    {
      "epoch": 0.07832,
      "grad_norm": 0.9984156227464857,
      "learning_rate": 0.003,
      "loss": 4.0468,
      "step": 7832
    },
    {
      "epoch": 0.07833,
      "grad_norm": 0.9919107518305011,
      "learning_rate": 0.003,
      "loss": 4.0458,
      "step": 7833
    },
    {
      "epoch": 0.07834,
      "grad_norm": 1.1543019969673187,
      "learning_rate": 0.003,
      "loss": 4.0981,
      "step": 7834
    },
    {
      "epoch": 0.07835,
      "grad_norm": 1.0681590776729737,
      "learning_rate": 0.003,
      "loss": 4.0592,
      "step": 7835
    },
    {
      "epoch": 0.07836,
      "grad_norm": 1.0574570781003565,
      "learning_rate": 0.003,
      "loss": 4.0415,
      "step": 7836
    },
    {
      "epoch": 0.07837,
      "grad_norm": 1.2018757893559897,
      "learning_rate": 0.003,
      "loss": 4.0556,
      "step": 7837
    },
    {
      "epoch": 0.07838,
      "grad_norm": 0.9161356805849349,
      "learning_rate": 0.003,
      "loss": 4.0076,
      "step": 7838
    },
    {
      "epoch": 0.07839,
      "grad_norm": 0.9907463263104761,
      "learning_rate": 0.003,
      "loss": 4.0327,
      "step": 7839
    },
    {
      "epoch": 0.0784,
      "grad_norm": 1.1856358777888196,
      "learning_rate": 0.003,
      "loss": 4.0649,
      "step": 7840
    },
    {
      "epoch": 0.07841,
      "grad_norm": 0.8938988806605744,
      "learning_rate": 0.003,
      "loss": 4.0352,
      "step": 7841
    },
    {
      "epoch": 0.07842,
      "grad_norm": 1.0190036904110218,
      "learning_rate": 0.003,
      "loss": 4.0337,
      "step": 7842
    },
    {
      "epoch": 0.07843,
      "grad_norm": 1.2414498860928165,
      "learning_rate": 0.003,
      "loss": 4.057,
      "step": 7843
    },
    {
      "epoch": 0.07844,
      "grad_norm": 0.8645804084986568,
      "learning_rate": 0.003,
      "loss": 4.0496,
      "step": 7844
    },
    {
      "epoch": 0.07845,
      "grad_norm": 1.2555968702914302,
      "learning_rate": 0.003,
      "loss": 4.0658,
      "step": 7845
    },
    {
      "epoch": 0.07846,
      "grad_norm": 1.1827535638877162,
      "learning_rate": 0.003,
      "loss": 4.0827,
      "step": 7846
    },
    {
      "epoch": 0.07847,
      "grad_norm": 1.0585750613727776,
      "learning_rate": 0.003,
      "loss": 4.027,
      "step": 7847
    },
    {
      "epoch": 0.07848,
      "grad_norm": 1.0251038257294671,
      "learning_rate": 0.003,
      "loss": 4.0683,
      "step": 7848
    },
    {
      "epoch": 0.07849,
      "grad_norm": 1.0012756568810086,
      "learning_rate": 0.003,
      "loss": 4.0769,
      "step": 7849
    },
    {
      "epoch": 0.0785,
      "grad_norm": 1.1087682350572658,
      "learning_rate": 0.003,
      "loss": 4.0509,
      "step": 7850
    },
    {
      "epoch": 0.07851,
      "grad_norm": 1.0570558235082463,
      "learning_rate": 0.003,
      "loss": 4.0395,
      "step": 7851
    },
    {
      "epoch": 0.07852,
      "grad_norm": 1.1870988713474258,
      "learning_rate": 0.003,
      "loss": 4.0778,
      "step": 7852
    },
    {
      "epoch": 0.07853,
      "grad_norm": 1.1877824271372333,
      "learning_rate": 0.003,
      "loss": 4.0481,
      "step": 7853
    },
    {
      "epoch": 0.07854,
      "grad_norm": 0.9795576119811179,
      "learning_rate": 0.003,
      "loss": 4.0601,
      "step": 7854
    },
    {
      "epoch": 0.07855,
      "grad_norm": 1.0106904572896982,
      "learning_rate": 0.003,
      "loss": 4.0471,
      "step": 7855
    },
    {
      "epoch": 0.07856,
      "grad_norm": 1.074012949318807,
      "learning_rate": 0.003,
      "loss": 4.0477,
      "step": 7856
    },
    {
      "epoch": 0.07857,
      "grad_norm": 1.0682693133140797,
      "learning_rate": 0.003,
      "loss": 4.0713,
      "step": 7857
    },
    {
      "epoch": 0.07858,
      "grad_norm": 0.9622334233718971,
      "learning_rate": 0.003,
      "loss": 4.0585,
      "step": 7858
    },
    {
      "epoch": 0.07859,
      "grad_norm": 1.2435642589923817,
      "learning_rate": 0.003,
      "loss": 4.0367,
      "step": 7859
    },
    {
      "epoch": 0.0786,
      "grad_norm": 0.9713678691004027,
      "learning_rate": 0.003,
      "loss": 4.0451,
      "step": 7860
    },
    {
      "epoch": 0.07861,
      "grad_norm": 1.1992439198541343,
      "learning_rate": 0.003,
      "loss": 4.0699,
      "step": 7861
    },
    {
      "epoch": 0.07862,
      "grad_norm": 0.9729097717840791,
      "learning_rate": 0.003,
      "loss": 4.0833,
      "step": 7862
    },
    {
      "epoch": 0.07863,
      "grad_norm": 1.2123850094787298,
      "learning_rate": 0.003,
      "loss": 4.0234,
      "step": 7863
    },
    {
      "epoch": 0.07864,
      "grad_norm": 1.0554740247450138,
      "learning_rate": 0.003,
      "loss": 4.059,
      "step": 7864
    },
    {
      "epoch": 0.07865,
      "grad_norm": 1.2691503318540769,
      "learning_rate": 0.003,
      "loss": 4.0412,
      "step": 7865
    },
    {
      "epoch": 0.07866,
      "grad_norm": 0.9510762936067487,
      "learning_rate": 0.003,
      "loss": 4.0409,
      "step": 7866
    },
    {
      "epoch": 0.07867,
      "grad_norm": 1.0063305379540366,
      "learning_rate": 0.003,
      "loss": 4.0292,
      "step": 7867
    },
    {
      "epoch": 0.07868,
      "grad_norm": 1.0938115833363737,
      "learning_rate": 0.003,
      "loss": 4.0689,
      "step": 7868
    },
    {
      "epoch": 0.07869,
      "grad_norm": 1.0089952434242428,
      "learning_rate": 0.003,
      "loss": 4.0493,
      "step": 7869
    },
    {
      "epoch": 0.0787,
      "grad_norm": 1.0654347163848492,
      "learning_rate": 0.003,
      "loss": 4.0339,
      "step": 7870
    },
    {
      "epoch": 0.07871,
      "grad_norm": 1.1907030044141855,
      "learning_rate": 0.003,
      "loss": 4.0898,
      "step": 7871
    },
    {
      "epoch": 0.07872,
      "grad_norm": 0.8730754246990861,
      "learning_rate": 0.003,
      "loss": 4.0742,
      "step": 7872
    },
    {
      "epoch": 0.07873,
      "grad_norm": 1.0021741546132197,
      "learning_rate": 0.003,
      "loss": 4.0347,
      "step": 7873
    },
    {
      "epoch": 0.07874,
      "grad_norm": 1.4383882439567937,
      "learning_rate": 0.003,
      "loss": 4.0564,
      "step": 7874
    },
    {
      "epoch": 0.07875,
      "grad_norm": 0.8464322093253535,
      "learning_rate": 0.003,
      "loss": 4.0598,
      "step": 7875
    },
    {
      "epoch": 0.07876,
      "grad_norm": 0.86475881480814,
      "learning_rate": 0.003,
      "loss": 4.058,
      "step": 7876
    },
    {
      "epoch": 0.07877,
      "grad_norm": 1.109652921566004,
      "learning_rate": 0.003,
      "loss": 4.0439,
      "step": 7877
    },
    {
      "epoch": 0.07878,
      "grad_norm": 1.0879946460453025,
      "learning_rate": 0.003,
      "loss": 4.0542,
      "step": 7878
    },
    {
      "epoch": 0.07879,
      "grad_norm": 1.0666099280508157,
      "learning_rate": 0.003,
      "loss": 4.0568,
      "step": 7879
    },
    {
      "epoch": 0.0788,
      "grad_norm": 1.1532229023404108,
      "learning_rate": 0.003,
      "loss": 4.0406,
      "step": 7880
    },
    {
      "epoch": 0.07881,
      "grad_norm": 0.9157988575667765,
      "learning_rate": 0.003,
      "loss": 4.0443,
      "step": 7881
    },
    {
      "epoch": 0.07882,
      "grad_norm": 0.9989329398447048,
      "learning_rate": 0.003,
      "loss": 4.0402,
      "step": 7882
    },
    {
      "epoch": 0.07883,
      "grad_norm": 1.1179829054728383,
      "learning_rate": 0.003,
      "loss": 4.0276,
      "step": 7883
    },
    {
      "epoch": 0.07884,
      "grad_norm": 1.0047906870446375,
      "learning_rate": 0.003,
      "loss": 4.047,
      "step": 7884
    },
    {
      "epoch": 0.07885,
      "grad_norm": 1.111240788046051,
      "learning_rate": 0.003,
      "loss": 4.0399,
      "step": 7885
    },
    {
      "epoch": 0.07886,
      "grad_norm": 1.0009988839259023,
      "learning_rate": 0.003,
      "loss": 4.0631,
      "step": 7886
    },
    {
      "epoch": 0.07887,
      "grad_norm": 1.0893230346535585,
      "learning_rate": 0.003,
      "loss": 4.0388,
      "step": 7887
    },
    {
      "epoch": 0.07888,
      "grad_norm": 1.05660595072792,
      "learning_rate": 0.003,
      "loss": 4.0533,
      "step": 7888
    },
    {
      "epoch": 0.07889,
      "grad_norm": 1.159409811390816,
      "learning_rate": 0.003,
      "loss": 4.0259,
      "step": 7889
    },
    {
      "epoch": 0.0789,
      "grad_norm": 0.8841007147309605,
      "learning_rate": 0.003,
      "loss": 4.0353,
      "step": 7890
    },
    {
      "epoch": 0.07891,
      "grad_norm": 1.1474805660791436,
      "learning_rate": 0.003,
      "loss": 4.0416,
      "step": 7891
    },
    {
      "epoch": 0.07892,
      "grad_norm": 1.0700800869552938,
      "learning_rate": 0.003,
      "loss": 4.0379,
      "step": 7892
    },
    {
      "epoch": 0.07893,
      "grad_norm": 1.0506523629123479,
      "learning_rate": 0.003,
      "loss": 4.0583,
      "step": 7893
    },
    {
      "epoch": 0.07894,
      "grad_norm": 1.1196303889660475,
      "learning_rate": 0.003,
      "loss": 4.0305,
      "step": 7894
    },
    {
      "epoch": 0.07895,
      "grad_norm": 1.28735191775539,
      "learning_rate": 0.003,
      "loss": 4.0322,
      "step": 7895
    },
    {
      "epoch": 0.07896,
      "grad_norm": 0.9643317762261,
      "learning_rate": 0.003,
      "loss": 4.0399,
      "step": 7896
    },
    {
      "epoch": 0.07897,
      "grad_norm": 1.1070256196078418,
      "learning_rate": 0.003,
      "loss": 4.0687,
      "step": 7897
    },
    {
      "epoch": 0.07898,
      "grad_norm": 1.0355518469983214,
      "learning_rate": 0.003,
      "loss": 4.0448,
      "step": 7898
    },
    {
      "epoch": 0.07899,
      "grad_norm": 1.2282329250453026,
      "learning_rate": 0.003,
      "loss": 4.0288,
      "step": 7899
    },
    {
      "epoch": 0.079,
      "grad_norm": 1.0945607454529,
      "learning_rate": 0.003,
      "loss": 4.0341,
      "step": 7900
    },
    {
      "epoch": 0.07901,
      "grad_norm": 1.0648956548783817,
      "learning_rate": 0.003,
      "loss": 4.0677,
      "step": 7901
    },
    {
      "epoch": 0.07902,
      "grad_norm": 1.0992859940840842,
      "learning_rate": 0.003,
      "loss": 4.0207,
      "step": 7902
    },
    {
      "epoch": 0.07903,
      "grad_norm": 0.8834013507865798,
      "learning_rate": 0.003,
      "loss": 4.0311,
      "step": 7903
    },
    {
      "epoch": 0.07904,
      "grad_norm": 0.9999633090920114,
      "learning_rate": 0.003,
      "loss": 4.0176,
      "step": 7904
    },
    {
      "epoch": 0.07905,
      "grad_norm": 1.0963454219201827,
      "learning_rate": 0.003,
      "loss": 4.0094,
      "step": 7905
    },
    {
      "epoch": 0.07906,
      "grad_norm": 0.9167535868840054,
      "learning_rate": 0.003,
      "loss": 4.0244,
      "step": 7906
    },
    {
      "epoch": 0.07907,
      "grad_norm": 1.0671834336811867,
      "learning_rate": 0.003,
      "loss": 4.0514,
      "step": 7907
    },
    {
      "epoch": 0.07908,
      "grad_norm": 0.9233899864843036,
      "learning_rate": 0.003,
      "loss": 4.0529,
      "step": 7908
    },
    {
      "epoch": 0.07909,
      "grad_norm": 1.0062840807702038,
      "learning_rate": 0.003,
      "loss": 4.0559,
      "step": 7909
    },
    {
      "epoch": 0.0791,
      "grad_norm": 1.235984238772413,
      "learning_rate": 0.003,
      "loss": 4.0448,
      "step": 7910
    },
    {
      "epoch": 0.07911,
      "grad_norm": 0.9452902767282305,
      "learning_rate": 0.003,
      "loss": 4.06,
      "step": 7911
    },
    {
      "epoch": 0.07912,
      "grad_norm": 1.170408612990077,
      "learning_rate": 0.003,
      "loss": 4.0408,
      "step": 7912
    },
    {
      "epoch": 0.07913,
      "grad_norm": 0.96235951994912,
      "learning_rate": 0.003,
      "loss": 4.0247,
      "step": 7913
    },
    {
      "epoch": 0.07914,
      "grad_norm": 1.0169853669192117,
      "learning_rate": 0.003,
      "loss": 4.0405,
      "step": 7914
    },
    {
      "epoch": 0.07915,
      "grad_norm": 1.149306172968735,
      "learning_rate": 0.003,
      "loss": 4.0303,
      "step": 7915
    },
    {
      "epoch": 0.07916,
      "grad_norm": 0.9684272704389931,
      "learning_rate": 0.003,
      "loss": 4.0548,
      "step": 7916
    },
    {
      "epoch": 0.07917,
      "grad_norm": 1.0099443419186045,
      "learning_rate": 0.003,
      "loss": 4.0558,
      "step": 7917
    },
    {
      "epoch": 0.07918,
      "grad_norm": 1.2290204445682136,
      "learning_rate": 0.003,
      "loss": 4.0576,
      "step": 7918
    },
    {
      "epoch": 0.07919,
      "grad_norm": 0.950110955160198,
      "learning_rate": 0.003,
      "loss": 4.0108,
      "step": 7919
    },
    {
      "epoch": 0.0792,
      "grad_norm": 1.1616333982044544,
      "learning_rate": 0.003,
      "loss": 4.0437,
      "step": 7920
    },
    {
      "epoch": 0.07921,
      "grad_norm": 1.2133949678821083,
      "learning_rate": 0.003,
      "loss": 4.0543,
      "step": 7921
    },
    {
      "epoch": 0.07922,
      "grad_norm": 0.8406932934598226,
      "learning_rate": 0.003,
      "loss": 4.0258,
      "step": 7922
    },
    {
      "epoch": 0.07923,
      "grad_norm": 1.000384156582474,
      "learning_rate": 0.003,
      "loss": 4.0279,
      "step": 7923
    },
    {
      "epoch": 0.07924,
      "grad_norm": 1.102645789500658,
      "learning_rate": 0.003,
      "loss": 4.0286,
      "step": 7924
    },
    {
      "epoch": 0.07925,
      "grad_norm": 1.0684901214506717,
      "learning_rate": 0.003,
      "loss": 4.061,
      "step": 7925
    },
    {
      "epoch": 0.07926,
      "grad_norm": 1.293652658373931,
      "learning_rate": 0.003,
      "loss": 4.0717,
      "step": 7926
    },
    {
      "epoch": 0.07927,
      "grad_norm": 0.9389310994335556,
      "learning_rate": 0.003,
      "loss": 4.0079,
      "step": 7927
    },
    {
      "epoch": 0.07928,
      "grad_norm": 1.081617761304198,
      "learning_rate": 0.003,
      "loss": 4.0545,
      "step": 7928
    },
    {
      "epoch": 0.07929,
      "grad_norm": 0.9572759773190526,
      "learning_rate": 0.003,
      "loss": 4.0333,
      "step": 7929
    },
    {
      "epoch": 0.0793,
      "grad_norm": 1.150932445412838,
      "learning_rate": 0.003,
      "loss": 4.0239,
      "step": 7930
    },
    {
      "epoch": 0.07931,
      "grad_norm": 1.0459375556090538,
      "learning_rate": 0.003,
      "loss": 4.0584,
      "step": 7931
    },
    {
      "epoch": 0.07932,
      "grad_norm": 1.2162128355571467,
      "learning_rate": 0.003,
      "loss": 4.065,
      "step": 7932
    },
    {
      "epoch": 0.07933,
      "grad_norm": 0.8823256635368836,
      "learning_rate": 0.003,
      "loss": 4.0464,
      "step": 7933
    },
    {
      "epoch": 0.07934,
      "grad_norm": 1.0009325472605832,
      "learning_rate": 0.003,
      "loss": 4.069,
      "step": 7934
    },
    {
      "epoch": 0.07935,
      "grad_norm": 1.0096305457720214,
      "learning_rate": 0.003,
      "loss": 4.0474,
      "step": 7935
    },
    {
      "epoch": 0.07936,
      "grad_norm": 1.268310154088615,
      "learning_rate": 0.003,
      "loss": 4.0491,
      "step": 7936
    },
    {
      "epoch": 0.07937,
      "grad_norm": 0.9021101857561977,
      "learning_rate": 0.003,
      "loss": 4.0083,
      "step": 7937
    },
    {
      "epoch": 0.07938,
      "grad_norm": 1.0291910822408756,
      "learning_rate": 0.003,
      "loss": 4.0248,
      "step": 7938
    },
    {
      "epoch": 0.07939,
      "grad_norm": 1.2336928484312266,
      "learning_rate": 0.003,
      "loss": 4.0365,
      "step": 7939
    },
    {
      "epoch": 0.0794,
      "grad_norm": 0.9200004067230584,
      "learning_rate": 0.003,
      "loss": 4.0346,
      "step": 7940
    },
    {
      "epoch": 0.07941,
      "grad_norm": 1.2029826125319552,
      "learning_rate": 0.003,
      "loss": 4.0395,
      "step": 7941
    },
    {
      "epoch": 0.07942,
      "grad_norm": 0.9905351217758338,
      "learning_rate": 0.003,
      "loss": 4.0281,
      "step": 7942
    },
    {
      "epoch": 0.07943,
      "grad_norm": 1.1672820977045213,
      "learning_rate": 0.003,
      "loss": 4.0338,
      "step": 7943
    },
    {
      "epoch": 0.07944,
      "grad_norm": 0.9552098668578122,
      "learning_rate": 0.003,
      "loss": 4.0746,
      "step": 7944
    },
    {
      "epoch": 0.07945,
      "grad_norm": 1.1163448086544936,
      "learning_rate": 0.003,
      "loss": 4.0511,
      "step": 7945
    },
    {
      "epoch": 0.07946,
      "grad_norm": 1.1575061742112112,
      "learning_rate": 0.003,
      "loss": 4.0555,
      "step": 7946
    },
    {
      "epoch": 0.07947,
      "grad_norm": 0.9387057322081345,
      "learning_rate": 0.003,
      "loss": 4.0628,
      "step": 7947
    },
    {
      "epoch": 0.07948,
      "grad_norm": 1.036922515434725,
      "learning_rate": 0.003,
      "loss": 4.0502,
      "step": 7948
    },
    {
      "epoch": 0.07949,
      "grad_norm": 1.0571677074257047,
      "learning_rate": 0.003,
      "loss": 4.0631,
      "step": 7949
    },
    {
      "epoch": 0.0795,
      "grad_norm": 0.9485396434466922,
      "learning_rate": 0.003,
      "loss": 4.0181,
      "step": 7950
    },
    {
      "epoch": 0.07951,
      "grad_norm": 0.9358661587180489,
      "learning_rate": 0.003,
      "loss": 4.0206,
      "step": 7951
    },
    {
      "epoch": 0.07952,
      "grad_norm": 1.072262899758602,
      "learning_rate": 0.003,
      "loss": 4.0548,
      "step": 7952
    },
    {
      "epoch": 0.07953,
      "grad_norm": 1.1384679393249275,
      "learning_rate": 0.003,
      "loss": 4.0269,
      "step": 7953
    },
    {
      "epoch": 0.07954,
      "grad_norm": 1.188750550794104,
      "learning_rate": 0.003,
      "loss": 4.0651,
      "step": 7954
    },
    {
      "epoch": 0.07955,
      "grad_norm": 1.088794668390209,
      "learning_rate": 0.003,
      "loss": 4.0644,
      "step": 7955
    },
    {
      "epoch": 0.07956,
      "grad_norm": 0.944395467836122,
      "learning_rate": 0.003,
      "loss": 4.0425,
      "step": 7956
    },
    {
      "epoch": 0.07957,
      "grad_norm": 0.9876860612041447,
      "learning_rate": 0.003,
      "loss": 4.0326,
      "step": 7957
    },
    {
      "epoch": 0.07958,
      "grad_norm": 1.007844983649128,
      "learning_rate": 0.003,
      "loss": 4.0443,
      "step": 7958
    },
    {
      "epoch": 0.07959,
      "grad_norm": 1.0039606523415259,
      "learning_rate": 0.003,
      "loss": 4.05,
      "step": 7959
    },
    {
      "epoch": 0.0796,
      "grad_norm": 1.1452784506766251,
      "learning_rate": 0.003,
      "loss": 4.048,
      "step": 7960
    },
    {
      "epoch": 0.07961,
      "grad_norm": 1.1250238968213198,
      "learning_rate": 0.003,
      "loss": 4.0377,
      "step": 7961
    },
    {
      "epoch": 0.07962,
      "grad_norm": 0.8771519916323887,
      "learning_rate": 0.003,
      "loss": 4.0532,
      "step": 7962
    },
    {
      "epoch": 0.07963,
      "grad_norm": 1.039301187147565,
      "learning_rate": 0.003,
      "loss": 4.0245,
      "step": 7963
    },
    {
      "epoch": 0.07964,
      "grad_norm": 1.1539629521338333,
      "learning_rate": 0.003,
      "loss": 4.0476,
      "step": 7964
    },
    {
      "epoch": 0.07965,
      "grad_norm": 1.006514542734939,
      "learning_rate": 0.003,
      "loss": 4.0535,
      "step": 7965
    },
    {
      "epoch": 0.07966,
      "grad_norm": 1.2419647152063866,
      "learning_rate": 0.003,
      "loss": 4.0389,
      "step": 7966
    },
    {
      "epoch": 0.07967,
      "grad_norm": 1.0445292324352928,
      "learning_rate": 0.003,
      "loss": 4.0541,
      "step": 7967
    },
    {
      "epoch": 0.07968,
      "grad_norm": 1.1283302172541068,
      "learning_rate": 0.003,
      "loss": 4.0293,
      "step": 7968
    },
    {
      "epoch": 0.07969,
      "grad_norm": 0.9667428222328616,
      "learning_rate": 0.003,
      "loss": 4.0686,
      "step": 7969
    },
    {
      "epoch": 0.0797,
      "grad_norm": 0.9853555487701827,
      "learning_rate": 0.003,
      "loss": 4.0452,
      "step": 7970
    },
    {
      "epoch": 0.07971,
      "grad_norm": 0.9314086481126423,
      "learning_rate": 0.003,
      "loss": 4.0306,
      "step": 7971
    },
    {
      "epoch": 0.07972,
      "grad_norm": 1.041017548263576,
      "learning_rate": 0.003,
      "loss": 4.0393,
      "step": 7972
    },
    {
      "epoch": 0.07973,
      "grad_norm": 1.1632363339227705,
      "learning_rate": 0.003,
      "loss": 4.0602,
      "step": 7973
    },
    {
      "epoch": 0.07974,
      "grad_norm": 0.9448198238046855,
      "learning_rate": 0.003,
      "loss": 4.0367,
      "step": 7974
    },
    {
      "epoch": 0.07975,
      "grad_norm": 1.2575451524297467,
      "learning_rate": 0.003,
      "loss": 4.0759,
      "step": 7975
    },
    {
      "epoch": 0.07976,
      "grad_norm": 0.9254382565643743,
      "learning_rate": 0.003,
      "loss": 4.0542,
      "step": 7976
    },
    {
      "epoch": 0.07977,
      "grad_norm": 0.9554319748351229,
      "learning_rate": 0.003,
      "loss": 4.0433,
      "step": 7977
    },
    {
      "epoch": 0.07978,
      "grad_norm": 1.1583747304811194,
      "learning_rate": 0.003,
      "loss": 4.0571,
      "step": 7978
    },
    {
      "epoch": 0.07979,
      "grad_norm": 1.1000321798363142,
      "learning_rate": 0.003,
      "loss": 4.0365,
      "step": 7979
    },
    {
      "epoch": 0.0798,
      "grad_norm": 0.8911730010907487,
      "learning_rate": 0.003,
      "loss": 4.0614,
      "step": 7980
    },
    {
      "epoch": 0.07981,
      "grad_norm": 1.020237000586781,
      "learning_rate": 0.003,
      "loss": 4.0108,
      "step": 7981
    },
    {
      "epoch": 0.07982,
      "grad_norm": 1.3180466552295464,
      "learning_rate": 0.003,
      "loss": 4.0569,
      "step": 7982
    },
    {
      "epoch": 0.07983,
      "grad_norm": 1.071553392145814,
      "learning_rate": 0.003,
      "loss": 4.0767,
      "step": 7983
    },
    {
      "epoch": 0.07984,
      "grad_norm": 1.2117191312115865,
      "learning_rate": 0.003,
      "loss": 4.0646,
      "step": 7984
    },
    {
      "epoch": 0.07985,
      "grad_norm": 0.7909400534108864,
      "learning_rate": 0.003,
      "loss": 4.0138,
      "step": 7985
    },
    {
      "epoch": 0.07986,
      "grad_norm": 0.69774180892976,
      "learning_rate": 0.003,
      "loss": 4.0548,
      "step": 7986
    },
    {
      "epoch": 0.07987,
      "grad_norm": 0.7627415677016504,
      "learning_rate": 0.003,
      "loss": 4.0231,
      "step": 7987
    },
    {
      "epoch": 0.07988,
      "grad_norm": 0.7977914673045347,
      "learning_rate": 0.003,
      "loss": 4.0415,
      "step": 7988
    },
    {
      "epoch": 0.07989,
      "grad_norm": 0.9057464007553878,
      "learning_rate": 0.003,
      "loss": 4.0187,
      "step": 7989
    },
    {
      "epoch": 0.0799,
      "grad_norm": 0.9968132407767837,
      "learning_rate": 0.003,
      "loss": 4.0406,
      "step": 7990
    },
    {
      "epoch": 0.07991,
      "grad_norm": 1.0049624387001375,
      "learning_rate": 0.003,
      "loss": 4.0352,
      "step": 7991
    },
    {
      "epoch": 0.07992,
      "grad_norm": 1.2238112277907751,
      "learning_rate": 0.003,
      "loss": 4.0435,
      "step": 7992
    },
    {
      "epoch": 0.07993,
      "grad_norm": 0.9170174217210549,
      "learning_rate": 0.003,
      "loss": 4.0328,
      "step": 7993
    },
    {
      "epoch": 0.07994,
      "grad_norm": 0.9120312741869245,
      "learning_rate": 0.003,
      "loss": 4.0507,
      "step": 7994
    },
    {
      "epoch": 0.07995,
      "grad_norm": 1.0185635782956717,
      "learning_rate": 0.003,
      "loss": 4.0534,
      "step": 7995
    },
    {
      "epoch": 0.07996,
      "grad_norm": 1.1924384020647054,
      "learning_rate": 0.003,
      "loss": 4.0465,
      "step": 7996
    },
    {
      "epoch": 0.07997,
      "grad_norm": 1.1865042797171061,
      "learning_rate": 0.003,
      "loss": 4.0461,
      "step": 7997
    },
    {
      "epoch": 0.07998,
      "grad_norm": 0.9979487786588422,
      "learning_rate": 0.003,
      "loss": 4.0721,
      "step": 7998
    },
    {
      "epoch": 0.07999,
      "grad_norm": 1.0145256510147855,
      "learning_rate": 0.003,
      "loss": 4.0432,
      "step": 7999
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.0061604187402993,
      "learning_rate": 0.003,
      "loss": 4.0521,
      "step": 8000
    },
    {
      "epoch": 0.08001,
      "grad_norm": 1.0877536512304802,
      "learning_rate": 0.003,
      "loss": 4.0393,
      "step": 8001
    },
    {
      "epoch": 0.08002,
      "grad_norm": 1.103363283471722,
      "learning_rate": 0.003,
      "loss": 4.0286,
      "step": 8002
    },
    {
      "epoch": 0.08003,
      "grad_norm": 1.1565936510093848,
      "learning_rate": 0.003,
      "loss": 4.0567,
      "step": 8003
    },
    {
      "epoch": 0.08004,
      "grad_norm": 0.8339485185169685,
      "learning_rate": 0.003,
      "loss": 4.0307,
      "step": 8004
    },
    {
      "epoch": 0.08005,
      "grad_norm": 0.8957972908506947,
      "learning_rate": 0.003,
      "loss": 4.0559,
      "step": 8005
    },
    {
      "epoch": 0.08006,
      "grad_norm": 0.9755062166369889,
      "learning_rate": 0.003,
      "loss": 4.0344,
      "step": 8006
    },
    {
      "epoch": 0.08007,
      "grad_norm": 1.1356741968512036,
      "learning_rate": 0.003,
      "loss": 4.0319,
      "step": 8007
    },
    {
      "epoch": 0.08008,
      "grad_norm": 0.9798864056276021,
      "learning_rate": 0.003,
      "loss": 4.0174,
      "step": 8008
    },
    {
      "epoch": 0.08009,
      "grad_norm": 1.1435111910784777,
      "learning_rate": 0.003,
      "loss": 4.0388,
      "step": 8009
    },
    {
      "epoch": 0.0801,
      "grad_norm": 1.1205942333697474,
      "learning_rate": 0.003,
      "loss": 4.0338,
      "step": 8010
    },
    {
      "epoch": 0.08011,
      "grad_norm": 1.131476082802707,
      "learning_rate": 0.003,
      "loss": 4.0448,
      "step": 8011
    },
    {
      "epoch": 0.08012,
      "grad_norm": 1.0461557826019758,
      "learning_rate": 0.003,
      "loss": 4.0432,
      "step": 8012
    },
    {
      "epoch": 0.08013,
      "grad_norm": 1.0524770267958885,
      "learning_rate": 0.003,
      "loss": 4.0453,
      "step": 8013
    },
    {
      "epoch": 0.08014,
      "grad_norm": 1.2440328295727094,
      "learning_rate": 0.003,
      "loss": 4.0351,
      "step": 8014
    },
    {
      "epoch": 0.08015,
      "grad_norm": 1.114392646783546,
      "learning_rate": 0.003,
      "loss": 4.0656,
      "step": 8015
    },
    {
      "epoch": 0.08016,
      "grad_norm": 1.0812684439503044,
      "learning_rate": 0.003,
      "loss": 4.019,
      "step": 8016
    },
    {
      "epoch": 0.08017,
      "grad_norm": 1.1433960945173143,
      "learning_rate": 0.003,
      "loss": 4.0313,
      "step": 8017
    },
    {
      "epoch": 0.08018,
      "grad_norm": 0.9868904103661147,
      "learning_rate": 0.003,
      "loss": 4.041,
      "step": 8018
    },
    {
      "epoch": 0.08019,
      "grad_norm": 1.084639898239188,
      "learning_rate": 0.003,
      "loss": 4.0678,
      "step": 8019
    },
    {
      "epoch": 0.0802,
      "grad_norm": 1.185981726128112,
      "learning_rate": 0.003,
      "loss": 4.0376,
      "step": 8020
    },
    {
      "epoch": 0.08021,
      "grad_norm": 1.0766762166520623,
      "learning_rate": 0.003,
      "loss": 4.006,
      "step": 8021
    },
    {
      "epoch": 0.08022,
      "grad_norm": 0.9287360641086851,
      "learning_rate": 0.003,
      "loss": 4.0483,
      "step": 8022
    },
    {
      "epoch": 0.08023,
      "grad_norm": 0.9456722393800089,
      "learning_rate": 0.003,
      "loss": 4.0354,
      "step": 8023
    },
    {
      "epoch": 0.08024,
      "grad_norm": 1.0828009737424313,
      "learning_rate": 0.003,
      "loss": 4.0556,
      "step": 8024
    },
    {
      "epoch": 0.08025,
      "grad_norm": 1.2456772261250684,
      "learning_rate": 0.003,
      "loss": 4.0261,
      "step": 8025
    },
    {
      "epoch": 0.08026,
      "grad_norm": 1.0148217772847923,
      "learning_rate": 0.003,
      "loss": 4.0373,
      "step": 8026
    },
    {
      "epoch": 0.08027,
      "grad_norm": 0.9138608081544262,
      "learning_rate": 0.003,
      "loss": 4.0198,
      "step": 8027
    },
    {
      "epoch": 0.08028,
      "grad_norm": 0.9517142464911736,
      "learning_rate": 0.003,
      "loss": 4.0341,
      "step": 8028
    },
    {
      "epoch": 0.08029,
      "grad_norm": 1.1073434017859498,
      "learning_rate": 0.003,
      "loss": 4.0637,
      "step": 8029
    },
    {
      "epoch": 0.0803,
      "grad_norm": 0.9290905977228465,
      "learning_rate": 0.003,
      "loss": 4.0275,
      "step": 8030
    },
    {
      "epoch": 0.08031,
      "grad_norm": 0.9226578478764562,
      "learning_rate": 0.003,
      "loss": 4.0459,
      "step": 8031
    },
    {
      "epoch": 0.08032,
      "grad_norm": 1.0759256080505066,
      "learning_rate": 0.003,
      "loss": 4.0344,
      "step": 8032
    },
    {
      "epoch": 0.08033,
      "grad_norm": 0.9511400028043164,
      "learning_rate": 0.003,
      "loss": 4.0588,
      "step": 8033
    },
    {
      "epoch": 0.08034,
      "grad_norm": 1.1078049780159342,
      "learning_rate": 0.003,
      "loss": 4.0565,
      "step": 8034
    },
    {
      "epoch": 0.08035,
      "grad_norm": 0.9535032742600911,
      "learning_rate": 0.003,
      "loss": 4.0629,
      "step": 8035
    },
    {
      "epoch": 0.08036,
      "grad_norm": 1.1614455582425816,
      "learning_rate": 0.003,
      "loss": 4.0849,
      "step": 8036
    },
    {
      "epoch": 0.08037,
      "grad_norm": 0.8834094122204313,
      "learning_rate": 0.003,
      "loss": 4.0378,
      "step": 8037
    },
    {
      "epoch": 0.08038,
      "grad_norm": 1.1130873275597826,
      "learning_rate": 0.003,
      "loss": 4.0386,
      "step": 8038
    },
    {
      "epoch": 0.08039,
      "grad_norm": 1.211408515493295,
      "learning_rate": 0.003,
      "loss": 4.0502,
      "step": 8039
    },
    {
      "epoch": 0.0804,
      "grad_norm": 1.1685320433873827,
      "learning_rate": 0.003,
      "loss": 4.0593,
      "step": 8040
    },
    {
      "epoch": 0.08041,
      "grad_norm": 1.0277080460599624,
      "learning_rate": 0.003,
      "loss": 4.0633,
      "step": 8041
    },
    {
      "epoch": 0.08042,
      "grad_norm": 1.1973269882264728,
      "learning_rate": 0.003,
      "loss": 4.0521,
      "step": 8042
    },
    {
      "epoch": 0.08043,
      "grad_norm": 0.8271614764410427,
      "learning_rate": 0.003,
      "loss": 4.0362,
      "step": 8043
    },
    {
      "epoch": 0.08044,
      "grad_norm": 0.8887802571055234,
      "learning_rate": 0.003,
      "loss": 4.0203,
      "step": 8044
    },
    {
      "epoch": 0.08045,
      "grad_norm": 1.2110808135009032,
      "learning_rate": 0.003,
      "loss": 4.0376,
      "step": 8045
    },
    {
      "epoch": 0.08046,
      "grad_norm": 0.9518356758920827,
      "learning_rate": 0.003,
      "loss": 4.0405,
      "step": 8046
    },
    {
      "epoch": 0.08047,
      "grad_norm": 1.3038849837413196,
      "learning_rate": 0.003,
      "loss": 4.0467,
      "step": 8047
    },
    {
      "epoch": 0.08048,
      "grad_norm": 0.919567623717858,
      "learning_rate": 0.003,
      "loss": 4.0483,
      "step": 8048
    },
    {
      "epoch": 0.08049,
      "grad_norm": 0.8954045094329315,
      "learning_rate": 0.003,
      "loss": 4.0273,
      "step": 8049
    },
    {
      "epoch": 0.0805,
      "grad_norm": 1.0858667351123445,
      "learning_rate": 0.003,
      "loss": 4.0569,
      "step": 8050
    },
    {
      "epoch": 0.08051,
      "grad_norm": 1.0474964812508383,
      "learning_rate": 0.003,
      "loss": 4.0352,
      "step": 8051
    },
    {
      "epoch": 0.08052,
      "grad_norm": 0.998847972432671,
      "learning_rate": 0.003,
      "loss": 4.0156,
      "step": 8052
    },
    {
      "epoch": 0.08053,
      "grad_norm": 1.0859084509999066,
      "learning_rate": 0.003,
      "loss": 4.0596,
      "step": 8053
    },
    {
      "epoch": 0.08054,
      "grad_norm": 0.9520041929258798,
      "learning_rate": 0.003,
      "loss": 4.0314,
      "step": 8054
    },
    {
      "epoch": 0.08055,
      "grad_norm": 0.8849900316097494,
      "learning_rate": 0.003,
      "loss": 4.0104,
      "step": 8055
    },
    {
      "epoch": 0.08056,
      "grad_norm": 1.0200607183606778,
      "learning_rate": 0.003,
      "loss": 4.0429,
      "step": 8056
    },
    {
      "epoch": 0.08057,
      "grad_norm": 1.385440316855803,
      "learning_rate": 0.003,
      "loss": 4.0547,
      "step": 8057
    },
    {
      "epoch": 0.08058,
      "grad_norm": 0.7560965761499825,
      "learning_rate": 0.003,
      "loss": 4.0405,
      "step": 8058
    },
    {
      "epoch": 0.08059,
      "grad_norm": 0.802745797203238,
      "learning_rate": 0.003,
      "loss": 4.0202,
      "step": 8059
    },
    {
      "epoch": 0.0806,
      "grad_norm": 0.8780307041913072,
      "learning_rate": 0.003,
      "loss": 4.0292,
      "step": 8060
    },
    {
      "epoch": 0.08061,
      "grad_norm": 0.9388145447642858,
      "learning_rate": 0.003,
      "loss": 4.0155,
      "step": 8061
    },
    {
      "epoch": 0.08062,
      "grad_norm": 1.1392056930959564,
      "learning_rate": 0.003,
      "loss": 4.0503,
      "step": 8062
    },
    {
      "epoch": 0.08063,
      "grad_norm": 1.1500771661810032,
      "learning_rate": 0.003,
      "loss": 4.0786,
      "step": 8063
    },
    {
      "epoch": 0.08064,
      "grad_norm": 1.2259785226656719,
      "learning_rate": 0.003,
      "loss": 4.0389,
      "step": 8064
    },
    {
      "epoch": 0.08065,
      "grad_norm": 0.9482855137356565,
      "learning_rate": 0.003,
      "loss": 4.0477,
      "step": 8065
    },
    {
      "epoch": 0.08066,
      "grad_norm": 1.1941185655993347,
      "learning_rate": 0.003,
      "loss": 4.069,
      "step": 8066
    },
    {
      "epoch": 0.08067,
      "grad_norm": 1.0216574976533404,
      "learning_rate": 0.003,
      "loss": 4.0517,
      "step": 8067
    },
    {
      "epoch": 0.08068,
      "grad_norm": 1.1815373632833652,
      "learning_rate": 0.003,
      "loss": 4.0513,
      "step": 8068
    },
    {
      "epoch": 0.08069,
      "grad_norm": 0.8362728337965947,
      "learning_rate": 0.003,
      "loss": 4.0491,
      "step": 8069
    },
    {
      "epoch": 0.0807,
      "grad_norm": 1.0141924727587812,
      "learning_rate": 0.003,
      "loss": 4.0375,
      "step": 8070
    },
    {
      "epoch": 0.08071,
      "grad_norm": 1.2085013425881042,
      "learning_rate": 0.003,
      "loss": 4.0484,
      "step": 8071
    },
    {
      "epoch": 0.08072,
      "grad_norm": 1.1611590433461545,
      "learning_rate": 0.003,
      "loss": 4.0342,
      "step": 8072
    },
    {
      "epoch": 0.08073,
      "grad_norm": 1.1178371421514612,
      "learning_rate": 0.003,
      "loss": 4.0377,
      "step": 8073
    },
    {
      "epoch": 0.08074,
      "grad_norm": 0.8685364005438322,
      "learning_rate": 0.003,
      "loss": 4.022,
      "step": 8074
    },
    {
      "epoch": 0.08075,
      "grad_norm": 0.9730008967866436,
      "learning_rate": 0.003,
      "loss": 4.0674,
      "step": 8075
    },
    {
      "epoch": 0.08076,
      "grad_norm": 1.2822538457110706,
      "learning_rate": 0.003,
      "loss": 4.0252,
      "step": 8076
    },
    {
      "epoch": 0.08077,
      "grad_norm": 1.0798964686596582,
      "learning_rate": 0.003,
      "loss": 4.0716,
      "step": 8077
    },
    {
      "epoch": 0.08078,
      "grad_norm": 1.2985664488989839,
      "learning_rate": 0.003,
      "loss": 4.0614,
      "step": 8078
    },
    {
      "epoch": 0.08079,
      "grad_norm": 0.9957269652282372,
      "learning_rate": 0.003,
      "loss": 4.0493,
      "step": 8079
    },
    {
      "epoch": 0.0808,
      "grad_norm": 0.8700008490746549,
      "learning_rate": 0.003,
      "loss": 4.0359,
      "step": 8080
    },
    {
      "epoch": 0.08081,
      "grad_norm": 0.9054583409173683,
      "learning_rate": 0.003,
      "loss": 4.0533,
      "step": 8081
    },
    {
      "epoch": 0.08082,
      "grad_norm": 0.9766818304459607,
      "learning_rate": 0.003,
      "loss": 4.0157,
      "step": 8082
    },
    {
      "epoch": 0.08083,
      "grad_norm": 1.0138171171604249,
      "learning_rate": 0.003,
      "loss": 4.037,
      "step": 8083
    },
    {
      "epoch": 0.08084,
      "grad_norm": 1.1036070375778735,
      "learning_rate": 0.003,
      "loss": 4.0417,
      "step": 8084
    },
    {
      "epoch": 0.08085,
      "grad_norm": 1.1006361031316971,
      "learning_rate": 0.003,
      "loss": 4.0732,
      "step": 8085
    },
    {
      "epoch": 0.08086,
      "grad_norm": 1.2665221292524846,
      "learning_rate": 0.003,
      "loss": 4.0256,
      "step": 8086
    },
    {
      "epoch": 0.08087,
      "grad_norm": 0.87235683124875,
      "learning_rate": 0.003,
      "loss": 4.0346,
      "step": 8087
    },
    {
      "epoch": 0.08088,
      "grad_norm": 1.2101069976094259,
      "learning_rate": 0.003,
      "loss": 4.0551,
      "step": 8088
    },
    {
      "epoch": 0.08089,
      "grad_norm": 1.174020295847284,
      "learning_rate": 0.003,
      "loss": 4.0178,
      "step": 8089
    },
    {
      "epoch": 0.0809,
      "grad_norm": 1.0782301641574674,
      "learning_rate": 0.003,
      "loss": 4.0448,
      "step": 8090
    },
    {
      "epoch": 0.08091,
      "grad_norm": 1.3340904191723806,
      "learning_rate": 0.003,
      "loss": 4.0728,
      "step": 8091
    },
    {
      "epoch": 0.08092,
      "grad_norm": 0.9678394845670897,
      "learning_rate": 0.003,
      "loss": 4.0651,
      "step": 8092
    },
    {
      "epoch": 0.08093,
      "grad_norm": 1.1614114147755075,
      "learning_rate": 0.003,
      "loss": 4.0323,
      "step": 8093
    },
    {
      "epoch": 0.08094,
      "grad_norm": 1.1087224012161654,
      "learning_rate": 0.003,
      "loss": 4.0283,
      "step": 8094
    },
    {
      "epoch": 0.08095,
      "grad_norm": 1.280636136936636,
      "learning_rate": 0.003,
      "loss": 4.026,
      "step": 8095
    },
    {
      "epoch": 0.08096,
      "grad_norm": 0.9820162541292741,
      "learning_rate": 0.003,
      "loss": 4.0489,
      "step": 8096
    },
    {
      "epoch": 0.08097,
      "grad_norm": 1.1089399244430402,
      "learning_rate": 0.003,
      "loss": 4.042,
      "step": 8097
    },
    {
      "epoch": 0.08098,
      "grad_norm": 0.9715722278401574,
      "learning_rate": 0.003,
      "loss": 4.0699,
      "step": 8098
    },
    {
      "epoch": 0.08099,
      "grad_norm": 1.164836620545108,
      "learning_rate": 0.003,
      "loss": 4.0499,
      "step": 8099
    },
    {
      "epoch": 0.081,
      "grad_norm": 1.0306069756917127,
      "learning_rate": 0.003,
      "loss": 4.0517,
      "step": 8100
    },
    {
      "epoch": 0.08101,
      "grad_norm": 1.1971074710915723,
      "learning_rate": 0.003,
      "loss": 4.0742,
      "step": 8101
    },
    {
      "epoch": 0.08102,
      "grad_norm": 0.9529922879670738,
      "learning_rate": 0.003,
      "loss": 4.0474,
      "step": 8102
    },
    {
      "epoch": 0.08103,
      "grad_norm": 1.1839304856546549,
      "learning_rate": 0.003,
      "loss": 4.054,
      "step": 8103
    },
    {
      "epoch": 0.08104,
      "grad_norm": 0.9885159799435862,
      "learning_rate": 0.003,
      "loss": 4.047,
      "step": 8104
    },
    {
      "epoch": 0.08105,
      "grad_norm": 1.0385726215167874,
      "learning_rate": 0.003,
      "loss": 4.0611,
      "step": 8105
    },
    {
      "epoch": 0.08106,
      "grad_norm": 0.9513496918238994,
      "learning_rate": 0.003,
      "loss": 4.0483,
      "step": 8106
    },
    {
      "epoch": 0.08107,
      "grad_norm": 1.1227985010536334,
      "learning_rate": 0.003,
      "loss": 4.0537,
      "step": 8107
    },
    {
      "epoch": 0.08108,
      "grad_norm": 1.0247223199647508,
      "learning_rate": 0.003,
      "loss": 4.0012,
      "step": 8108
    },
    {
      "epoch": 0.08109,
      "grad_norm": 1.0113529850755245,
      "learning_rate": 0.003,
      "loss": 4.0433,
      "step": 8109
    },
    {
      "epoch": 0.0811,
      "grad_norm": 1.1642917754838757,
      "learning_rate": 0.003,
      "loss": 4.0662,
      "step": 8110
    },
    {
      "epoch": 0.08111,
      "grad_norm": 0.885899156879861,
      "learning_rate": 0.003,
      "loss": 4.0492,
      "step": 8111
    },
    {
      "epoch": 0.08112,
      "grad_norm": 1.0233932927167342,
      "learning_rate": 0.003,
      "loss": 4.0252,
      "step": 8112
    },
    {
      "epoch": 0.08113,
      "grad_norm": 1.1478499269861702,
      "learning_rate": 0.003,
      "loss": 4.0376,
      "step": 8113
    },
    {
      "epoch": 0.08114,
      "grad_norm": 1.191391018906478,
      "learning_rate": 0.003,
      "loss": 4.0685,
      "step": 8114
    },
    {
      "epoch": 0.08115,
      "grad_norm": 1.0398287191821558,
      "learning_rate": 0.003,
      "loss": 4.0586,
      "step": 8115
    },
    {
      "epoch": 0.08116,
      "grad_norm": 1.029396069350254,
      "learning_rate": 0.003,
      "loss": 4.0663,
      "step": 8116
    },
    {
      "epoch": 0.08117,
      "grad_norm": 1.0620529797775633,
      "learning_rate": 0.003,
      "loss": 4.067,
      "step": 8117
    },
    {
      "epoch": 0.08118,
      "grad_norm": 1.153228275579987,
      "learning_rate": 0.003,
      "loss": 4.0501,
      "step": 8118
    },
    {
      "epoch": 0.08119,
      "grad_norm": 1.0235694255861916,
      "learning_rate": 0.003,
      "loss": 4.0455,
      "step": 8119
    },
    {
      "epoch": 0.0812,
      "grad_norm": 1.2920287937035275,
      "learning_rate": 0.003,
      "loss": 4.0312,
      "step": 8120
    },
    {
      "epoch": 0.08121,
      "grad_norm": 0.798826528722648,
      "learning_rate": 0.003,
      "loss": 4.0531,
      "step": 8121
    },
    {
      "epoch": 0.08122,
      "grad_norm": 0.8251701793435979,
      "learning_rate": 0.003,
      "loss": 4.0447,
      "step": 8122
    },
    {
      "epoch": 0.08123,
      "grad_norm": 0.9500120105243509,
      "learning_rate": 0.003,
      "loss": 4.0392,
      "step": 8123
    },
    {
      "epoch": 0.08124,
      "grad_norm": 1.1358148106927366,
      "learning_rate": 0.003,
      "loss": 4.0583,
      "step": 8124
    },
    {
      "epoch": 0.08125,
      "grad_norm": 0.9701373726642997,
      "learning_rate": 0.003,
      "loss": 4.0721,
      "step": 8125
    },
    {
      "epoch": 0.08126,
      "grad_norm": 1.303900333086603,
      "learning_rate": 0.003,
      "loss": 4.0373,
      "step": 8126
    },
    {
      "epoch": 0.08127,
      "grad_norm": 0.8348257183122614,
      "learning_rate": 0.003,
      "loss": 4.0328,
      "step": 8127
    },
    {
      "epoch": 0.08128,
      "grad_norm": 1.0168038658961494,
      "learning_rate": 0.003,
      "loss": 4.058,
      "step": 8128
    },
    {
      "epoch": 0.08129,
      "grad_norm": 1.2570226327195435,
      "learning_rate": 0.003,
      "loss": 4.0332,
      "step": 8129
    },
    {
      "epoch": 0.0813,
      "grad_norm": 0.9835919552429621,
      "learning_rate": 0.003,
      "loss": 4.0267,
      "step": 8130
    },
    {
      "epoch": 0.08131,
      "grad_norm": 1.4786402848670535,
      "learning_rate": 0.003,
      "loss": 4.0603,
      "step": 8131
    },
    {
      "epoch": 0.08132,
      "grad_norm": 0.967482520755732,
      "learning_rate": 0.003,
      "loss": 4.0456,
      "step": 8132
    },
    {
      "epoch": 0.08133,
      "grad_norm": 1.0136679388024379,
      "learning_rate": 0.003,
      "loss": 4.0404,
      "step": 8133
    },
    {
      "epoch": 0.08134,
      "grad_norm": 1.3243507873969376,
      "learning_rate": 0.003,
      "loss": 4.04,
      "step": 8134
    },
    {
      "epoch": 0.08135,
      "grad_norm": 1.1659701473075903,
      "learning_rate": 0.003,
      "loss": 4.0362,
      "step": 8135
    },
    {
      "epoch": 0.08136,
      "grad_norm": 1.1146756067652475,
      "learning_rate": 0.003,
      "loss": 4.0672,
      "step": 8136
    },
    {
      "epoch": 0.08137,
      "grad_norm": 1.0102380938375048,
      "learning_rate": 0.003,
      "loss": 4.0077,
      "step": 8137
    },
    {
      "epoch": 0.08138,
      "grad_norm": 1.1104761872750726,
      "learning_rate": 0.003,
      "loss": 4.0355,
      "step": 8138
    },
    {
      "epoch": 0.08139,
      "grad_norm": 0.9806068045939812,
      "learning_rate": 0.003,
      "loss": 4.0437,
      "step": 8139
    },
    {
      "epoch": 0.0814,
      "grad_norm": 0.9578534939613226,
      "learning_rate": 0.003,
      "loss": 4.0528,
      "step": 8140
    },
    {
      "epoch": 0.08141,
      "grad_norm": 0.9783961516579012,
      "learning_rate": 0.003,
      "loss": 4.0525,
      "step": 8141
    },
    {
      "epoch": 0.08142,
      "grad_norm": 1.1102754179258492,
      "learning_rate": 0.003,
      "loss": 4.0641,
      "step": 8142
    },
    {
      "epoch": 0.08143,
      "grad_norm": 1.013761291015948,
      "learning_rate": 0.003,
      "loss": 4.0582,
      "step": 8143
    },
    {
      "epoch": 0.08144,
      "grad_norm": 1.0984848305189987,
      "learning_rate": 0.003,
      "loss": 4.0298,
      "step": 8144
    },
    {
      "epoch": 0.08145,
      "grad_norm": 1.0741146027806776,
      "learning_rate": 0.003,
      "loss": 4.0365,
      "step": 8145
    },
    {
      "epoch": 0.08146,
      "grad_norm": 1.0850941853188774,
      "learning_rate": 0.003,
      "loss": 4.07,
      "step": 8146
    },
    {
      "epoch": 0.08147,
      "grad_norm": 1.0900280533229438,
      "learning_rate": 0.003,
      "loss": 4.0696,
      "step": 8147
    },
    {
      "epoch": 0.08148,
      "grad_norm": 1.1612246962032904,
      "learning_rate": 0.003,
      "loss": 4.0266,
      "step": 8148
    },
    {
      "epoch": 0.08149,
      "grad_norm": 1.3562111466807953,
      "learning_rate": 0.003,
      "loss": 4.0416,
      "step": 8149
    },
    {
      "epoch": 0.0815,
      "grad_norm": 0.9650515229317034,
      "learning_rate": 0.003,
      "loss": 4.0276,
      "step": 8150
    },
    {
      "epoch": 0.08151,
      "grad_norm": 1.1924509745774654,
      "learning_rate": 0.003,
      "loss": 4.0557,
      "step": 8151
    },
    {
      "epoch": 0.08152,
      "grad_norm": 1.094500106195963,
      "learning_rate": 0.003,
      "loss": 4.0562,
      "step": 8152
    },
    {
      "epoch": 0.08153,
      "grad_norm": 1.004251942188992,
      "learning_rate": 0.003,
      "loss": 4.0309,
      "step": 8153
    },
    {
      "epoch": 0.08154,
      "grad_norm": 1.1798560492422325,
      "learning_rate": 0.003,
      "loss": 4.0476,
      "step": 8154
    },
    {
      "epoch": 0.08155,
      "grad_norm": 1.0157829602982544,
      "learning_rate": 0.003,
      "loss": 4.0627,
      "step": 8155
    },
    {
      "epoch": 0.08156,
      "grad_norm": 1.2891395825570224,
      "learning_rate": 0.003,
      "loss": 4.0552,
      "step": 8156
    },
    {
      "epoch": 0.08157,
      "grad_norm": 0.9232969874496607,
      "learning_rate": 0.003,
      "loss": 3.9949,
      "step": 8157
    },
    {
      "epoch": 0.08158,
      "grad_norm": 1.0373678761694196,
      "learning_rate": 0.003,
      "loss": 4.0732,
      "step": 8158
    },
    {
      "epoch": 0.08159,
      "grad_norm": 1.3218980216269214,
      "learning_rate": 0.003,
      "loss": 4.0782,
      "step": 8159
    },
    {
      "epoch": 0.0816,
      "grad_norm": 0.9490086271587503,
      "learning_rate": 0.003,
      "loss": 4.0449,
      "step": 8160
    },
    {
      "epoch": 0.08161,
      "grad_norm": 1.0833199103673854,
      "learning_rate": 0.003,
      "loss": 4.0509,
      "step": 8161
    },
    {
      "epoch": 0.08162,
      "grad_norm": 1.2185544482985367,
      "learning_rate": 0.003,
      "loss": 4.0555,
      "step": 8162
    },
    {
      "epoch": 0.08163,
      "grad_norm": 0.9763505701602616,
      "learning_rate": 0.003,
      "loss": 4.0066,
      "step": 8163
    },
    {
      "epoch": 0.08164,
      "grad_norm": 1.066961091347535,
      "learning_rate": 0.003,
      "loss": 4.0369,
      "step": 8164
    },
    {
      "epoch": 0.08165,
      "grad_norm": 1.0509794131255215,
      "learning_rate": 0.003,
      "loss": 4.0405,
      "step": 8165
    },
    {
      "epoch": 0.08166,
      "grad_norm": 1.0757821468791668,
      "learning_rate": 0.003,
      "loss": 4.0432,
      "step": 8166
    },
    {
      "epoch": 0.08167,
      "grad_norm": 1.3397321654362855,
      "learning_rate": 0.003,
      "loss": 4.0483,
      "step": 8167
    },
    {
      "epoch": 0.08168,
      "grad_norm": 1.1362149152066932,
      "learning_rate": 0.003,
      "loss": 4.0225,
      "step": 8168
    },
    {
      "epoch": 0.08169,
      "grad_norm": 0.8763737391154698,
      "learning_rate": 0.003,
      "loss": 4.0496,
      "step": 8169
    },
    {
      "epoch": 0.0817,
      "grad_norm": 0.8325229180364018,
      "learning_rate": 0.003,
      "loss": 4.0408,
      "step": 8170
    },
    {
      "epoch": 0.08171,
      "grad_norm": 0.9119090572200499,
      "learning_rate": 0.003,
      "loss": 4.0311,
      "step": 8171
    },
    {
      "epoch": 0.08172,
      "grad_norm": 0.9166054245530775,
      "learning_rate": 0.003,
      "loss": 4.0538,
      "step": 8172
    },
    {
      "epoch": 0.08173,
      "grad_norm": 1.0922618916234996,
      "learning_rate": 0.003,
      "loss": 4.0787,
      "step": 8173
    },
    {
      "epoch": 0.08174,
      "grad_norm": 1.0948674878262272,
      "learning_rate": 0.003,
      "loss": 4.0715,
      "step": 8174
    },
    {
      "epoch": 0.08175,
      "grad_norm": 1.076487219894897,
      "learning_rate": 0.003,
      "loss": 4.0419,
      "step": 8175
    },
    {
      "epoch": 0.08176,
      "grad_norm": 1.0595377058120148,
      "learning_rate": 0.003,
      "loss": 4.0157,
      "step": 8176
    },
    {
      "epoch": 0.08177,
      "grad_norm": 1.1274198711334715,
      "learning_rate": 0.003,
      "loss": 4.034,
      "step": 8177
    },
    {
      "epoch": 0.08178,
      "grad_norm": 1.0171853704517495,
      "learning_rate": 0.003,
      "loss": 4.0397,
      "step": 8178
    },
    {
      "epoch": 0.08179,
      "grad_norm": 1.3061262539701504,
      "learning_rate": 0.003,
      "loss": 4.0733,
      "step": 8179
    },
    {
      "epoch": 0.0818,
      "grad_norm": 0.9862361455218118,
      "learning_rate": 0.003,
      "loss": 4.0422,
      "step": 8180
    },
    {
      "epoch": 0.08181,
      "grad_norm": 1.248761174376701,
      "learning_rate": 0.003,
      "loss": 4.0423,
      "step": 8181
    },
    {
      "epoch": 0.08182,
      "grad_norm": 1.04459958881984,
      "learning_rate": 0.003,
      "loss": 4.0772,
      "step": 8182
    },
    {
      "epoch": 0.08183,
      "grad_norm": 1.034490751276438,
      "learning_rate": 0.003,
      "loss": 4.036,
      "step": 8183
    },
    {
      "epoch": 0.08184,
      "grad_norm": 1.3084515904264853,
      "learning_rate": 0.003,
      "loss": 4.0385,
      "step": 8184
    },
    {
      "epoch": 0.08185,
      "grad_norm": 0.9554920895816984,
      "learning_rate": 0.003,
      "loss": 4.0509,
      "step": 8185
    },
    {
      "epoch": 0.08186,
      "grad_norm": 1.0543602272809869,
      "learning_rate": 0.003,
      "loss": 4.0503,
      "step": 8186
    },
    {
      "epoch": 0.08187,
      "grad_norm": 1.1260343697487754,
      "learning_rate": 0.003,
      "loss": 4.0225,
      "step": 8187
    },
    {
      "epoch": 0.08188,
      "grad_norm": 1.0245037440997349,
      "learning_rate": 0.003,
      "loss": 4.068,
      "step": 8188
    },
    {
      "epoch": 0.08189,
      "grad_norm": 1.0290894153169319,
      "learning_rate": 0.003,
      "loss": 4.0653,
      "step": 8189
    },
    {
      "epoch": 0.0819,
      "grad_norm": 0.877806506800442,
      "learning_rate": 0.003,
      "loss": 4.0372,
      "step": 8190
    },
    {
      "epoch": 0.08191,
      "grad_norm": 0.8759876139628601,
      "learning_rate": 0.003,
      "loss": 4.0356,
      "step": 8191
    },
    {
      "epoch": 0.08192,
      "grad_norm": 1.0235597494522326,
      "learning_rate": 0.003,
      "loss": 4.0018,
      "step": 8192
    },
    {
      "epoch": 0.08193,
      "grad_norm": 1.0680156272560604,
      "learning_rate": 0.003,
      "loss": 4.0457,
      "step": 8193
    },
    {
      "epoch": 0.08194,
      "grad_norm": 0.9866872972933308,
      "learning_rate": 0.003,
      "loss": 4.0531,
      "step": 8194
    },
    {
      "epoch": 0.08195,
      "grad_norm": 1.0454954573691113,
      "learning_rate": 0.003,
      "loss": 4.0347,
      "step": 8195
    },
    {
      "epoch": 0.08196,
      "grad_norm": 0.878803096910163,
      "learning_rate": 0.003,
      "loss": 4.0606,
      "step": 8196
    },
    {
      "epoch": 0.08197,
      "grad_norm": 1.097095395513429,
      "learning_rate": 0.003,
      "loss": 4.0459,
      "step": 8197
    },
    {
      "epoch": 0.08198,
      "grad_norm": 1.4128502904759463,
      "learning_rate": 0.003,
      "loss": 4.0465,
      "step": 8198
    },
    {
      "epoch": 0.08199,
      "grad_norm": 0.9349249325846235,
      "learning_rate": 0.003,
      "loss": 4.049,
      "step": 8199
    },
    {
      "epoch": 0.082,
      "grad_norm": 1.1340924047935328,
      "learning_rate": 0.003,
      "loss": 4.027,
      "step": 8200
    },
    {
      "epoch": 0.08201,
      "grad_norm": 1.04912739515356,
      "learning_rate": 0.003,
      "loss": 4.0373,
      "step": 8201
    },
    {
      "epoch": 0.08202,
      "grad_norm": 1.0714645593844108,
      "learning_rate": 0.003,
      "loss": 4.0292,
      "step": 8202
    },
    {
      "epoch": 0.08203,
      "grad_norm": 1.164096753838808,
      "learning_rate": 0.003,
      "loss": 4.0602,
      "step": 8203
    },
    {
      "epoch": 0.08204,
      "grad_norm": 1.1316835410725452,
      "learning_rate": 0.003,
      "loss": 4.0466,
      "step": 8204
    },
    {
      "epoch": 0.08205,
      "grad_norm": 1.2120673109706728,
      "learning_rate": 0.003,
      "loss": 4.045,
      "step": 8205
    },
    {
      "epoch": 0.08206,
      "grad_norm": 1.11571994095571,
      "learning_rate": 0.003,
      "loss": 4.0773,
      "step": 8206
    },
    {
      "epoch": 0.08207,
      "grad_norm": 1.0621146376384545,
      "learning_rate": 0.003,
      "loss": 4.0453,
      "step": 8207
    },
    {
      "epoch": 0.08208,
      "grad_norm": 1.1747546800940614,
      "learning_rate": 0.003,
      "loss": 4.0471,
      "step": 8208
    },
    {
      "epoch": 0.08209,
      "grad_norm": 0.8502432613824764,
      "learning_rate": 0.003,
      "loss": 4.0572,
      "step": 8209
    },
    {
      "epoch": 0.0821,
      "grad_norm": 0.9220719569509254,
      "learning_rate": 0.003,
      "loss": 4.0433,
      "step": 8210
    },
    {
      "epoch": 0.08211,
      "grad_norm": 1.2327782482092768,
      "learning_rate": 0.003,
      "loss": 4.0509,
      "step": 8211
    },
    {
      "epoch": 0.08212,
      "grad_norm": 1.019547573968891,
      "learning_rate": 0.003,
      "loss": 4.0466,
      "step": 8212
    },
    {
      "epoch": 0.08213,
      "grad_norm": 1.1264918883793147,
      "learning_rate": 0.003,
      "loss": 4.0323,
      "step": 8213
    },
    {
      "epoch": 0.08214,
      "grad_norm": 1.0153304383270647,
      "learning_rate": 0.003,
      "loss": 4.0654,
      "step": 8214
    },
    {
      "epoch": 0.08215,
      "grad_norm": 1.172513559366227,
      "learning_rate": 0.003,
      "loss": 4.0695,
      "step": 8215
    },
    {
      "epoch": 0.08216,
      "grad_norm": 1.1761345458122565,
      "learning_rate": 0.003,
      "loss": 4.0461,
      "step": 8216
    },
    {
      "epoch": 0.08217,
      "grad_norm": 1.0086245946196835,
      "learning_rate": 0.003,
      "loss": 4.0716,
      "step": 8217
    },
    {
      "epoch": 0.08218,
      "grad_norm": 1.0192220496141946,
      "learning_rate": 0.003,
      "loss": 4.0355,
      "step": 8218
    },
    {
      "epoch": 0.08219,
      "grad_norm": 1.1072279418839062,
      "learning_rate": 0.003,
      "loss": 4.0353,
      "step": 8219
    },
    {
      "epoch": 0.0822,
      "grad_norm": 1.2129949090775702,
      "learning_rate": 0.003,
      "loss": 4.0497,
      "step": 8220
    },
    {
      "epoch": 0.08221,
      "grad_norm": 1.335506660895218,
      "learning_rate": 0.003,
      "loss": 4.0702,
      "step": 8221
    },
    {
      "epoch": 0.08222,
      "grad_norm": 0.9100821320950854,
      "learning_rate": 0.003,
      "loss": 4.0465,
      "step": 8222
    },
    {
      "epoch": 0.08223,
      "grad_norm": 1.2359285375629427,
      "learning_rate": 0.003,
      "loss": 4.0633,
      "step": 8223
    },
    {
      "epoch": 0.08224,
      "grad_norm": 1.1420581603411728,
      "learning_rate": 0.003,
      "loss": 4.0587,
      "step": 8224
    },
    {
      "epoch": 0.08225,
      "grad_norm": 1.131941471315245,
      "learning_rate": 0.003,
      "loss": 4.0686,
      "step": 8225
    },
    {
      "epoch": 0.08226,
      "grad_norm": 0.9193337169990431,
      "learning_rate": 0.003,
      "loss": 4.0273,
      "step": 8226
    },
    {
      "epoch": 0.08227,
      "grad_norm": 0.9322903091927021,
      "learning_rate": 0.003,
      "loss": 4.0679,
      "step": 8227
    },
    {
      "epoch": 0.08228,
      "grad_norm": 1.015271996777266,
      "learning_rate": 0.003,
      "loss": 4.0576,
      "step": 8228
    },
    {
      "epoch": 0.08229,
      "grad_norm": 1.134812005676848,
      "learning_rate": 0.003,
      "loss": 4.025,
      "step": 8229
    },
    {
      "epoch": 0.0823,
      "grad_norm": 0.9067833302627255,
      "learning_rate": 0.003,
      "loss": 4.0621,
      "step": 8230
    },
    {
      "epoch": 0.08231,
      "grad_norm": 1.1844642289666762,
      "learning_rate": 0.003,
      "loss": 4.0212,
      "step": 8231
    },
    {
      "epoch": 0.08232,
      "grad_norm": 1.074667737158904,
      "learning_rate": 0.003,
      "loss": 4.0322,
      "step": 8232
    },
    {
      "epoch": 0.08233,
      "grad_norm": 0.9455096038944548,
      "learning_rate": 0.003,
      "loss": 4.0412,
      "step": 8233
    },
    {
      "epoch": 0.08234,
      "grad_norm": 0.9451177013419999,
      "learning_rate": 0.003,
      "loss": 4.0534,
      "step": 8234
    },
    {
      "epoch": 0.08235,
      "grad_norm": 0.9751741426641366,
      "learning_rate": 0.003,
      "loss": 4.0713,
      "step": 8235
    },
    {
      "epoch": 0.08236,
      "grad_norm": 1.0139644613308296,
      "learning_rate": 0.003,
      "loss": 4.0275,
      "step": 8236
    },
    {
      "epoch": 0.08237,
      "grad_norm": 1.1102447919282574,
      "learning_rate": 0.003,
      "loss": 4.0446,
      "step": 8237
    },
    {
      "epoch": 0.08238,
      "grad_norm": 1.090996849580925,
      "learning_rate": 0.003,
      "loss": 4.0517,
      "step": 8238
    },
    {
      "epoch": 0.08239,
      "grad_norm": 1.1072648040046784,
      "learning_rate": 0.003,
      "loss": 4.0273,
      "step": 8239
    },
    {
      "epoch": 0.0824,
      "grad_norm": 0.963339170365707,
      "learning_rate": 0.003,
      "loss": 4.059,
      "step": 8240
    },
    {
      "epoch": 0.08241,
      "grad_norm": 1.1416193830206067,
      "learning_rate": 0.003,
      "loss": 4.0451,
      "step": 8241
    },
    {
      "epoch": 0.08242,
      "grad_norm": 1.1161581273297645,
      "learning_rate": 0.003,
      "loss": 4.0153,
      "step": 8242
    },
    {
      "epoch": 0.08243,
      "grad_norm": 0.9836364537147602,
      "learning_rate": 0.003,
      "loss": 4.0614,
      "step": 8243
    },
    {
      "epoch": 0.08244,
      "grad_norm": 1.0051947565508417,
      "learning_rate": 0.003,
      "loss": 4.0357,
      "step": 8244
    },
    {
      "epoch": 0.08245,
      "grad_norm": 1.179881665824301,
      "learning_rate": 0.003,
      "loss": 4.0362,
      "step": 8245
    },
    {
      "epoch": 0.08246,
      "grad_norm": 1.0994155612110759,
      "learning_rate": 0.003,
      "loss": 4.0381,
      "step": 8246
    },
    {
      "epoch": 0.08247,
      "grad_norm": 1.1514528107070527,
      "learning_rate": 0.003,
      "loss": 4.0488,
      "step": 8247
    },
    {
      "epoch": 0.08248,
      "grad_norm": 1.1175713935986686,
      "learning_rate": 0.003,
      "loss": 4.0343,
      "step": 8248
    },
    {
      "epoch": 0.08249,
      "grad_norm": 1.022832537141348,
      "learning_rate": 0.003,
      "loss": 4.056,
      "step": 8249
    },
    {
      "epoch": 0.0825,
      "grad_norm": 1.2208883258896968,
      "learning_rate": 0.003,
      "loss": 4.035,
      "step": 8250
    },
    {
      "epoch": 0.08251,
      "grad_norm": 0.9539194420518737,
      "learning_rate": 0.003,
      "loss": 4.0491,
      "step": 8251
    },
    {
      "epoch": 0.08252,
      "grad_norm": 1.0155458495353762,
      "learning_rate": 0.003,
      "loss": 4.0536,
      "step": 8252
    },
    {
      "epoch": 0.08253,
      "grad_norm": 1.241505078520073,
      "learning_rate": 0.003,
      "loss": 4.0488,
      "step": 8253
    },
    {
      "epoch": 0.08254,
      "grad_norm": 0.9520715771435223,
      "learning_rate": 0.003,
      "loss": 4.0263,
      "step": 8254
    },
    {
      "epoch": 0.08255,
      "grad_norm": 1.024935048858687,
      "learning_rate": 0.003,
      "loss": 4.0236,
      "step": 8255
    },
    {
      "epoch": 0.08256,
      "grad_norm": 1.233328543047205,
      "learning_rate": 0.003,
      "loss": 4.0637,
      "step": 8256
    },
    {
      "epoch": 0.08257,
      "grad_norm": 1.190460709783594,
      "learning_rate": 0.003,
      "loss": 4.0371,
      "step": 8257
    },
    {
      "epoch": 0.08258,
      "grad_norm": 1.0367792756754817,
      "learning_rate": 0.003,
      "loss": 4.0349,
      "step": 8258
    },
    {
      "epoch": 0.08259,
      "grad_norm": 1.169563709355769,
      "learning_rate": 0.003,
      "loss": 4.0764,
      "step": 8259
    },
    {
      "epoch": 0.0826,
      "grad_norm": 0.861527127229618,
      "learning_rate": 0.003,
      "loss": 4.0156,
      "step": 8260
    },
    {
      "epoch": 0.08261,
      "grad_norm": 0.8201507958780685,
      "learning_rate": 0.003,
      "loss": 4.028,
      "step": 8261
    },
    {
      "epoch": 0.08262,
      "grad_norm": 0.9386291752296059,
      "learning_rate": 0.003,
      "loss": 4.0684,
      "step": 8262
    },
    {
      "epoch": 0.08263,
      "grad_norm": 1.0230682657821621,
      "learning_rate": 0.003,
      "loss": 4.036,
      "step": 8263
    },
    {
      "epoch": 0.08264,
      "grad_norm": 1.2158135102547172,
      "learning_rate": 0.003,
      "loss": 4.0568,
      "step": 8264
    },
    {
      "epoch": 0.08265,
      "grad_norm": 0.9858027331224795,
      "learning_rate": 0.003,
      "loss": 4.0398,
      "step": 8265
    },
    {
      "epoch": 0.08266,
      "grad_norm": 1.1514602100198823,
      "learning_rate": 0.003,
      "loss": 4.0488,
      "step": 8266
    },
    {
      "epoch": 0.08267,
      "grad_norm": 1.19867896784868,
      "learning_rate": 0.003,
      "loss": 4.0507,
      "step": 8267
    },
    {
      "epoch": 0.08268,
      "grad_norm": 0.8924446930144582,
      "learning_rate": 0.003,
      "loss": 4.0265,
      "step": 8268
    },
    {
      "epoch": 0.08269,
      "grad_norm": 0.9332387923779609,
      "learning_rate": 0.003,
      "loss": 4.0595,
      "step": 8269
    },
    {
      "epoch": 0.0827,
      "grad_norm": 1.0395207333244374,
      "learning_rate": 0.003,
      "loss": 4.036,
      "step": 8270
    },
    {
      "epoch": 0.08271,
      "grad_norm": 1.1217717636669475,
      "learning_rate": 0.003,
      "loss": 4.058,
      "step": 8271
    },
    {
      "epoch": 0.08272,
      "grad_norm": 1.158015224233601,
      "learning_rate": 0.003,
      "loss": 4.049,
      "step": 8272
    },
    {
      "epoch": 0.08273,
      "grad_norm": 1.2991517374950396,
      "learning_rate": 0.003,
      "loss": 4.0446,
      "step": 8273
    },
    {
      "epoch": 0.08274,
      "grad_norm": 1.081398113932392,
      "learning_rate": 0.003,
      "loss": 4.059,
      "step": 8274
    },
    {
      "epoch": 0.08275,
      "grad_norm": 1.1176536482392028,
      "learning_rate": 0.003,
      "loss": 4.0382,
      "step": 8275
    },
    {
      "epoch": 0.08276,
      "grad_norm": 0.9350722584845963,
      "learning_rate": 0.003,
      "loss": 4.0217,
      "step": 8276
    },
    {
      "epoch": 0.08277,
      "grad_norm": 0.9894919434780528,
      "learning_rate": 0.003,
      "loss": 4.0223,
      "step": 8277
    },
    {
      "epoch": 0.08278,
      "grad_norm": 1.1769258157634674,
      "learning_rate": 0.003,
      "loss": 4.0377,
      "step": 8278
    },
    {
      "epoch": 0.08279,
      "grad_norm": 0.9433131457871408,
      "learning_rate": 0.003,
      "loss": 4.0338,
      "step": 8279
    },
    {
      "epoch": 0.0828,
      "grad_norm": 0.9576999263892194,
      "learning_rate": 0.003,
      "loss": 4.062,
      "step": 8280
    },
    {
      "epoch": 0.08281,
      "grad_norm": 1.209178534584828,
      "learning_rate": 0.003,
      "loss": 4.0317,
      "step": 8281
    },
    {
      "epoch": 0.08282,
      "grad_norm": 0.9614787804606547,
      "learning_rate": 0.003,
      "loss": 4.0349,
      "step": 8282
    },
    {
      "epoch": 0.08283,
      "grad_norm": 1.0971465514681484,
      "learning_rate": 0.003,
      "loss": 4.0525,
      "step": 8283
    },
    {
      "epoch": 0.08284,
      "grad_norm": 1.0107622943623467,
      "learning_rate": 0.003,
      "loss": 4.0334,
      "step": 8284
    },
    {
      "epoch": 0.08285,
      "grad_norm": 1.15514741446281,
      "learning_rate": 0.003,
      "loss": 4.0529,
      "step": 8285
    },
    {
      "epoch": 0.08286,
      "grad_norm": 1.0676521040179816,
      "learning_rate": 0.003,
      "loss": 4.065,
      "step": 8286
    },
    {
      "epoch": 0.08287,
      "grad_norm": 1.2393097291371007,
      "learning_rate": 0.003,
      "loss": 4.0455,
      "step": 8287
    },
    {
      "epoch": 0.08288,
      "grad_norm": 1.229284881723444,
      "learning_rate": 0.003,
      "loss": 4.0315,
      "step": 8288
    },
    {
      "epoch": 0.08289,
      "grad_norm": 0.8030678208057539,
      "learning_rate": 0.003,
      "loss": 4.043,
      "step": 8289
    },
    {
      "epoch": 0.0829,
      "grad_norm": 0.9234071041203235,
      "learning_rate": 0.003,
      "loss": 4.0207,
      "step": 8290
    },
    {
      "epoch": 0.08291,
      "grad_norm": 1.2192535372086108,
      "learning_rate": 0.003,
      "loss": 4.0383,
      "step": 8291
    },
    {
      "epoch": 0.08292,
      "grad_norm": 1.0516037727758725,
      "learning_rate": 0.003,
      "loss": 4.0395,
      "step": 8292
    },
    {
      "epoch": 0.08293,
      "grad_norm": 0.964068579815504,
      "learning_rate": 0.003,
      "loss": 4.0504,
      "step": 8293
    },
    {
      "epoch": 0.08294,
      "grad_norm": 1.07685445953297,
      "learning_rate": 0.003,
      "loss": 4.0381,
      "step": 8294
    },
    {
      "epoch": 0.08295,
      "grad_norm": 1.2651219299645922,
      "learning_rate": 0.003,
      "loss": 4.042,
      "step": 8295
    },
    {
      "epoch": 0.08296,
      "grad_norm": 0.9626687885048552,
      "learning_rate": 0.003,
      "loss": 4.0367,
      "step": 8296
    },
    {
      "epoch": 0.08297,
      "grad_norm": 0.9687755374419604,
      "learning_rate": 0.003,
      "loss": 4.0473,
      "step": 8297
    },
    {
      "epoch": 0.08298,
      "grad_norm": 1.3012272212633311,
      "learning_rate": 0.003,
      "loss": 4.0227,
      "step": 8298
    },
    {
      "epoch": 0.08299,
      "grad_norm": 0.9471340632405811,
      "learning_rate": 0.003,
      "loss": 4.0364,
      "step": 8299
    },
    {
      "epoch": 0.083,
      "grad_norm": 1.1889456485828054,
      "learning_rate": 0.003,
      "loss": 4.078,
      "step": 8300
    },
    {
      "epoch": 0.08301,
      "grad_norm": 1.0926624866225692,
      "learning_rate": 0.003,
      "loss": 4.0576,
      "step": 8301
    },
    {
      "epoch": 0.08302,
      "grad_norm": 1.061747983105285,
      "learning_rate": 0.003,
      "loss": 4.0669,
      "step": 8302
    },
    {
      "epoch": 0.08303,
      "grad_norm": 1.2124144409859756,
      "learning_rate": 0.003,
      "loss": 4.0409,
      "step": 8303
    },
    {
      "epoch": 0.08304,
      "grad_norm": 1.0062093800648917,
      "learning_rate": 0.003,
      "loss": 4.0666,
      "step": 8304
    },
    {
      "epoch": 0.08305,
      "grad_norm": 1.0797635537801393,
      "learning_rate": 0.003,
      "loss": 4.0375,
      "step": 8305
    },
    {
      "epoch": 0.08306,
      "grad_norm": 1.2165041896744566,
      "learning_rate": 0.003,
      "loss": 4.0521,
      "step": 8306
    },
    {
      "epoch": 0.08307,
      "grad_norm": 0.9371964802211156,
      "learning_rate": 0.003,
      "loss": 4.0348,
      "step": 8307
    },
    {
      "epoch": 0.08308,
      "grad_norm": 1.0824545701121477,
      "learning_rate": 0.003,
      "loss": 4.0232,
      "step": 8308
    },
    {
      "epoch": 0.08309,
      "grad_norm": 1.0702785761306515,
      "learning_rate": 0.003,
      "loss": 4.0208,
      "step": 8309
    },
    {
      "epoch": 0.0831,
      "grad_norm": 0.9850867353661309,
      "learning_rate": 0.003,
      "loss": 4.0465,
      "step": 8310
    },
    {
      "epoch": 0.08311,
      "grad_norm": 1.059401767428526,
      "learning_rate": 0.003,
      "loss": 4.0429,
      "step": 8311
    },
    {
      "epoch": 0.08312,
      "grad_norm": 1.1525254824845346,
      "learning_rate": 0.003,
      "loss": 4.0705,
      "step": 8312
    },
    {
      "epoch": 0.08313,
      "grad_norm": 0.8651922401808925,
      "learning_rate": 0.003,
      "loss": 4.027,
      "step": 8313
    },
    {
      "epoch": 0.08314,
      "grad_norm": 1.0602609432745576,
      "learning_rate": 0.003,
      "loss": 4.0595,
      "step": 8314
    },
    {
      "epoch": 0.08315,
      "grad_norm": 0.8182680455954303,
      "learning_rate": 0.003,
      "loss": 4.0078,
      "step": 8315
    },
    {
      "epoch": 0.08316,
      "grad_norm": 0.898921435803555,
      "learning_rate": 0.003,
      "loss": 4.0305,
      "step": 8316
    },
    {
      "epoch": 0.08317,
      "grad_norm": 1.262794585028179,
      "learning_rate": 0.003,
      "loss": 4.0334,
      "step": 8317
    },
    {
      "epoch": 0.08318,
      "grad_norm": 1.215745444075188,
      "learning_rate": 0.003,
      "loss": 4.0311,
      "step": 8318
    },
    {
      "epoch": 0.08319,
      "grad_norm": 1.019233573877823,
      "learning_rate": 0.003,
      "loss": 4.0213,
      "step": 8319
    },
    {
      "epoch": 0.0832,
      "grad_norm": 1.0398703718664923,
      "learning_rate": 0.003,
      "loss": 4.0189,
      "step": 8320
    },
    {
      "epoch": 0.08321,
      "grad_norm": 1.1361754781004614,
      "learning_rate": 0.003,
      "loss": 4.0539,
      "step": 8321
    },
    {
      "epoch": 0.08322,
      "grad_norm": 0.9093568729986535,
      "learning_rate": 0.003,
      "loss": 4.0267,
      "step": 8322
    },
    {
      "epoch": 0.08323,
      "grad_norm": 1.1369192324437696,
      "learning_rate": 0.003,
      "loss": 4.0197,
      "step": 8323
    },
    {
      "epoch": 0.08324,
      "grad_norm": 1.217916598721158,
      "learning_rate": 0.003,
      "loss": 4.0473,
      "step": 8324
    },
    {
      "epoch": 0.08325,
      "grad_norm": 1.0915850365976298,
      "learning_rate": 0.003,
      "loss": 4.051,
      "step": 8325
    },
    {
      "epoch": 0.08326,
      "grad_norm": 1.1565123938632185,
      "learning_rate": 0.003,
      "loss": 4.0399,
      "step": 8326
    },
    {
      "epoch": 0.08327,
      "grad_norm": 0.8586661169517376,
      "learning_rate": 0.003,
      "loss": 4.0266,
      "step": 8327
    },
    {
      "epoch": 0.08328,
      "grad_norm": 0.8431082244999714,
      "learning_rate": 0.003,
      "loss": 4.0308,
      "step": 8328
    },
    {
      "epoch": 0.08329,
      "grad_norm": 0.831022312396331,
      "learning_rate": 0.003,
      "loss": 4.0229,
      "step": 8329
    },
    {
      "epoch": 0.0833,
      "grad_norm": 0.8854225855827627,
      "learning_rate": 0.003,
      "loss": 4.0399,
      "step": 8330
    },
    {
      "epoch": 0.08331,
      "grad_norm": 0.9489920933886857,
      "learning_rate": 0.003,
      "loss": 4.025,
      "step": 8331
    },
    {
      "epoch": 0.08332,
      "grad_norm": 1.0014721691993675,
      "learning_rate": 0.003,
      "loss": 4.0475,
      "step": 8332
    },
    {
      "epoch": 0.08333,
      "grad_norm": 1.1572373116887549,
      "learning_rate": 0.003,
      "loss": 4.0546,
      "step": 8333
    },
    {
      "epoch": 0.08334,
      "grad_norm": 1.0696738130518826,
      "learning_rate": 0.003,
      "loss": 4.0505,
      "step": 8334
    },
    {
      "epoch": 0.08335,
      "grad_norm": 1.134011488039698,
      "learning_rate": 0.003,
      "loss": 4.0444,
      "step": 8335
    },
    {
      "epoch": 0.08336,
      "grad_norm": 1.171624442165235,
      "learning_rate": 0.003,
      "loss": 4.0712,
      "step": 8336
    },
    {
      "epoch": 0.08337,
      "grad_norm": 0.9808780299153776,
      "learning_rate": 0.003,
      "loss": 4.0556,
      "step": 8337
    },
    {
      "epoch": 0.08338,
      "grad_norm": 1.1552726886292524,
      "learning_rate": 0.003,
      "loss": 4.0451,
      "step": 8338
    },
    {
      "epoch": 0.08339,
      "grad_norm": 1.119935444567191,
      "learning_rate": 0.003,
      "loss": 4.0432,
      "step": 8339
    },
    {
      "epoch": 0.0834,
      "grad_norm": 1.1254896342797662,
      "learning_rate": 0.003,
      "loss": 4.0358,
      "step": 8340
    },
    {
      "epoch": 0.08341,
      "grad_norm": 1.1341861913032731,
      "learning_rate": 0.003,
      "loss": 4.0683,
      "step": 8341
    },
    {
      "epoch": 0.08342,
      "grad_norm": 1.025162475919914,
      "learning_rate": 0.003,
      "loss": 4.0165,
      "step": 8342
    },
    {
      "epoch": 0.08343,
      "grad_norm": 1.205937843754898,
      "learning_rate": 0.003,
      "loss": 4.0243,
      "step": 8343
    },
    {
      "epoch": 0.08344,
      "grad_norm": 0.9902520923365202,
      "learning_rate": 0.003,
      "loss": 4.036,
      "step": 8344
    },
    {
      "epoch": 0.08345,
      "grad_norm": 1.288987691741149,
      "learning_rate": 0.003,
      "loss": 4.0358,
      "step": 8345
    },
    {
      "epoch": 0.08346,
      "grad_norm": 0.9551763232371178,
      "learning_rate": 0.003,
      "loss": 4.046,
      "step": 8346
    },
    {
      "epoch": 0.08347,
      "grad_norm": 0.9880135377663202,
      "learning_rate": 0.003,
      "loss": 4.043,
      "step": 8347
    },
    {
      "epoch": 0.08348,
      "grad_norm": 1.2888100524027102,
      "learning_rate": 0.003,
      "loss": 4.0159,
      "step": 8348
    },
    {
      "epoch": 0.08349,
      "grad_norm": 0.9525381368631001,
      "learning_rate": 0.003,
      "loss": 4.0517,
      "step": 8349
    },
    {
      "epoch": 0.0835,
      "grad_norm": 1.1902573539688603,
      "learning_rate": 0.003,
      "loss": 4.0782,
      "step": 8350
    },
    {
      "epoch": 0.08351,
      "grad_norm": 1.0469716149508292,
      "learning_rate": 0.003,
      "loss": 4.0642,
      "step": 8351
    },
    {
      "epoch": 0.08352,
      "grad_norm": 1.189680771320039,
      "learning_rate": 0.003,
      "loss": 4.0617,
      "step": 8352
    },
    {
      "epoch": 0.08353,
      "grad_norm": 0.9911168111334406,
      "learning_rate": 0.003,
      "loss": 4.0421,
      "step": 8353
    },
    {
      "epoch": 0.08354,
      "grad_norm": 1.0276631861654357,
      "learning_rate": 0.003,
      "loss": 4.0469,
      "step": 8354
    },
    {
      "epoch": 0.08355,
      "grad_norm": 0.9849728479941415,
      "learning_rate": 0.003,
      "loss": 4.0212,
      "step": 8355
    },
    {
      "epoch": 0.08356,
      "grad_norm": 1.01503709707315,
      "learning_rate": 0.003,
      "loss": 4.0456,
      "step": 8356
    },
    {
      "epoch": 0.08357,
      "grad_norm": 1.2200149399048952,
      "learning_rate": 0.003,
      "loss": 4.0573,
      "step": 8357
    },
    {
      "epoch": 0.08358,
      "grad_norm": 0.9309969506083857,
      "learning_rate": 0.003,
      "loss": 4.0413,
      "step": 8358
    },
    {
      "epoch": 0.08359,
      "grad_norm": 1.1903852595304782,
      "learning_rate": 0.003,
      "loss": 4.0208,
      "step": 8359
    },
    {
      "epoch": 0.0836,
      "grad_norm": 1.1339893715722242,
      "learning_rate": 0.003,
      "loss": 4.0266,
      "step": 8360
    },
    {
      "epoch": 0.08361,
      "grad_norm": 1.0643831400781818,
      "learning_rate": 0.003,
      "loss": 4.0443,
      "step": 8361
    },
    {
      "epoch": 0.08362,
      "grad_norm": 1.249532330084765,
      "learning_rate": 0.003,
      "loss": 4.0648,
      "step": 8362
    },
    {
      "epoch": 0.08363,
      "grad_norm": 0.8661252025928181,
      "learning_rate": 0.003,
      "loss": 4.0659,
      "step": 8363
    },
    {
      "epoch": 0.08364,
      "grad_norm": 0.90648079035645,
      "learning_rate": 0.003,
      "loss": 4.0288,
      "step": 8364
    },
    {
      "epoch": 0.08365,
      "grad_norm": 0.9973227794538887,
      "learning_rate": 0.003,
      "loss": 4.0499,
      "step": 8365
    },
    {
      "epoch": 0.08366,
      "grad_norm": 1.185845321832341,
      "learning_rate": 0.003,
      "loss": 4.0641,
      "step": 8366
    },
    {
      "epoch": 0.08367,
      "grad_norm": 1.0527110344332224,
      "learning_rate": 0.003,
      "loss": 4.038,
      "step": 8367
    },
    {
      "epoch": 0.08368,
      "grad_norm": 1.1213177889830799,
      "learning_rate": 0.003,
      "loss": 4.0593,
      "step": 8368
    },
    {
      "epoch": 0.08369,
      "grad_norm": 0.8532852276611194,
      "learning_rate": 0.003,
      "loss": 4.0386,
      "step": 8369
    },
    {
      "epoch": 0.0837,
      "grad_norm": 0.9223226425587234,
      "learning_rate": 0.003,
      "loss": 4.0372,
      "step": 8370
    },
    {
      "epoch": 0.08371,
      "grad_norm": 1.0781243641141955,
      "learning_rate": 0.003,
      "loss": 4.0482,
      "step": 8371
    },
    {
      "epoch": 0.08372,
      "grad_norm": 0.9339538120598732,
      "learning_rate": 0.003,
      "loss": 4.0613,
      "step": 8372
    },
    {
      "epoch": 0.08373,
      "grad_norm": 1.4452876155141794,
      "learning_rate": 0.003,
      "loss": 4.0518,
      "step": 8373
    },
    {
      "epoch": 0.08374,
      "grad_norm": 0.9063853871722677,
      "learning_rate": 0.003,
      "loss": 4.024,
      "step": 8374
    },
    {
      "epoch": 0.08375,
      "grad_norm": 0.993278332875694,
      "learning_rate": 0.003,
      "loss": 4.0292,
      "step": 8375
    },
    {
      "epoch": 0.08376,
      "grad_norm": 1.1855350373423543,
      "learning_rate": 0.003,
      "loss": 4.0372,
      "step": 8376
    },
    {
      "epoch": 0.08377,
      "grad_norm": 1.1147746898162996,
      "learning_rate": 0.003,
      "loss": 4.0156,
      "step": 8377
    },
    {
      "epoch": 0.08378,
      "grad_norm": 1.138214657903677,
      "learning_rate": 0.003,
      "loss": 4.0465,
      "step": 8378
    },
    {
      "epoch": 0.08379,
      "grad_norm": 1.2479011609022754,
      "learning_rate": 0.003,
      "loss": 4.0424,
      "step": 8379
    },
    {
      "epoch": 0.0838,
      "grad_norm": 1.1416308067638565,
      "learning_rate": 0.003,
      "loss": 4.0391,
      "step": 8380
    },
    {
      "epoch": 0.08381,
      "grad_norm": 0.8970625864687897,
      "learning_rate": 0.003,
      "loss": 4.0395,
      "step": 8381
    },
    {
      "epoch": 0.08382,
      "grad_norm": 1.1310709176053892,
      "learning_rate": 0.003,
      "loss": 4.0561,
      "step": 8382
    },
    {
      "epoch": 0.08383,
      "grad_norm": 1.2920665241918188,
      "learning_rate": 0.003,
      "loss": 4.028,
      "step": 8383
    },
    {
      "epoch": 0.08384,
      "grad_norm": 1.0350723875968093,
      "learning_rate": 0.003,
      "loss": 4.0264,
      "step": 8384
    },
    {
      "epoch": 0.08385,
      "grad_norm": 1.103275398901523,
      "learning_rate": 0.003,
      "loss": 4.0274,
      "step": 8385
    },
    {
      "epoch": 0.08386,
      "grad_norm": 0.9681045513350701,
      "learning_rate": 0.003,
      "loss": 4.0381,
      "step": 8386
    },
    {
      "epoch": 0.08387,
      "grad_norm": 1.355083711142688,
      "learning_rate": 0.003,
      "loss": 4.0191,
      "step": 8387
    },
    {
      "epoch": 0.08388,
      "grad_norm": 0.7934403977447374,
      "learning_rate": 0.003,
      "loss": 4.0068,
      "step": 8388
    },
    {
      "epoch": 0.08389,
      "grad_norm": 0.9516574206564951,
      "learning_rate": 0.003,
      "loss": 4.0549,
      "step": 8389
    },
    {
      "epoch": 0.0839,
      "grad_norm": 1.1290072369116717,
      "learning_rate": 0.003,
      "loss": 4.0229,
      "step": 8390
    },
    {
      "epoch": 0.08391,
      "grad_norm": 1.0983041546433672,
      "learning_rate": 0.003,
      "loss": 4.0608,
      "step": 8391
    },
    {
      "epoch": 0.08392,
      "grad_norm": 1.1676787780611126,
      "learning_rate": 0.003,
      "loss": 4.0307,
      "step": 8392
    },
    {
      "epoch": 0.08393,
      "grad_norm": 0.941880845010308,
      "learning_rate": 0.003,
      "loss": 4.0559,
      "step": 8393
    },
    {
      "epoch": 0.08394,
      "grad_norm": 1.0752262942555577,
      "learning_rate": 0.003,
      "loss": 4.0154,
      "step": 8394
    },
    {
      "epoch": 0.08395,
      "grad_norm": 1.0717439798578736,
      "learning_rate": 0.003,
      "loss": 4.0253,
      "step": 8395
    },
    {
      "epoch": 0.08396,
      "grad_norm": 0.923502718166825,
      "learning_rate": 0.003,
      "loss": 4.0538,
      "step": 8396
    },
    {
      "epoch": 0.08397,
      "grad_norm": 1.03568380249952,
      "learning_rate": 0.003,
      "loss": 4.0272,
      "step": 8397
    },
    {
      "epoch": 0.08398,
      "grad_norm": 1.2308296952539537,
      "learning_rate": 0.003,
      "loss": 4.0489,
      "step": 8398
    },
    {
      "epoch": 0.08399,
      "grad_norm": 0.9074167959796567,
      "learning_rate": 0.003,
      "loss": 4.0306,
      "step": 8399
    },
    {
      "epoch": 0.084,
      "grad_norm": 1.0676950120843756,
      "learning_rate": 0.003,
      "loss": 4.0105,
      "step": 8400
    },
    {
      "epoch": 0.08401,
      "grad_norm": 1.1876371217178374,
      "learning_rate": 0.003,
      "loss": 4.0245,
      "step": 8401
    },
    {
      "epoch": 0.08402,
      "grad_norm": 0.9744971214519869,
      "learning_rate": 0.003,
      "loss": 4.017,
      "step": 8402
    },
    {
      "epoch": 0.08403,
      "grad_norm": 1.1750219156987762,
      "learning_rate": 0.003,
      "loss": 4.0348,
      "step": 8403
    },
    {
      "epoch": 0.08404,
      "grad_norm": 1.0407406867360138,
      "learning_rate": 0.003,
      "loss": 4.0248,
      "step": 8404
    },
    {
      "epoch": 0.08405,
      "grad_norm": 1.2236517472521589,
      "learning_rate": 0.003,
      "loss": 4.0291,
      "step": 8405
    },
    {
      "epoch": 0.08406,
      "grad_norm": 1.0737329492962817,
      "learning_rate": 0.003,
      "loss": 4.0141,
      "step": 8406
    },
    {
      "epoch": 0.08407,
      "grad_norm": 1.1563501503982763,
      "learning_rate": 0.003,
      "loss": 4.0929,
      "step": 8407
    },
    {
      "epoch": 0.08408,
      "grad_norm": 1.151672035981857,
      "learning_rate": 0.003,
      "loss": 4.0174,
      "step": 8408
    },
    {
      "epoch": 0.08409,
      "grad_norm": 1.052519356480212,
      "learning_rate": 0.003,
      "loss": 4.0561,
      "step": 8409
    },
    {
      "epoch": 0.0841,
      "grad_norm": 1.3255584980401665,
      "learning_rate": 0.003,
      "loss": 4.0595,
      "step": 8410
    },
    {
      "epoch": 0.08411,
      "grad_norm": 0.7725053175360581,
      "learning_rate": 0.003,
      "loss": 4.0752,
      "step": 8411
    },
    {
      "epoch": 0.08412,
      "grad_norm": 0.9932518870157506,
      "learning_rate": 0.003,
      "loss": 4.0531,
      "step": 8412
    },
    {
      "epoch": 0.08413,
      "grad_norm": 1.3097609444125302,
      "learning_rate": 0.003,
      "loss": 4.0236,
      "step": 8413
    },
    {
      "epoch": 0.08414,
      "grad_norm": 0.8806084620399991,
      "learning_rate": 0.003,
      "loss": 4.057,
      "step": 8414
    },
    {
      "epoch": 0.08415,
      "grad_norm": 1.1049915504817205,
      "learning_rate": 0.003,
      "loss": 4.0198,
      "step": 8415
    },
    {
      "epoch": 0.08416,
      "grad_norm": 1.0825647357972656,
      "learning_rate": 0.003,
      "loss": 4.0553,
      "step": 8416
    },
    {
      "epoch": 0.08417,
      "grad_norm": 0.982855881055315,
      "learning_rate": 0.003,
      "loss": 4.0496,
      "step": 8417
    },
    {
      "epoch": 0.08418,
      "grad_norm": 0.9521266964049695,
      "learning_rate": 0.003,
      "loss": 4.033,
      "step": 8418
    },
    {
      "epoch": 0.08419,
      "grad_norm": 0.9807370150605913,
      "learning_rate": 0.003,
      "loss": 4.0458,
      "step": 8419
    },
    {
      "epoch": 0.0842,
      "grad_norm": 1.1284553332406668,
      "learning_rate": 0.003,
      "loss": 4.0367,
      "step": 8420
    },
    {
      "epoch": 0.08421,
      "grad_norm": 1.0701358935667225,
      "learning_rate": 0.003,
      "loss": 4.0194,
      "step": 8421
    },
    {
      "epoch": 0.08422,
      "grad_norm": 1.110363256131976,
      "learning_rate": 0.003,
      "loss": 4.0577,
      "step": 8422
    },
    {
      "epoch": 0.08423,
      "grad_norm": 1.0301265941795035,
      "learning_rate": 0.003,
      "loss": 4.0428,
      "step": 8423
    },
    {
      "epoch": 0.08424,
      "grad_norm": 1.2241888249026445,
      "learning_rate": 0.003,
      "loss": 4.0417,
      "step": 8424
    },
    {
      "epoch": 0.08425,
      "grad_norm": 1.2132223786728582,
      "learning_rate": 0.003,
      "loss": 4.0585,
      "step": 8425
    },
    {
      "epoch": 0.08426,
      "grad_norm": 1.053345472153612,
      "learning_rate": 0.003,
      "loss": 4.0372,
      "step": 8426
    },
    {
      "epoch": 0.08427,
      "grad_norm": 1.062648403306003,
      "learning_rate": 0.003,
      "loss": 4.0609,
      "step": 8427
    },
    {
      "epoch": 0.08428,
      "grad_norm": 0.9969338637013739,
      "learning_rate": 0.003,
      "loss": 4.044,
      "step": 8428
    },
    {
      "epoch": 0.08429,
      "grad_norm": 1.1167478557553572,
      "learning_rate": 0.003,
      "loss": 4.0367,
      "step": 8429
    },
    {
      "epoch": 0.0843,
      "grad_norm": 0.9976920929387573,
      "learning_rate": 0.003,
      "loss": 4.0128,
      "step": 8430
    },
    {
      "epoch": 0.08431,
      "grad_norm": 1.068620820351791,
      "learning_rate": 0.003,
      "loss": 4.0322,
      "step": 8431
    },
    {
      "epoch": 0.08432,
      "grad_norm": 1.055723055328784,
      "learning_rate": 0.003,
      "loss": 4.0191,
      "step": 8432
    },
    {
      "epoch": 0.08433,
      "grad_norm": 1.135026949998236,
      "learning_rate": 0.003,
      "loss": 4.0574,
      "step": 8433
    },
    {
      "epoch": 0.08434,
      "grad_norm": 1.046614082716392,
      "learning_rate": 0.003,
      "loss": 4.0093,
      "step": 8434
    },
    {
      "epoch": 0.08435,
      "grad_norm": 1.2281345462765678,
      "learning_rate": 0.003,
      "loss": 4.0466,
      "step": 8435
    },
    {
      "epoch": 0.08436,
      "grad_norm": 1.0815704353084468,
      "learning_rate": 0.003,
      "loss": 4.0451,
      "step": 8436
    },
    {
      "epoch": 0.08437,
      "grad_norm": 1.0822120658950827,
      "learning_rate": 0.003,
      "loss": 4.0537,
      "step": 8437
    },
    {
      "epoch": 0.08438,
      "grad_norm": 1.0610781680082266,
      "learning_rate": 0.003,
      "loss": 4.039,
      "step": 8438
    },
    {
      "epoch": 0.08439,
      "grad_norm": 1.1633949783616182,
      "learning_rate": 0.003,
      "loss": 4.0548,
      "step": 8439
    },
    {
      "epoch": 0.0844,
      "grad_norm": 1.2351950497416953,
      "learning_rate": 0.003,
      "loss": 4.0698,
      "step": 8440
    },
    {
      "epoch": 0.08441,
      "grad_norm": 0.9160963850716966,
      "learning_rate": 0.003,
      "loss": 4.0355,
      "step": 8441
    },
    {
      "epoch": 0.08442,
      "grad_norm": 1.0403778744976664,
      "learning_rate": 0.003,
      "loss": 4.0231,
      "step": 8442
    },
    {
      "epoch": 0.08443,
      "grad_norm": 0.9690769171898936,
      "learning_rate": 0.003,
      "loss": 4.0387,
      "step": 8443
    },
    {
      "epoch": 0.08444,
      "grad_norm": 1.0046811419165038,
      "learning_rate": 0.003,
      "loss": 4.0723,
      "step": 8444
    },
    {
      "epoch": 0.08445,
      "grad_norm": 1.1457379760825268,
      "learning_rate": 0.003,
      "loss": 4.0258,
      "step": 8445
    },
    {
      "epoch": 0.08446,
      "grad_norm": 1.0784653343143604,
      "learning_rate": 0.003,
      "loss": 4.0465,
      "step": 8446
    },
    {
      "epoch": 0.08447,
      "grad_norm": 0.9955575399367267,
      "learning_rate": 0.003,
      "loss": 4.053,
      "step": 8447
    },
    {
      "epoch": 0.08448,
      "grad_norm": 1.0185612550385477,
      "learning_rate": 0.003,
      "loss": 4.0522,
      "step": 8448
    },
    {
      "epoch": 0.08449,
      "grad_norm": 1.1385992623128935,
      "learning_rate": 0.003,
      "loss": 4.0647,
      "step": 8449
    },
    {
      "epoch": 0.0845,
      "grad_norm": 1.1658036316438427,
      "learning_rate": 0.003,
      "loss": 4.0335,
      "step": 8450
    },
    {
      "epoch": 0.08451,
      "grad_norm": 1.0612990389711034,
      "learning_rate": 0.003,
      "loss": 4.0593,
      "step": 8451
    },
    {
      "epoch": 0.08452,
      "grad_norm": 1.1305785397594885,
      "learning_rate": 0.003,
      "loss": 4.0395,
      "step": 8452
    },
    {
      "epoch": 0.08453,
      "grad_norm": 0.8831441944452132,
      "learning_rate": 0.003,
      "loss": 4.0527,
      "step": 8453
    },
    {
      "epoch": 0.08454,
      "grad_norm": 1.1184930943973512,
      "learning_rate": 0.003,
      "loss": 4.0535,
      "step": 8454
    },
    {
      "epoch": 0.08455,
      "grad_norm": 1.3097730422669984,
      "learning_rate": 0.003,
      "loss": 4.0342,
      "step": 8455
    },
    {
      "epoch": 0.08456,
      "grad_norm": 0.9486933571257935,
      "learning_rate": 0.003,
      "loss": 4.0362,
      "step": 8456
    },
    {
      "epoch": 0.08457,
      "grad_norm": 1.0027555477605425,
      "learning_rate": 0.003,
      "loss": 4.0742,
      "step": 8457
    },
    {
      "epoch": 0.08458,
      "grad_norm": 1.1962971696576739,
      "learning_rate": 0.003,
      "loss": 4.0499,
      "step": 8458
    },
    {
      "epoch": 0.08459,
      "grad_norm": 0.9387597135483401,
      "learning_rate": 0.003,
      "loss": 4.0238,
      "step": 8459
    },
    {
      "epoch": 0.0846,
      "grad_norm": 1.0383172475711102,
      "learning_rate": 0.003,
      "loss": 4.0545,
      "step": 8460
    },
    {
      "epoch": 0.08461,
      "grad_norm": 1.030965491865683,
      "learning_rate": 0.003,
      "loss": 4.058,
      "step": 8461
    },
    {
      "epoch": 0.08462,
      "grad_norm": 1.2171453720071779,
      "learning_rate": 0.003,
      "loss": 4.0645,
      "step": 8462
    },
    {
      "epoch": 0.08463,
      "grad_norm": 1.0376733249187324,
      "learning_rate": 0.003,
      "loss": 4.0293,
      "step": 8463
    },
    {
      "epoch": 0.08464,
      "grad_norm": 1.0961626196613234,
      "learning_rate": 0.003,
      "loss": 4.0688,
      "step": 8464
    },
    {
      "epoch": 0.08465,
      "grad_norm": 1.0350888302747112,
      "learning_rate": 0.003,
      "loss": 4.047,
      "step": 8465
    },
    {
      "epoch": 0.08466,
      "grad_norm": 1.064724029669904,
      "learning_rate": 0.003,
      "loss": 4.0635,
      "step": 8466
    },
    {
      "epoch": 0.08467,
      "grad_norm": 1.0710400208790887,
      "learning_rate": 0.003,
      "loss": 4.0647,
      "step": 8467
    },
    {
      "epoch": 0.08468,
      "grad_norm": 1.2789305198093153,
      "learning_rate": 0.003,
      "loss": 4.063,
      "step": 8468
    },
    {
      "epoch": 0.08469,
      "grad_norm": 1.1452225551848991,
      "learning_rate": 0.003,
      "loss": 4.0595,
      "step": 8469
    },
    {
      "epoch": 0.0847,
      "grad_norm": 1.13409301786806,
      "learning_rate": 0.003,
      "loss": 4.0414,
      "step": 8470
    },
    {
      "epoch": 0.08471,
      "grad_norm": 1.1566161039870437,
      "learning_rate": 0.003,
      "loss": 4.0248,
      "step": 8471
    },
    {
      "epoch": 0.08472,
      "grad_norm": 1.089186577688795,
      "learning_rate": 0.003,
      "loss": 4.0343,
      "step": 8472
    },
    {
      "epoch": 0.08473,
      "grad_norm": 0.9900808592861112,
      "learning_rate": 0.003,
      "loss": 4.0275,
      "step": 8473
    },
    {
      "epoch": 0.08474,
      "grad_norm": 1.2998498233820543,
      "learning_rate": 0.003,
      "loss": 4.0207,
      "step": 8474
    },
    {
      "epoch": 0.08475,
      "grad_norm": 0.9225329005828342,
      "learning_rate": 0.003,
      "loss": 4.0405,
      "step": 8475
    },
    {
      "epoch": 0.08476,
      "grad_norm": 1.0564013920428952,
      "learning_rate": 0.003,
      "loss": 4.0603,
      "step": 8476
    },
    {
      "epoch": 0.08477,
      "grad_norm": 0.939864643930408,
      "learning_rate": 0.003,
      "loss": 4.0409,
      "step": 8477
    },
    {
      "epoch": 0.08478,
      "grad_norm": 0.9883124870718386,
      "learning_rate": 0.003,
      "loss": 4.0353,
      "step": 8478
    },
    {
      "epoch": 0.08479,
      "grad_norm": 0.9957851651749337,
      "learning_rate": 0.003,
      "loss": 4.0542,
      "step": 8479
    },
    {
      "epoch": 0.0848,
      "grad_norm": 1.178478915544431,
      "learning_rate": 0.003,
      "loss": 4.0577,
      "step": 8480
    },
    {
      "epoch": 0.08481,
      "grad_norm": 1.0741787634257696,
      "learning_rate": 0.003,
      "loss": 4.0226,
      "step": 8481
    },
    {
      "epoch": 0.08482,
      "grad_norm": 1.1194627479559824,
      "learning_rate": 0.003,
      "loss": 4.0486,
      "step": 8482
    },
    {
      "epoch": 0.08483,
      "grad_norm": 0.9960554568951283,
      "learning_rate": 0.003,
      "loss": 4.0406,
      "step": 8483
    },
    {
      "epoch": 0.08484,
      "grad_norm": 1.361511264699175,
      "learning_rate": 0.003,
      "loss": 4.0261,
      "step": 8484
    },
    {
      "epoch": 0.08485,
      "grad_norm": 1.1007501689045969,
      "learning_rate": 0.003,
      "loss": 4.0499,
      "step": 8485
    },
    {
      "epoch": 0.08486,
      "grad_norm": 1.174240597139128,
      "learning_rate": 0.003,
      "loss": 4.0711,
      "step": 8486
    },
    {
      "epoch": 0.08487,
      "grad_norm": 0.9639151938457295,
      "learning_rate": 0.003,
      "loss": 4.0659,
      "step": 8487
    },
    {
      "epoch": 0.08488,
      "grad_norm": 1.0352335647337247,
      "learning_rate": 0.003,
      "loss": 4.0093,
      "step": 8488
    },
    {
      "epoch": 0.08489,
      "grad_norm": 1.0493143503227644,
      "learning_rate": 0.003,
      "loss": 4.0534,
      "step": 8489
    },
    {
      "epoch": 0.0849,
      "grad_norm": 1.1208147184681883,
      "learning_rate": 0.003,
      "loss": 4.0605,
      "step": 8490
    },
    {
      "epoch": 0.08491,
      "grad_norm": 0.9315118881737822,
      "learning_rate": 0.003,
      "loss": 4.0531,
      "step": 8491
    },
    {
      "epoch": 0.08492,
      "grad_norm": 1.1354408904086009,
      "learning_rate": 0.003,
      "loss": 4.0431,
      "step": 8492
    },
    {
      "epoch": 0.08493,
      "grad_norm": 0.9756810751721894,
      "learning_rate": 0.003,
      "loss": 4.0495,
      "step": 8493
    },
    {
      "epoch": 0.08494,
      "grad_norm": 0.943874329273859,
      "learning_rate": 0.003,
      "loss": 4.0327,
      "step": 8494
    },
    {
      "epoch": 0.08495,
      "grad_norm": 0.9530790823279345,
      "learning_rate": 0.003,
      "loss": 4.0425,
      "step": 8495
    },
    {
      "epoch": 0.08496,
      "grad_norm": 1.0476242283034254,
      "learning_rate": 0.003,
      "loss": 4.0191,
      "step": 8496
    },
    {
      "epoch": 0.08497,
      "grad_norm": 1.2230946630382007,
      "learning_rate": 0.003,
      "loss": 4.0398,
      "step": 8497
    },
    {
      "epoch": 0.08498,
      "grad_norm": 1.1262934455755145,
      "learning_rate": 0.003,
      "loss": 4.0511,
      "step": 8498
    },
    {
      "epoch": 0.08499,
      "grad_norm": 1.1704572369620323,
      "learning_rate": 0.003,
      "loss": 4.0722,
      "step": 8499
    },
    {
      "epoch": 0.085,
      "grad_norm": 1.0437922946142013,
      "learning_rate": 0.003,
      "loss": 4.0079,
      "step": 8500
    },
    {
      "epoch": 0.08501,
      "grad_norm": 1.0686795257894022,
      "learning_rate": 0.003,
      "loss": 4.0283,
      "step": 8501
    },
    {
      "epoch": 0.08502,
      "grad_norm": 1.1527737710293182,
      "learning_rate": 0.003,
      "loss": 4.0449,
      "step": 8502
    },
    {
      "epoch": 0.08503,
      "grad_norm": 1.0146162316100584,
      "learning_rate": 0.003,
      "loss": 4.0408,
      "step": 8503
    },
    {
      "epoch": 0.08504,
      "grad_norm": 1.117321943006214,
      "learning_rate": 0.003,
      "loss": 4.0721,
      "step": 8504
    },
    {
      "epoch": 0.08505,
      "grad_norm": 1.1211708270006748,
      "learning_rate": 0.003,
      "loss": 4.0351,
      "step": 8505
    },
    {
      "epoch": 0.08506,
      "grad_norm": 1.0564903712086136,
      "learning_rate": 0.003,
      "loss": 4.0494,
      "step": 8506
    },
    {
      "epoch": 0.08507,
      "grad_norm": 1.0674811926990067,
      "learning_rate": 0.003,
      "loss": 4.0194,
      "step": 8507
    },
    {
      "epoch": 0.08508,
      "grad_norm": 1.361628142349855,
      "learning_rate": 0.003,
      "loss": 4.0363,
      "step": 8508
    },
    {
      "epoch": 0.08509,
      "grad_norm": 0.9588268292198073,
      "learning_rate": 0.003,
      "loss": 4.0693,
      "step": 8509
    },
    {
      "epoch": 0.0851,
      "grad_norm": 1.4027990342622492,
      "learning_rate": 0.003,
      "loss": 4.0501,
      "step": 8510
    },
    {
      "epoch": 0.08511,
      "grad_norm": 1.0313983203700878,
      "learning_rate": 0.003,
      "loss": 4.0517,
      "step": 8511
    },
    {
      "epoch": 0.08512,
      "grad_norm": 1.014747429488357,
      "learning_rate": 0.003,
      "loss": 4.0756,
      "step": 8512
    },
    {
      "epoch": 0.08513,
      "grad_norm": 1.1188271554621882,
      "learning_rate": 0.003,
      "loss": 4.0415,
      "step": 8513
    },
    {
      "epoch": 0.08514,
      "grad_norm": 0.9977123655522062,
      "learning_rate": 0.003,
      "loss": 4.0457,
      "step": 8514
    },
    {
      "epoch": 0.08515,
      "grad_norm": 1.0545034904025057,
      "learning_rate": 0.003,
      "loss": 4.0379,
      "step": 8515
    },
    {
      "epoch": 0.08516,
      "grad_norm": 1.2561317636760059,
      "learning_rate": 0.003,
      "loss": 4.0738,
      "step": 8516
    },
    {
      "epoch": 0.08517,
      "grad_norm": 1.015842099395916,
      "learning_rate": 0.003,
      "loss": 4.038,
      "step": 8517
    },
    {
      "epoch": 0.08518,
      "grad_norm": 1.1154744018152205,
      "learning_rate": 0.003,
      "loss": 4.032,
      "step": 8518
    },
    {
      "epoch": 0.08519,
      "grad_norm": 0.9308156900461738,
      "learning_rate": 0.003,
      "loss": 4.0309,
      "step": 8519
    },
    {
      "epoch": 0.0852,
      "grad_norm": 1.0123290179587556,
      "learning_rate": 0.003,
      "loss": 4.0101,
      "step": 8520
    },
    {
      "epoch": 0.08521,
      "grad_norm": 0.9696253911148752,
      "learning_rate": 0.003,
      "loss": 4.0518,
      "step": 8521
    },
    {
      "epoch": 0.08522,
      "grad_norm": 1.0651444858320227,
      "learning_rate": 0.003,
      "loss": 4.0076,
      "step": 8522
    },
    {
      "epoch": 0.08523,
      "grad_norm": 1.221416691858745,
      "learning_rate": 0.003,
      "loss": 4.0306,
      "step": 8523
    },
    {
      "epoch": 0.08524,
      "grad_norm": 1.0694054720010744,
      "learning_rate": 0.003,
      "loss": 4.0352,
      "step": 8524
    },
    {
      "epoch": 0.08525,
      "grad_norm": 1.206105613858707,
      "learning_rate": 0.003,
      "loss": 4.0362,
      "step": 8525
    },
    {
      "epoch": 0.08526,
      "grad_norm": 1.125746365698348,
      "learning_rate": 0.003,
      "loss": 4.0376,
      "step": 8526
    },
    {
      "epoch": 0.08527,
      "grad_norm": 1.021213535127077,
      "learning_rate": 0.003,
      "loss": 4.0356,
      "step": 8527
    },
    {
      "epoch": 0.08528,
      "grad_norm": 1.0514486808333348,
      "learning_rate": 0.003,
      "loss": 4.02,
      "step": 8528
    },
    {
      "epoch": 0.08529,
      "grad_norm": 0.8891881765729944,
      "learning_rate": 0.003,
      "loss": 4.0406,
      "step": 8529
    },
    {
      "epoch": 0.0853,
      "grad_norm": 1.0090942600896682,
      "learning_rate": 0.003,
      "loss": 4.0401,
      "step": 8530
    },
    {
      "epoch": 0.08531,
      "grad_norm": 1.077165689433922,
      "learning_rate": 0.003,
      "loss": 4.0176,
      "step": 8531
    },
    {
      "epoch": 0.08532,
      "grad_norm": 0.8886817388250743,
      "learning_rate": 0.003,
      "loss": 4.0195,
      "step": 8532
    },
    {
      "epoch": 0.08533,
      "grad_norm": 0.9499157917354463,
      "learning_rate": 0.003,
      "loss": 4.0532,
      "step": 8533
    },
    {
      "epoch": 0.08534,
      "grad_norm": 1.0462406691660049,
      "learning_rate": 0.003,
      "loss": 4.0496,
      "step": 8534
    },
    {
      "epoch": 0.08535,
      "grad_norm": 1.0427155178314294,
      "learning_rate": 0.003,
      "loss": 4.0338,
      "step": 8535
    },
    {
      "epoch": 0.08536,
      "grad_norm": 1.3345887932955967,
      "learning_rate": 0.003,
      "loss": 4.0603,
      "step": 8536
    },
    {
      "epoch": 0.08537,
      "grad_norm": 1.1424324189819963,
      "learning_rate": 0.003,
      "loss": 4.0201,
      "step": 8537
    },
    {
      "epoch": 0.08538,
      "grad_norm": 1.1087130620488592,
      "learning_rate": 0.003,
      "loss": 4.0242,
      "step": 8538
    },
    {
      "epoch": 0.08539,
      "grad_norm": 0.9423257796614134,
      "learning_rate": 0.003,
      "loss": 4.06,
      "step": 8539
    },
    {
      "epoch": 0.0854,
      "grad_norm": 0.9989943262329959,
      "learning_rate": 0.003,
      "loss": 4.0082,
      "step": 8540
    },
    {
      "epoch": 0.08541,
      "grad_norm": 1.1159602342043562,
      "learning_rate": 0.003,
      "loss": 4.0235,
      "step": 8541
    },
    {
      "epoch": 0.08542,
      "grad_norm": 1.1750101404966282,
      "learning_rate": 0.003,
      "loss": 4.0604,
      "step": 8542
    },
    {
      "epoch": 0.08543,
      "grad_norm": 1.1097646268558357,
      "learning_rate": 0.003,
      "loss": 4.0458,
      "step": 8543
    },
    {
      "epoch": 0.08544,
      "grad_norm": 1.2246197163952683,
      "learning_rate": 0.003,
      "loss": 4.03,
      "step": 8544
    },
    {
      "epoch": 0.08545,
      "grad_norm": 0.9006431906440999,
      "learning_rate": 0.003,
      "loss": 4.0329,
      "step": 8545
    },
    {
      "epoch": 0.08546,
      "grad_norm": 0.8535243027708214,
      "learning_rate": 0.003,
      "loss": 4.0364,
      "step": 8546
    },
    {
      "epoch": 0.08547,
      "grad_norm": 0.9091940967690285,
      "learning_rate": 0.003,
      "loss": 4.0509,
      "step": 8547
    },
    {
      "epoch": 0.08548,
      "grad_norm": 1.1204422349465504,
      "learning_rate": 0.003,
      "loss": 4.0582,
      "step": 8548
    },
    {
      "epoch": 0.08549,
      "grad_norm": 1.1376178894662337,
      "learning_rate": 0.003,
      "loss": 4.0538,
      "step": 8549
    },
    {
      "epoch": 0.0855,
      "grad_norm": 0.9004834981881831,
      "learning_rate": 0.003,
      "loss": 4.0591,
      "step": 8550
    },
    {
      "epoch": 0.08551,
      "grad_norm": 1.0908856802279696,
      "learning_rate": 0.003,
      "loss": 4.0615,
      "step": 8551
    },
    {
      "epoch": 0.08552,
      "grad_norm": 1.0747606537103462,
      "learning_rate": 0.003,
      "loss": 4.0433,
      "step": 8552
    },
    {
      "epoch": 0.08553,
      "grad_norm": 1.1824536376723846,
      "learning_rate": 0.003,
      "loss": 4.0545,
      "step": 8553
    },
    {
      "epoch": 0.08554,
      "grad_norm": 0.9327035296984333,
      "learning_rate": 0.003,
      "loss": 4.0105,
      "step": 8554
    },
    {
      "epoch": 0.08555,
      "grad_norm": 1.0845301764121567,
      "learning_rate": 0.003,
      "loss": 4.0657,
      "step": 8555
    },
    {
      "epoch": 0.08556,
      "grad_norm": 1.0781500615217907,
      "learning_rate": 0.003,
      "loss": 4.0524,
      "step": 8556
    },
    {
      "epoch": 0.08557,
      "grad_norm": 1.1114030326740636,
      "learning_rate": 0.003,
      "loss": 4.0206,
      "step": 8557
    },
    {
      "epoch": 0.08558,
      "grad_norm": 1.1387705031741684,
      "learning_rate": 0.003,
      "loss": 4.0458,
      "step": 8558
    },
    {
      "epoch": 0.08559,
      "grad_norm": 1.0373792894377512,
      "learning_rate": 0.003,
      "loss": 4.048,
      "step": 8559
    },
    {
      "epoch": 0.0856,
      "grad_norm": 1.205780356820795,
      "learning_rate": 0.003,
      "loss": 4.0397,
      "step": 8560
    },
    {
      "epoch": 0.08561,
      "grad_norm": 0.8516562843948833,
      "learning_rate": 0.003,
      "loss": 4.032,
      "step": 8561
    },
    {
      "epoch": 0.08562,
      "grad_norm": 0.933449495259518,
      "learning_rate": 0.003,
      "loss": 4.007,
      "step": 8562
    },
    {
      "epoch": 0.08563,
      "grad_norm": 1.0726820040186111,
      "learning_rate": 0.003,
      "loss": 4.038,
      "step": 8563
    },
    {
      "epoch": 0.08564,
      "grad_norm": 1.0347716349195137,
      "learning_rate": 0.003,
      "loss": 4.034,
      "step": 8564
    },
    {
      "epoch": 0.08565,
      "grad_norm": 1.0186874520180855,
      "learning_rate": 0.003,
      "loss": 4.0232,
      "step": 8565
    },
    {
      "epoch": 0.08566,
      "grad_norm": 1.1233566323209978,
      "learning_rate": 0.003,
      "loss": 4.0445,
      "step": 8566
    },
    {
      "epoch": 0.08567,
      "grad_norm": 1.1961057328383637,
      "learning_rate": 0.003,
      "loss": 4.0528,
      "step": 8567
    },
    {
      "epoch": 0.08568,
      "grad_norm": 1.0511200423129479,
      "learning_rate": 0.003,
      "loss": 4.0645,
      "step": 8568
    },
    {
      "epoch": 0.08569,
      "grad_norm": 1.1119472613821024,
      "learning_rate": 0.003,
      "loss": 4.0713,
      "step": 8569
    },
    {
      "epoch": 0.0857,
      "grad_norm": 1.1318905094833156,
      "learning_rate": 0.003,
      "loss": 4.0651,
      "step": 8570
    },
    {
      "epoch": 0.08571,
      "grad_norm": 1.3705938071159904,
      "learning_rate": 0.003,
      "loss": 4.0578,
      "step": 8571
    },
    {
      "epoch": 0.08572,
      "grad_norm": 0.8350836435173717,
      "learning_rate": 0.003,
      "loss": 4.0248,
      "step": 8572
    },
    {
      "epoch": 0.08573,
      "grad_norm": 0.9539575731341017,
      "learning_rate": 0.003,
      "loss": 4.0353,
      "step": 8573
    },
    {
      "epoch": 0.08574,
      "grad_norm": 1.2227418528249123,
      "learning_rate": 0.003,
      "loss": 4.0332,
      "step": 8574
    },
    {
      "epoch": 0.08575,
      "grad_norm": 1.0933729376633516,
      "learning_rate": 0.003,
      "loss": 4.053,
      "step": 8575
    },
    {
      "epoch": 0.08576,
      "grad_norm": 1.011476248931269,
      "learning_rate": 0.003,
      "loss": 4.0444,
      "step": 8576
    },
    {
      "epoch": 0.08577,
      "grad_norm": 0.9840937697692118,
      "learning_rate": 0.003,
      "loss": 4.0424,
      "step": 8577
    },
    {
      "epoch": 0.08578,
      "grad_norm": 1.202980530727398,
      "learning_rate": 0.003,
      "loss": 4.0189,
      "step": 8578
    },
    {
      "epoch": 0.08579,
      "grad_norm": 0.9372317642711228,
      "learning_rate": 0.003,
      "loss": 4.0418,
      "step": 8579
    },
    {
      "epoch": 0.0858,
      "grad_norm": 1.1233693209921423,
      "learning_rate": 0.003,
      "loss": 4.0335,
      "step": 8580
    },
    {
      "epoch": 0.08581,
      "grad_norm": 1.2444195714201942,
      "learning_rate": 0.003,
      "loss": 4.0317,
      "step": 8581
    },
    {
      "epoch": 0.08582,
      "grad_norm": 1.1497113076403636,
      "learning_rate": 0.003,
      "loss": 4.0542,
      "step": 8582
    },
    {
      "epoch": 0.08583,
      "grad_norm": 0.8869126729713231,
      "learning_rate": 0.003,
      "loss": 4.0611,
      "step": 8583
    },
    {
      "epoch": 0.08584,
      "grad_norm": 0.9033828604652565,
      "learning_rate": 0.003,
      "loss": 4.0503,
      "step": 8584
    },
    {
      "epoch": 0.08585,
      "grad_norm": 1.2413242779151106,
      "learning_rate": 0.003,
      "loss": 4.0606,
      "step": 8585
    },
    {
      "epoch": 0.08586,
      "grad_norm": 0.8785419954113871,
      "learning_rate": 0.003,
      "loss": 4.0722,
      "step": 8586
    },
    {
      "epoch": 0.08587,
      "grad_norm": 1.0721276325176368,
      "learning_rate": 0.003,
      "loss": 4.0467,
      "step": 8587
    },
    {
      "epoch": 0.08588,
      "grad_norm": 1.1679761583906054,
      "learning_rate": 0.003,
      "loss": 4.0349,
      "step": 8588
    },
    {
      "epoch": 0.08589,
      "grad_norm": 1.080344129233115,
      "learning_rate": 0.003,
      "loss": 4.0449,
      "step": 8589
    },
    {
      "epoch": 0.0859,
      "grad_norm": 0.9870833943241247,
      "learning_rate": 0.003,
      "loss": 4.0362,
      "step": 8590
    },
    {
      "epoch": 0.08591,
      "grad_norm": 1.034639577282747,
      "learning_rate": 0.003,
      "loss": 4.0394,
      "step": 8591
    },
    {
      "epoch": 0.08592,
      "grad_norm": 1.0710719468181558,
      "learning_rate": 0.003,
      "loss": 4.0669,
      "step": 8592
    },
    {
      "epoch": 0.08593,
      "grad_norm": 0.9888408563640142,
      "learning_rate": 0.003,
      "loss": 4.0803,
      "step": 8593
    },
    {
      "epoch": 0.08594,
      "grad_norm": 1.2453584999330138,
      "learning_rate": 0.003,
      "loss": 4.0433,
      "step": 8594
    },
    {
      "epoch": 0.08595,
      "grad_norm": 1.0196678830693755,
      "learning_rate": 0.003,
      "loss": 4.0437,
      "step": 8595
    },
    {
      "epoch": 0.08596,
      "grad_norm": 1.1945792959997095,
      "learning_rate": 0.003,
      "loss": 4.0659,
      "step": 8596
    },
    {
      "epoch": 0.08597,
      "grad_norm": 1.1530581283872166,
      "learning_rate": 0.003,
      "loss": 4.0589,
      "step": 8597
    },
    {
      "epoch": 0.08598,
      "grad_norm": 1.3926960034791933,
      "learning_rate": 0.003,
      "loss": 4.0379,
      "step": 8598
    },
    {
      "epoch": 0.08599,
      "grad_norm": 0.826192764112842,
      "learning_rate": 0.003,
      "loss": 4.0475,
      "step": 8599
    },
    {
      "epoch": 0.086,
      "grad_norm": 1.06077334266566,
      "learning_rate": 0.003,
      "loss": 4.0737,
      "step": 8600
    },
    {
      "epoch": 0.08601,
      "grad_norm": 1.1871115259087262,
      "learning_rate": 0.003,
      "loss": 4.0605,
      "step": 8601
    },
    {
      "epoch": 0.08602,
      "grad_norm": 1.2181866405394508,
      "learning_rate": 0.003,
      "loss": 4.0713,
      "step": 8602
    },
    {
      "epoch": 0.08603,
      "grad_norm": 1.0883340096060563,
      "learning_rate": 0.003,
      "loss": 4.0617,
      "step": 8603
    },
    {
      "epoch": 0.08604,
      "grad_norm": 0.9503342506965254,
      "learning_rate": 0.003,
      "loss": 4.0386,
      "step": 8604
    },
    {
      "epoch": 0.08605,
      "grad_norm": 1.2172592642031064,
      "learning_rate": 0.003,
      "loss": 4.0575,
      "step": 8605
    },
    {
      "epoch": 0.08606,
      "grad_norm": 1.136390303673651,
      "learning_rate": 0.003,
      "loss": 4.0251,
      "step": 8606
    },
    {
      "epoch": 0.08607,
      "grad_norm": 1.1355782479254777,
      "learning_rate": 0.003,
      "loss": 4.0685,
      "step": 8607
    },
    {
      "epoch": 0.08608,
      "grad_norm": 1.0517606318333685,
      "learning_rate": 0.003,
      "loss": 4.0368,
      "step": 8608
    },
    {
      "epoch": 0.08609,
      "grad_norm": 0.9420025556721784,
      "learning_rate": 0.003,
      "loss": 4.0599,
      "step": 8609
    },
    {
      "epoch": 0.0861,
      "grad_norm": 0.9141311653122305,
      "learning_rate": 0.003,
      "loss": 4.0578,
      "step": 8610
    },
    {
      "epoch": 0.08611,
      "grad_norm": 1.0548509864539932,
      "learning_rate": 0.003,
      "loss": 4.0448,
      "step": 8611
    },
    {
      "epoch": 0.08612,
      "grad_norm": 1.001800534694237,
      "learning_rate": 0.003,
      "loss": 4.0761,
      "step": 8612
    },
    {
      "epoch": 0.08613,
      "grad_norm": 1.0972442988257152,
      "learning_rate": 0.003,
      "loss": 4.0561,
      "step": 8613
    },
    {
      "epoch": 0.08614,
      "grad_norm": 0.9428815080476953,
      "learning_rate": 0.003,
      "loss": 4.0358,
      "step": 8614
    },
    {
      "epoch": 0.08615,
      "grad_norm": 1.0484637576712974,
      "learning_rate": 0.003,
      "loss": 4.0385,
      "step": 8615
    },
    {
      "epoch": 0.08616,
      "grad_norm": 1.0917980757047034,
      "learning_rate": 0.003,
      "loss": 4.0198,
      "step": 8616
    },
    {
      "epoch": 0.08617,
      "grad_norm": 1.2097536460049183,
      "learning_rate": 0.003,
      "loss": 4.0324,
      "step": 8617
    },
    {
      "epoch": 0.08618,
      "grad_norm": 1.3562182619090937,
      "learning_rate": 0.003,
      "loss": 4.051,
      "step": 8618
    },
    {
      "epoch": 0.08619,
      "grad_norm": 0.9246195111624731,
      "learning_rate": 0.003,
      "loss": 4.0639,
      "step": 8619
    },
    {
      "epoch": 0.0862,
      "grad_norm": 0.9824968489316007,
      "learning_rate": 0.003,
      "loss": 4.046,
      "step": 8620
    },
    {
      "epoch": 0.08621,
      "grad_norm": 1.0258471276787318,
      "learning_rate": 0.003,
      "loss": 4.035,
      "step": 8621
    },
    {
      "epoch": 0.08622,
      "grad_norm": 1.183578647619695,
      "learning_rate": 0.003,
      "loss": 4.0648,
      "step": 8622
    },
    {
      "epoch": 0.08623,
      "grad_norm": 1.1381975301836866,
      "learning_rate": 0.003,
      "loss": 4.0536,
      "step": 8623
    },
    {
      "epoch": 0.08624,
      "grad_norm": 1.1630194311964601,
      "learning_rate": 0.003,
      "loss": 4.0391,
      "step": 8624
    },
    {
      "epoch": 0.08625,
      "grad_norm": 0.9306587451164217,
      "learning_rate": 0.003,
      "loss": 4.0247,
      "step": 8625
    },
    {
      "epoch": 0.08626,
      "grad_norm": 1.0498514896735527,
      "learning_rate": 0.003,
      "loss": 4.0652,
      "step": 8626
    },
    {
      "epoch": 0.08627,
      "grad_norm": 1.1755016236915463,
      "learning_rate": 0.003,
      "loss": 4.0468,
      "step": 8627
    },
    {
      "epoch": 0.08628,
      "grad_norm": 0.938529239831759,
      "learning_rate": 0.003,
      "loss": 4.0452,
      "step": 8628
    },
    {
      "epoch": 0.08629,
      "grad_norm": 1.0144244468682728,
      "learning_rate": 0.003,
      "loss": 4.043,
      "step": 8629
    },
    {
      "epoch": 0.0863,
      "grad_norm": 1.113738693838708,
      "learning_rate": 0.003,
      "loss": 4.0354,
      "step": 8630
    },
    {
      "epoch": 0.08631,
      "grad_norm": 1.1256675059051566,
      "learning_rate": 0.003,
      "loss": 4.0483,
      "step": 8631
    },
    {
      "epoch": 0.08632,
      "grad_norm": 0.8974862907760961,
      "learning_rate": 0.003,
      "loss": 4.0307,
      "step": 8632
    },
    {
      "epoch": 0.08633,
      "grad_norm": 1.0495156855470718,
      "learning_rate": 0.003,
      "loss": 4.0698,
      "step": 8633
    },
    {
      "epoch": 0.08634,
      "grad_norm": 1.2948675647945798,
      "learning_rate": 0.003,
      "loss": 4.0622,
      "step": 8634
    },
    {
      "epoch": 0.08635,
      "grad_norm": 1.0184506641521571,
      "learning_rate": 0.003,
      "loss": 4.0482,
      "step": 8635
    },
    {
      "epoch": 0.08636,
      "grad_norm": 1.368584927067475,
      "learning_rate": 0.003,
      "loss": 4.0598,
      "step": 8636
    },
    {
      "epoch": 0.08637,
      "grad_norm": 0.9162061134202947,
      "learning_rate": 0.003,
      "loss": 4.0686,
      "step": 8637
    },
    {
      "epoch": 0.08638,
      "grad_norm": 1.0811639530557986,
      "learning_rate": 0.003,
      "loss": 4.0626,
      "step": 8638
    },
    {
      "epoch": 0.08639,
      "grad_norm": 0.9376836857950404,
      "learning_rate": 0.003,
      "loss": 4.0369,
      "step": 8639
    },
    {
      "epoch": 0.0864,
      "grad_norm": 1.0873412665068496,
      "learning_rate": 0.003,
      "loss": 4.0443,
      "step": 8640
    },
    {
      "epoch": 0.08641,
      "grad_norm": 1.1255192902825057,
      "learning_rate": 0.003,
      "loss": 4.049,
      "step": 8641
    },
    {
      "epoch": 0.08642,
      "grad_norm": 1.1145569844437186,
      "learning_rate": 0.003,
      "loss": 4.0516,
      "step": 8642
    },
    {
      "epoch": 0.08643,
      "grad_norm": 1.4460777384526706,
      "learning_rate": 0.003,
      "loss": 4.0476,
      "step": 8643
    },
    {
      "epoch": 0.08644,
      "grad_norm": 0.9245977533541625,
      "learning_rate": 0.003,
      "loss": 4.022,
      "step": 8644
    },
    {
      "epoch": 0.08645,
      "grad_norm": 1.0181172313173914,
      "learning_rate": 0.003,
      "loss": 4.0351,
      "step": 8645
    },
    {
      "epoch": 0.08646,
      "grad_norm": 1.1454056707809717,
      "learning_rate": 0.003,
      "loss": 4.0247,
      "step": 8646
    },
    {
      "epoch": 0.08647,
      "grad_norm": 1.0065775077778298,
      "learning_rate": 0.003,
      "loss": 4.007,
      "step": 8647
    },
    {
      "epoch": 0.08648,
      "grad_norm": 1.2202424385911437,
      "learning_rate": 0.003,
      "loss": 4.0505,
      "step": 8648
    },
    {
      "epoch": 0.08649,
      "grad_norm": 0.9257171603914495,
      "learning_rate": 0.003,
      "loss": 4.0684,
      "step": 8649
    },
    {
      "epoch": 0.0865,
      "grad_norm": 0.9390491681338301,
      "learning_rate": 0.003,
      "loss": 4.0237,
      "step": 8650
    },
    {
      "epoch": 0.08651,
      "grad_norm": 1.1931127237305668,
      "learning_rate": 0.003,
      "loss": 4.0558,
      "step": 8651
    },
    {
      "epoch": 0.08652,
      "grad_norm": 0.9274015309476554,
      "learning_rate": 0.003,
      "loss": 4.0153,
      "step": 8652
    },
    {
      "epoch": 0.08653,
      "grad_norm": 1.02289631221439,
      "learning_rate": 0.003,
      "loss": 4.0274,
      "step": 8653
    },
    {
      "epoch": 0.08654,
      "grad_norm": 1.3262485339916,
      "learning_rate": 0.003,
      "loss": 4.0502,
      "step": 8654
    },
    {
      "epoch": 0.08655,
      "grad_norm": 1.2428022427956695,
      "learning_rate": 0.003,
      "loss": 4.0302,
      "step": 8655
    },
    {
      "epoch": 0.08656,
      "grad_norm": 1.031840617295688,
      "learning_rate": 0.003,
      "loss": 4.0575,
      "step": 8656
    },
    {
      "epoch": 0.08657,
      "grad_norm": 1.325567296493681,
      "learning_rate": 0.003,
      "loss": 4.076,
      "step": 8657
    },
    {
      "epoch": 0.08658,
      "grad_norm": 1.0189750997869491,
      "learning_rate": 0.003,
      "loss": 4.0313,
      "step": 8658
    },
    {
      "epoch": 0.08659,
      "grad_norm": 1.209380922792147,
      "learning_rate": 0.003,
      "loss": 4.0424,
      "step": 8659
    },
    {
      "epoch": 0.0866,
      "grad_norm": 1.0610985840216802,
      "learning_rate": 0.003,
      "loss": 4.0389,
      "step": 8660
    },
    {
      "epoch": 0.08661,
      "grad_norm": 1.0561305118599713,
      "learning_rate": 0.003,
      "loss": 4.0393,
      "step": 8661
    },
    {
      "epoch": 0.08662,
      "grad_norm": 1.0679146127811068,
      "learning_rate": 0.003,
      "loss": 4.0205,
      "step": 8662
    },
    {
      "epoch": 0.08663,
      "grad_norm": 0.9064928152737026,
      "learning_rate": 0.003,
      "loss": 4.0544,
      "step": 8663
    },
    {
      "epoch": 0.08664,
      "grad_norm": 1.0923363230039982,
      "learning_rate": 0.003,
      "loss": 4.0441,
      "step": 8664
    },
    {
      "epoch": 0.08665,
      "grad_norm": 1.1988464238998455,
      "learning_rate": 0.003,
      "loss": 4.0535,
      "step": 8665
    },
    {
      "epoch": 0.08666,
      "grad_norm": 1.0248023151858396,
      "learning_rate": 0.003,
      "loss": 4.0412,
      "step": 8666
    },
    {
      "epoch": 0.08667,
      "grad_norm": 1.1148548362397608,
      "learning_rate": 0.003,
      "loss": 4.0313,
      "step": 8667
    },
    {
      "epoch": 0.08668,
      "grad_norm": 1.0412822855875483,
      "learning_rate": 0.003,
      "loss": 4.032,
      "step": 8668
    },
    {
      "epoch": 0.08669,
      "grad_norm": 1.2096435963768866,
      "learning_rate": 0.003,
      "loss": 4.0364,
      "step": 8669
    },
    {
      "epoch": 0.0867,
      "grad_norm": 1.123266840921136,
      "learning_rate": 0.003,
      "loss": 4.0371,
      "step": 8670
    },
    {
      "epoch": 0.08671,
      "grad_norm": 1.0877613190872542,
      "learning_rate": 0.003,
      "loss": 4.0475,
      "step": 8671
    },
    {
      "epoch": 0.08672,
      "grad_norm": 1.1406500907143475,
      "learning_rate": 0.003,
      "loss": 4.0336,
      "step": 8672
    },
    {
      "epoch": 0.08673,
      "grad_norm": 0.9398367617096632,
      "learning_rate": 0.003,
      "loss": 4.0387,
      "step": 8673
    },
    {
      "epoch": 0.08674,
      "grad_norm": 1.0253292803580725,
      "learning_rate": 0.003,
      "loss": 4.0496,
      "step": 8674
    },
    {
      "epoch": 0.08675,
      "grad_norm": 1.1953506016711317,
      "learning_rate": 0.003,
      "loss": 4.056,
      "step": 8675
    },
    {
      "epoch": 0.08676,
      "grad_norm": 0.9488718707211112,
      "learning_rate": 0.003,
      "loss": 4.0219,
      "step": 8676
    },
    {
      "epoch": 0.08677,
      "grad_norm": 1.0431682629276453,
      "learning_rate": 0.003,
      "loss": 4.027,
      "step": 8677
    },
    {
      "epoch": 0.08678,
      "grad_norm": 1.193005747951154,
      "learning_rate": 0.003,
      "loss": 4.0377,
      "step": 8678
    },
    {
      "epoch": 0.08679,
      "grad_norm": 1.0144417828651375,
      "learning_rate": 0.003,
      "loss": 4.0447,
      "step": 8679
    },
    {
      "epoch": 0.0868,
      "grad_norm": 1.1348128884099915,
      "learning_rate": 0.003,
      "loss": 4.0106,
      "step": 8680
    },
    {
      "epoch": 0.08681,
      "grad_norm": 1.1097911864091226,
      "learning_rate": 0.003,
      "loss": 4.0295,
      "step": 8681
    },
    {
      "epoch": 0.08682,
      "grad_norm": 1.0697846104835937,
      "learning_rate": 0.003,
      "loss": 4.036,
      "step": 8682
    },
    {
      "epoch": 0.08683,
      "grad_norm": 1.0607468846889823,
      "learning_rate": 0.003,
      "loss": 4.0587,
      "step": 8683
    },
    {
      "epoch": 0.08684,
      "grad_norm": 1.0862632076397618,
      "learning_rate": 0.003,
      "loss": 4.0513,
      "step": 8684
    },
    {
      "epoch": 0.08685,
      "grad_norm": 1.1148108606990261,
      "learning_rate": 0.003,
      "loss": 4.0482,
      "step": 8685
    },
    {
      "epoch": 0.08686,
      "grad_norm": 1.0892861530880062,
      "learning_rate": 0.003,
      "loss": 4.0471,
      "step": 8686
    },
    {
      "epoch": 0.08687,
      "grad_norm": 1.2033955582498337,
      "learning_rate": 0.003,
      "loss": 4.0521,
      "step": 8687
    },
    {
      "epoch": 0.08688,
      "grad_norm": 0.9860469350967052,
      "learning_rate": 0.003,
      "loss": 4.0353,
      "step": 8688
    },
    {
      "epoch": 0.08689,
      "grad_norm": 0.887845078826922,
      "learning_rate": 0.003,
      "loss": 3.9991,
      "step": 8689
    },
    {
      "epoch": 0.0869,
      "grad_norm": 0.981947563992108,
      "learning_rate": 0.003,
      "loss": 4.0438,
      "step": 8690
    },
    {
      "epoch": 0.08691,
      "grad_norm": 1.2386584406913188,
      "learning_rate": 0.003,
      "loss": 4.0239,
      "step": 8691
    },
    {
      "epoch": 0.08692,
      "grad_norm": 1.1214274326159783,
      "learning_rate": 0.003,
      "loss": 4.047,
      "step": 8692
    },
    {
      "epoch": 0.08693,
      "grad_norm": 0.9759935969342685,
      "learning_rate": 0.003,
      "loss": 4.0159,
      "step": 8693
    },
    {
      "epoch": 0.08694,
      "grad_norm": 0.9952282248413553,
      "learning_rate": 0.003,
      "loss": 4.0332,
      "step": 8694
    },
    {
      "epoch": 0.08695,
      "grad_norm": 1.2054815926609244,
      "learning_rate": 0.003,
      "loss": 4.0389,
      "step": 8695
    },
    {
      "epoch": 0.08696,
      "grad_norm": 0.9636503750723039,
      "learning_rate": 0.003,
      "loss": 4.0591,
      "step": 8696
    },
    {
      "epoch": 0.08697,
      "grad_norm": 1.0925520713595345,
      "learning_rate": 0.003,
      "loss": 4.0545,
      "step": 8697
    },
    {
      "epoch": 0.08698,
      "grad_norm": 0.8854366350443418,
      "learning_rate": 0.003,
      "loss": 4.0258,
      "step": 8698
    },
    {
      "epoch": 0.08699,
      "grad_norm": 0.9842767035973804,
      "learning_rate": 0.003,
      "loss": 4.0429,
      "step": 8699
    },
    {
      "epoch": 0.087,
      "grad_norm": 1.1497030489175517,
      "learning_rate": 0.003,
      "loss": 4.0692,
      "step": 8700
    },
    {
      "epoch": 0.08701,
      "grad_norm": 1.220812201035816,
      "learning_rate": 0.003,
      "loss": 4.0541,
      "step": 8701
    },
    {
      "epoch": 0.08702,
      "grad_norm": 1.0247846783178878,
      "learning_rate": 0.003,
      "loss": 4.0399,
      "step": 8702
    },
    {
      "epoch": 0.08703,
      "grad_norm": 1.0010386929193278,
      "learning_rate": 0.003,
      "loss": 4.0236,
      "step": 8703
    },
    {
      "epoch": 0.08704,
      "grad_norm": 1.0616711530427767,
      "learning_rate": 0.003,
      "loss": 4.0059,
      "step": 8704
    },
    {
      "epoch": 0.08705,
      "grad_norm": 0.9976320350325579,
      "learning_rate": 0.003,
      "loss": 4.0636,
      "step": 8705
    },
    {
      "epoch": 0.08706,
      "grad_norm": 1.1942693214140914,
      "learning_rate": 0.003,
      "loss": 4.0338,
      "step": 8706
    },
    {
      "epoch": 0.08707,
      "grad_norm": 1.073245473169283,
      "learning_rate": 0.003,
      "loss": 4.0375,
      "step": 8707
    },
    {
      "epoch": 0.08708,
      "grad_norm": 1.1941251771612393,
      "learning_rate": 0.003,
      "loss": 4.0455,
      "step": 8708
    },
    {
      "epoch": 0.08709,
      "grad_norm": 1.054313122339438,
      "learning_rate": 0.003,
      "loss": 4.0546,
      "step": 8709
    },
    {
      "epoch": 0.0871,
      "grad_norm": 1.1735283159957155,
      "learning_rate": 0.003,
      "loss": 4.0348,
      "step": 8710
    },
    {
      "epoch": 0.08711,
      "grad_norm": 1.224731249555308,
      "learning_rate": 0.003,
      "loss": 4.0498,
      "step": 8711
    },
    {
      "epoch": 0.08712,
      "grad_norm": 1.1071733779330812,
      "learning_rate": 0.003,
      "loss": 4.0247,
      "step": 8712
    },
    {
      "epoch": 0.08713,
      "grad_norm": 1.207587077778471,
      "learning_rate": 0.003,
      "loss": 4.0559,
      "step": 8713
    },
    {
      "epoch": 0.08714,
      "grad_norm": 1.035590781407661,
      "learning_rate": 0.003,
      "loss": 4.042,
      "step": 8714
    },
    {
      "epoch": 0.08715,
      "grad_norm": 0.9210397372280619,
      "learning_rate": 0.003,
      "loss": 4.0491,
      "step": 8715
    },
    {
      "epoch": 0.08716,
      "grad_norm": 1.0736853335371308,
      "learning_rate": 0.003,
      "loss": 4.0514,
      "step": 8716
    },
    {
      "epoch": 0.08717,
      "grad_norm": 1.2449278196726106,
      "learning_rate": 0.003,
      "loss": 4.0608,
      "step": 8717
    },
    {
      "epoch": 0.08718,
      "grad_norm": 0.9356165132727563,
      "learning_rate": 0.003,
      "loss": 4.0479,
      "step": 8718
    },
    {
      "epoch": 0.08719,
      "grad_norm": 1.1410701425015397,
      "learning_rate": 0.003,
      "loss": 4.041,
      "step": 8719
    },
    {
      "epoch": 0.0872,
      "grad_norm": 1.0473904852864253,
      "learning_rate": 0.003,
      "loss": 4.0288,
      "step": 8720
    },
    {
      "epoch": 0.08721,
      "grad_norm": 1.1346851924291141,
      "learning_rate": 0.003,
      "loss": 4.0539,
      "step": 8721
    },
    {
      "epoch": 0.08722,
      "grad_norm": 0.9947376087216057,
      "learning_rate": 0.003,
      "loss": 4.0362,
      "step": 8722
    },
    {
      "epoch": 0.08723,
      "grad_norm": 1.2565032752641185,
      "learning_rate": 0.003,
      "loss": 4.0374,
      "step": 8723
    },
    {
      "epoch": 0.08724,
      "grad_norm": 1.013947154344129,
      "learning_rate": 0.003,
      "loss": 4.0362,
      "step": 8724
    },
    {
      "epoch": 0.08725,
      "grad_norm": 1.2550507340568873,
      "learning_rate": 0.003,
      "loss": 4.0552,
      "step": 8725
    },
    {
      "epoch": 0.08726,
      "grad_norm": 1.1552353519239422,
      "learning_rate": 0.003,
      "loss": 4.0377,
      "step": 8726
    },
    {
      "epoch": 0.08727,
      "grad_norm": 1.0836695734061212,
      "learning_rate": 0.003,
      "loss": 4.0302,
      "step": 8727
    },
    {
      "epoch": 0.08728,
      "grad_norm": 0.9999247167010374,
      "learning_rate": 0.003,
      "loss": 4.011,
      "step": 8728
    },
    {
      "epoch": 0.08729,
      "grad_norm": 1.142902216241174,
      "learning_rate": 0.003,
      "loss": 4.0113,
      "step": 8729
    },
    {
      "epoch": 0.0873,
      "grad_norm": 1.091022164759464,
      "learning_rate": 0.003,
      "loss": 4.0471,
      "step": 8730
    },
    {
      "epoch": 0.08731,
      "grad_norm": 1.0210290074405686,
      "learning_rate": 0.003,
      "loss": 4.0463,
      "step": 8731
    },
    {
      "epoch": 0.08732,
      "grad_norm": 1.1449497076583306,
      "learning_rate": 0.003,
      "loss": 4.0303,
      "step": 8732
    },
    {
      "epoch": 0.08733,
      "grad_norm": 1.0036816386493732,
      "learning_rate": 0.003,
      "loss": 4.0271,
      "step": 8733
    },
    {
      "epoch": 0.08734,
      "grad_norm": 1.3034518911251176,
      "learning_rate": 0.003,
      "loss": 4.0705,
      "step": 8734
    },
    {
      "epoch": 0.08735,
      "grad_norm": 1.035050110835489,
      "learning_rate": 0.003,
      "loss": 4.0475,
      "step": 8735
    },
    {
      "epoch": 0.08736,
      "grad_norm": 1.059239037825648,
      "learning_rate": 0.003,
      "loss": 4.0509,
      "step": 8736
    },
    {
      "epoch": 0.08737,
      "grad_norm": 1.1209703870238803,
      "learning_rate": 0.003,
      "loss": 4.0706,
      "step": 8737
    },
    {
      "epoch": 0.08738,
      "grad_norm": 0.897095918930477,
      "learning_rate": 0.003,
      "loss": 4.02,
      "step": 8738
    },
    {
      "epoch": 0.08739,
      "grad_norm": 0.9994054796041234,
      "learning_rate": 0.003,
      "loss": 4.0402,
      "step": 8739
    },
    {
      "epoch": 0.0874,
      "grad_norm": 1.3452908356127442,
      "learning_rate": 0.003,
      "loss": 4.0267,
      "step": 8740
    },
    {
      "epoch": 0.08741,
      "grad_norm": 1.1093603042004494,
      "learning_rate": 0.003,
      "loss": 4.0267,
      "step": 8741
    },
    {
      "epoch": 0.08742,
      "grad_norm": 0.9596481129611945,
      "learning_rate": 0.003,
      "loss": 4.0241,
      "step": 8742
    },
    {
      "epoch": 0.08743,
      "grad_norm": 1.0733682110935197,
      "learning_rate": 0.003,
      "loss": 4.0237,
      "step": 8743
    },
    {
      "epoch": 0.08744,
      "grad_norm": 1.212621060059208,
      "learning_rate": 0.003,
      "loss": 4.066,
      "step": 8744
    },
    {
      "epoch": 0.08745,
      "grad_norm": 1.0255959780393786,
      "learning_rate": 0.003,
      "loss": 4.0398,
      "step": 8745
    },
    {
      "epoch": 0.08746,
      "grad_norm": 1.111182154226899,
      "learning_rate": 0.003,
      "loss": 4.0497,
      "step": 8746
    },
    {
      "epoch": 0.08747,
      "grad_norm": 1.0813261845403195,
      "learning_rate": 0.003,
      "loss": 4.0512,
      "step": 8747
    },
    {
      "epoch": 0.08748,
      "grad_norm": 1.2389683997355077,
      "learning_rate": 0.003,
      "loss": 4.0433,
      "step": 8748
    },
    {
      "epoch": 0.08749,
      "grad_norm": 1.1646233630246605,
      "learning_rate": 0.003,
      "loss": 4.0405,
      "step": 8749
    },
    {
      "epoch": 0.0875,
      "grad_norm": 0.9882478074471066,
      "learning_rate": 0.003,
      "loss": 4.0403,
      "step": 8750
    },
    {
      "epoch": 0.08751,
      "grad_norm": 1.0628828379350737,
      "learning_rate": 0.003,
      "loss": 4.0458,
      "step": 8751
    },
    {
      "epoch": 0.08752,
      "grad_norm": 1.0984158397543857,
      "learning_rate": 0.003,
      "loss": 4.0703,
      "step": 8752
    },
    {
      "epoch": 0.08753,
      "grad_norm": 1.2193128582910855,
      "learning_rate": 0.003,
      "loss": 4.0738,
      "step": 8753
    },
    {
      "epoch": 0.08754,
      "grad_norm": 1.2385678850122033,
      "learning_rate": 0.003,
      "loss": 4.054,
      "step": 8754
    },
    {
      "epoch": 0.08755,
      "grad_norm": 0.9211225646462056,
      "learning_rate": 0.003,
      "loss": 4.0306,
      "step": 8755
    },
    {
      "epoch": 0.08756,
      "grad_norm": 1.0770635030360587,
      "learning_rate": 0.003,
      "loss": 4.0335,
      "step": 8756
    },
    {
      "epoch": 0.08757,
      "grad_norm": 1.1930560657485945,
      "learning_rate": 0.003,
      "loss": 4.0657,
      "step": 8757
    },
    {
      "epoch": 0.08758,
      "grad_norm": 1.1384112467498324,
      "learning_rate": 0.003,
      "loss": 4.0265,
      "step": 8758
    },
    {
      "epoch": 0.08759,
      "grad_norm": 1.1172445551058783,
      "learning_rate": 0.003,
      "loss": 4.0411,
      "step": 8759
    },
    {
      "epoch": 0.0876,
      "grad_norm": 1.1801597872088998,
      "learning_rate": 0.003,
      "loss": 4.0428,
      "step": 8760
    },
    {
      "epoch": 0.08761,
      "grad_norm": 0.9352294073790094,
      "learning_rate": 0.003,
      "loss": 4.0692,
      "step": 8761
    },
    {
      "epoch": 0.08762,
      "grad_norm": 0.8823537867998336,
      "learning_rate": 0.003,
      "loss": 4.0171,
      "step": 8762
    },
    {
      "epoch": 0.08763,
      "grad_norm": 1.0898587675324372,
      "learning_rate": 0.003,
      "loss": 4.0214,
      "step": 8763
    },
    {
      "epoch": 0.08764,
      "grad_norm": 0.9778108833971553,
      "learning_rate": 0.003,
      "loss": 4.0173,
      "step": 8764
    },
    {
      "epoch": 0.08765,
      "grad_norm": 1.1127307572804943,
      "learning_rate": 0.003,
      "loss": 4.0024,
      "step": 8765
    },
    {
      "epoch": 0.08766,
      "grad_norm": 1.064542658962637,
      "learning_rate": 0.003,
      "loss": 4.0583,
      "step": 8766
    },
    {
      "epoch": 0.08767,
      "grad_norm": 1.2769881975725046,
      "learning_rate": 0.003,
      "loss": 4.0438,
      "step": 8767
    },
    {
      "epoch": 0.08768,
      "grad_norm": 0.9716458694094685,
      "learning_rate": 0.003,
      "loss": 4.0334,
      "step": 8768
    },
    {
      "epoch": 0.08769,
      "grad_norm": 1.02263370249674,
      "learning_rate": 0.003,
      "loss": 4.0286,
      "step": 8769
    },
    {
      "epoch": 0.0877,
      "grad_norm": 1.1197891339803154,
      "learning_rate": 0.003,
      "loss": 4.0442,
      "step": 8770
    },
    {
      "epoch": 0.08771,
      "grad_norm": 1.0343547006779303,
      "learning_rate": 0.003,
      "loss": 4.0153,
      "step": 8771
    },
    {
      "epoch": 0.08772,
      "grad_norm": 1.1391338162247877,
      "learning_rate": 0.003,
      "loss": 4.0277,
      "step": 8772
    },
    {
      "epoch": 0.08773,
      "grad_norm": 1.133183973465544,
      "learning_rate": 0.003,
      "loss": 4.0593,
      "step": 8773
    },
    {
      "epoch": 0.08774,
      "grad_norm": 0.9767687479388363,
      "learning_rate": 0.003,
      "loss": 4.0378,
      "step": 8774
    },
    {
      "epoch": 0.08775,
      "grad_norm": 1.0614747177631898,
      "learning_rate": 0.003,
      "loss": 4.0223,
      "step": 8775
    },
    {
      "epoch": 0.08776,
      "grad_norm": 1.2783088947042238,
      "learning_rate": 0.003,
      "loss": 4.0445,
      "step": 8776
    },
    {
      "epoch": 0.08777,
      "grad_norm": 0.7758444140217153,
      "learning_rate": 0.003,
      "loss": 4.0461,
      "step": 8777
    },
    {
      "epoch": 0.08778,
      "grad_norm": 0.933691111864922,
      "learning_rate": 0.003,
      "loss": 4.038,
      "step": 8778
    },
    {
      "epoch": 0.08779,
      "grad_norm": 1.1241941614649351,
      "learning_rate": 0.003,
      "loss": 4.057,
      "step": 8779
    },
    {
      "epoch": 0.0878,
      "grad_norm": 1.2838395411494257,
      "learning_rate": 0.003,
      "loss": 4.0246,
      "step": 8780
    },
    {
      "epoch": 0.08781,
      "grad_norm": 1.0245994272171686,
      "learning_rate": 0.003,
      "loss": 4.0306,
      "step": 8781
    },
    {
      "epoch": 0.08782,
      "grad_norm": 1.166015780846121,
      "learning_rate": 0.003,
      "loss": 4.0364,
      "step": 8782
    },
    {
      "epoch": 0.08783,
      "grad_norm": 0.94136343088169,
      "learning_rate": 0.003,
      "loss": 4.0473,
      "step": 8783
    },
    {
      "epoch": 0.08784,
      "grad_norm": 1.0537644571277438,
      "learning_rate": 0.003,
      "loss": 4.0368,
      "step": 8784
    },
    {
      "epoch": 0.08785,
      "grad_norm": 1.188275684770546,
      "learning_rate": 0.003,
      "loss": 4.0128,
      "step": 8785
    },
    {
      "epoch": 0.08786,
      "grad_norm": 0.9863606748261747,
      "learning_rate": 0.003,
      "loss": 4.0597,
      "step": 8786
    },
    {
      "epoch": 0.08787,
      "grad_norm": 1.128286694821681,
      "learning_rate": 0.003,
      "loss": 4.0272,
      "step": 8787
    },
    {
      "epoch": 0.08788,
      "grad_norm": 1.079026401112349,
      "learning_rate": 0.003,
      "loss": 4.0606,
      "step": 8788
    },
    {
      "epoch": 0.08789,
      "grad_norm": 1.1880750581034827,
      "learning_rate": 0.003,
      "loss": 4.0364,
      "step": 8789
    },
    {
      "epoch": 0.0879,
      "grad_norm": 1.0176004388763833,
      "learning_rate": 0.003,
      "loss": 4.0519,
      "step": 8790
    },
    {
      "epoch": 0.08791,
      "grad_norm": 1.2038507485153178,
      "learning_rate": 0.003,
      "loss": 4.0416,
      "step": 8791
    },
    {
      "epoch": 0.08792,
      "grad_norm": 0.9980217290009189,
      "learning_rate": 0.003,
      "loss": 4.0505,
      "step": 8792
    },
    {
      "epoch": 0.08793,
      "grad_norm": 1.1566691894267236,
      "learning_rate": 0.003,
      "loss": 4.0452,
      "step": 8793
    },
    {
      "epoch": 0.08794,
      "grad_norm": 1.0311955693118802,
      "learning_rate": 0.003,
      "loss": 4.0348,
      "step": 8794
    },
    {
      "epoch": 0.08795,
      "grad_norm": 1.0735551697097647,
      "learning_rate": 0.003,
      "loss": 4.0367,
      "step": 8795
    },
    {
      "epoch": 0.08796,
      "grad_norm": 1.2673748855568097,
      "learning_rate": 0.003,
      "loss": 4.0458,
      "step": 8796
    },
    {
      "epoch": 0.08797,
      "grad_norm": 1.0868436946789142,
      "learning_rate": 0.003,
      "loss": 4.0195,
      "step": 8797
    },
    {
      "epoch": 0.08798,
      "grad_norm": 1.042119281795597,
      "learning_rate": 0.003,
      "loss": 4.0605,
      "step": 8798
    },
    {
      "epoch": 0.08799,
      "grad_norm": 1.0602674706510509,
      "learning_rate": 0.003,
      "loss": 4.0157,
      "step": 8799
    },
    {
      "epoch": 0.088,
      "grad_norm": 1.008077319870118,
      "learning_rate": 0.003,
      "loss": 4.0603,
      "step": 8800
    },
    {
      "epoch": 0.08801,
      "grad_norm": 1.0969522818507715,
      "learning_rate": 0.003,
      "loss": 4.0028,
      "step": 8801
    },
    {
      "epoch": 0.08802,
      "grad_norm": 1.1376221068560126,
      "learning_rate": 0.003,
      "loss": 4.052,
      "step": 8802
    },
    {
      "epoch": 0.08803,
      "grad_norm": 1.0879978004992539,
      "learning_rate": 0.003,
      "loss": 4.0168,
      "step": 8803
    },
    {
      "epoch": 0.08804,
      "grad_norm": 1.1606225965715604,
      "learning_rate": 0.003,
      "loss": 4.025,
      "step": 8804
    },
    {
      "epoch": 0.08805,
      "grad_norm": 0.966413249750066,
      "learning_rate": 0.003,
      "loss": 4.015,
      "step": 8805
    },
    {
      "epoch": 0.08806,
      "grad_norm": 1.178861743093662,
      "learning_rate": 0.003,
      "loss": 4.031,
      "step": 8806
    },
    {
      "epoch": 0.08807,
      "grad_norm": 0.9970083203505135,
      "learning_rate": 0.003,
      "loss": 4.0459,
      "step": 8807
    },
    {
      "epoch": 0.08808,
      "grad_norm": 1.148232319300287,
      "learning_rate": 0.003,
      "loss": 4.0332,
      "step": 8808
    },
    {
      "epoch": 0.08809,
      "grad_norm": 1.02006120805353,
      "learning_rate": 0.003,
      "loss": 4.0349,
      "step": 8809
    },
    {
      "epoch": 0.0881,
      "grad_norm": 1.2541318447732,
      "learning_rate": 0.003,
      "loss": 4.0516,
      "step": 8810
    },
    {
      "epoch": 0.08811,
      "grad_norm": 0.9654813555045425,
      "learning_rate": 0.003,
      "loss": 4.0419,
      "step": 8811
    },
    {
      "epoch": 0.08812,
      "grad_norm": 1.0815748935370195,
      "learning_rate": 0.003,
      "loss": 4.0466,
      "step": 8812
    },
    {
      "epoch": 0.08813,
      "grad_norm": 1.2017717029030808,
      "learning_rate": 0.003,
      "loss": 4.0165,
      "step": 8813
    },
    {
      "epoch": 0.08814,
      "grad_norm": 1.061652224882425,
      "learning_rate": 0.003,
      "loss": 4.0535,
      "step": 8814
    },
    {
      "epoch": 0.08815,
      "grad_norm": 1.1941981502914418,
      "learning_rate": 0.003,
      "loss": 4.053,
      "step": 8815
    },
    {
      "epoch": 0.08816,
      "grad_norm": 0.9722887512032639,
      "learning_rate": 0.003,
      "loss": 4.0388,
      "step": 8816
    },
    {
      "epoch": 0.08817,
      "grad_norm": 1.1137225093485594,
      "learning_rate": 0.003,
      "loss": 4.0271,
      "step": 8817
    },
    {
      "epoch": 0.08818,
      "grad_norm": 0.9632280469657819,
      "learning_rate": 0.003,
      "loss": 4.0317,
      "step": 8818
    },
    {
      "epoch": 0.08819,
      "grad_norm": 1.2912552481164463,
      "learning_rate": 0.003,
      "loss": 4.0728,
      "step": 8819
    },
    {
      "epoch": 0.0882,
      "grad_norm": 1.2656702522604,
      "learning_rate": 0.003,
      "loss": 4.0467,
      "step": 8820
    },
    {
      "epoch": 0.08821,
      "grad_norm": 0.8641185704105159,
      "learning_rate": 0.003,
      "loss": 4.0677,
      "step": 8821
    },
    {
      "epoch": 0.08822,
      "grad_norm": 0.8526858641972325,
      "learning_rate": 0.003,
      "loss": 4.0033,
      "step": 8822
    },
    {
      "epoch": 0.08823,
      "grad_norm": 0.9761138588500732,
      "learning_rate": 0.003,
      "loss": 4.0351,
      "step": 8823
    },
    {
      "epoch": 0.08824,
      "grad_norm": 0.9990171985622588,
      "learning_rate": 0.003,
      "loss": 4.032,
      "step": 8824
    },
    {
      "epoch": 0.08825,
      "grad_norm": 0.9853141720864859,
      "learning_rate": 0.003,
      "loss": 4.0407,
      "step": 8825
    },
    {
      "epoch": 0.08826,
      "grad_norm": 1.216924716523876,
      "learning_rate": 0.003,
      "loss": 4.0256,
      "step": 8826
    },
    {
      "epoch": 0.08827,
      "grad_norm": 1.135457838710045,
      "learning_rate": 0.003,
      "loss": 4.0509,
      "step": 8827
    },
    {
      "epoch": 0.08828,
      "grad_norm": 0.9724238442453864,
      "learning_rate": 0.003,
      "loss": 4.0496,
      "step": 8828
    },
    {
      "epoch": 0.08829,
      "grad_norm": 1.1262292498469912,
      "learning_rate": 0.003,
      "loss": 4.0512,
      "step": 8829
    },
    {
      "epoch": 0.0883,
      "grad_norm": 0.94574518398273,
      "learning_rate": 0.003,
      "loss": 4.0366,
      "step": 8830
    },
    {
      "epoch": 0.08831,
      "grad_norm": 1.1897119041270636,
      "learning_rate": 0.003,
      "loss": 4.0414,
      "step": 8831
    },
    {
      "epoch": 0.08832,
      "grad_norm": 1.2115168343495315,
      "learning_rate": 0.003,
      "loss": 4.0176,
      "step": 8832
    },
    {
      "epoch": 0.08833,
      "grad_norm": 0.996139762537626,
      "learning_rate": 0.003,
      "loss": 4.0332,
      "step": 8833
    },
    {
      "epoch": 0.08834,
      "grad_norm": 1.2368135937696196,
      "learning_rate": 0.003,
      "loss": 4.0282,
      "step": 8834
    },
    {
      "epoch": 0.08835,
      "grad_norm": 0.9570660017751258,
      "learning_rate": 0.003,
      "loss": 4.0609,
      "step": 8835
    },
    {
      "epoch": 0.08836,
      "grad_norm": 0.9995686259154789,
      "learning_rate": 0.003,
      "loss": 4.0471,
      "step": 8836
    },
    {
      "epoch": 0.08837,
      "grad_norm": 1.170370927533513,
      "learning_rate": 0.003,
      "loss": 4.0262,
      "step": 8837
    },
    {
      "epoch": 0.08838,
      "grad_norm": 1.0993199810585577,
      "learning_rate": 0.003,
      "loss": 4.0419,
      "step": 8838
    },
    {
      "epoch": 0.08839,
      "grad_norm": 1.258419211501553,
      "learning_rate": 0.003,
      "loss": 4.0344,
      "step": 8839
    },
    {
      "epoch": 0.0884,
      "grad_norm": 0.9274678150150035,
      "learning_rate": 0.003,
      "loss": 4.0219,
      "step": 8840
    },
    {
      "epoch": 0.08841,
      "grad_norm": 1.055913242044825,
      "learning_rate": 0.003,
      "loss": 4.0498,
      "step": 8841
    },
    {
      "epoch": 0.08842,
      "grad_norm": 1.1426746785347814,
      "learning_rate": 0.003,
      "loss": 4.0321,
      "step": 8842
    },
    {
      "epoch": 0.08843,
      "grad_norm": 1.1251979418413638,
      "learning_rate": 0.003,
      "loss": 4.0284,
      "step": 8843
    },
    {
      "epoch": 0.08844,
      "grad_norm": 1.0852468922230825,
      "learning_rate": 0.003,
      "loss": 4.0175,
      "step": 8844
    },
    {
      "epoch": 0.08845,
      "grad_norm": 1.1567649884610698,
      "learning_rate": 0.003,
      "loss": 3.9994,
      "step": 8845
    },
    {
      "epoch": 0.08846,
      "grad_norm": 1.082231327966706,
      "learning_rate": 0.003,
      "loss": 4.0715,
      "step": 8846
    },
    {
      "epoch": 0.08847,
      "grad_norm": 1.3745652698854023,
      "learning_rate": 0.003,
      "loss": 4.0699,
      "step": 8847
    },
    {
      "epoch": 0.08848,
      "grad_norm": 0.9014879193366324,
      "learning_rate": 0.003,
      "loss": 4.0603,
      "step": 8848
    },
    {
      "epoch": 0.08849,
      "grad_norm": 1.3017860095067897,
      "learning_rate": 0.003,
      "loss": 4.0415,
      "step": 8849
    },
    {
      "epoch": 0.0885,
      "grad_norm": 1.1398535241359604,
      "learning_rate": 0.003,
      "loss": 4.0508,
      "step": 8850
    },
    {
      "epoch": 0.08851,
      "grad_norm": 1.2652223839390064,
      "learning_rate": 0.003,
      "loss": 4.05,
      "step": 8851
    },
    {
      "epoch": 0.08852,
      "grad_norm": 0.8569529658036033,
      "learning_rate": 0.003,
      "loss": 4.0383,
      "step": 8852
    },
    {
      "epoch": 0.08853,
      "grad_norm": 0.8417432899055782,
      "learning_rate": 0.003,
      "loss": 4.0505,
      "step": 8853
    },
    {
      "epoch": 0.08854,
      "grad_norm": 0.9651867070757068,
      "learning_rate": 0.003,
      "loss": 4.0687,
      "step": 8854
    },
    {
      "epoch": 0.08855,
      "grad_norm": 1.024221501922812,
      "learning_rate": 0.003,
      "loss": 4.0272,
      "step": 8855
    },
    {
      "epoch": 0.08856,
      "grad_norm": 1.1804109249837946,
      "learning_rate": 0.003,
      "loss": 4.0461,
      "step": 8856
    },
    {
      "epoch": 0.08857,
      "grad_norm": 0.955306519494686,
      "learning_rate": 0.003,
      "loss": 4.042,
      "step": 8857
    },
    {
      "epoch": 0.08858,
      "grad_norm": 1.0294596526147064,
      "learning_rate": 0.003,
      "loss": 4.0288,
      "step": 8858
    },
    {
      "epoch": 0.08859,
      "grad_norm": 1.0845239322361329,
      "learning_rate": 0.003,
      "loss": 4.0452,
      "step": 8859
    },
    {
      "epoch": 0.0886,
      "grad_norm": 1.0515539677387107,
      "learning_rate": 0.003,
      "loss": 4.056,
      "step": 8860
    },
    {
      "epoch": 0.08861,
      "grad_norm": 1.1312815390155653,
      "learning_rate": 0.003,
      "loss": 4.0299,
      "step": 8861
    },
    {
      "epoch": 0.08862,
      "grad_norm": 1.2225356916127106,
      "learning_rate": 0.003,
      "loss": 4.0289,
      "step": 8862
    },
    {
      "epoch": 0.08863,
      "grad_norm": 1.1065958969223766,
      "learning_rate": 0.003,
      "loss": 4.0458,
      "step": 8863
    },
    {
      "epoch": 0.08864,
      "grad_norm": 0.8686988551731697,
      "learning_rate": 0.003,
      "loss": 4.0479,
      "step": 8864
    },
    {
      "epoch": 0.08865,
      "grad_norm": 0.9406066773821836,
      "learning_rate": 0.003,
      "loss": 4.0439,
      "step": 8865
    },
    {
      "epoch": 0.08866,
      "grad_norm": 1.1068806392407193,
      "learning_rate": 0.003,
      "loss": 4.0163,
      "step": 8866
    },
    {
      "epoch": 0.08867,
      "grad_norm": 1.0109355114141556,
      "learning_rate": 0.003,
      "loss": 4.0437,
      "step": 8867
    },
    {
      "epoch": 0.08868,
      "grad_norm": 1.0297444159445588,
      "learning_rate": 0.003,
      "loss": 4.0242,
      "step": 8868
    },
    {
      "epoch": 0.08869,
      "grad_norm": 1.1859657375260197,
      "learning_rate": 0.003,
      "loss": 4.0894,
      "step": 8869
    },
    {
      "epoch": 0.0887,
      "grad_norm": 1.0367364996267392,
      "learning_rate": 0.003,
      "loss": 4.0186,
      "step": 8870
    },
    {
      "epoch": 0.08871,
      "grad_norm": 1.1770481681438267,
      "learning_rate": 0.003,
      "loss": 4.0176,
      "step": 8871
    },
    {
      "epoch": 0.08872,
      "grad_norm": 1.055437903261702,
      "learning_rate": 0.003,
      "loss": 4.073,
      "step": 8872
    },
    {
      "epoch": 0.08873,
      "grad_norm": 1.1436383402758115,
      "learning_rate": 0.003,
      "loss": 4.0278,
      "step": 8873
    },
    {
      "epoch": 0.08874,
      "grad_norm": 1.0270645045752425,
      "learning_rate": 0.003,
      "loss": 4.0358,
      "step": 8874
    },
    {
      "epoch": 0.08875,
      "grad_norm": 1.2691814590014312,
      "learning_rate": 0.003,
      "loss": 4.0603,
      "step": 8875
    },
    {
      "epoch": 0.08876,
      "grad_norm": 1.0685113087608666,
      "learning_rate": 0.003,
      "loss": 4.0149,
      "step": 8876
    },
    {
      "epoch": 0.08877,
      "grad_norm": 0.9066843918350342,
      "learning_rate": 0.003,
      "loss": 4.0224,
      "step": 8877
    },
    {
      "epoch": 0.08878,
      "grad_norm": 1.1136428586139093,
      "learning_rate": 0.003,
      "loss": 4.0369,
      "step": 8878
    },
    {
      "epoch": 0.08879,
      "grad_norm": 1.1426232349637628,
      "learning_rate": 0.003,
      "loss": 4.0286,
      "step": 8879
    },
    {
      "epoch": 0.0888,
      "grad_norm": 0.9763661504928037,
      "learning_rate": 0.003,
      "loss": 4.022,
      "step": 8880
    },
    {
      "epoch": 0.08881,
      "grad_norm": 0.9760789964859367,
      "learning_rate": 0.003,
      "loss": 4.01,
      "step": 8881
    },
    {
      "epoch": 0.08882,
      "grad_norm": 0.993562866244837,
      "learning_rate": 0.003,
      "loss": 4.0465,
      "step": 8882
    },
    {
      "epoch": 0.08883,
      "grad_norm": 1.1608396003529295,
      "learning_rate": 0.003,
      "loss": 4.0876,
      "step": 8883
    },
    {
      "epoch": 0.08884,
      "grad_norm": 1.1323551569394956,
      "learning_rate": 0.003,
      "loss": 4.042,
      "step": 8884
    },
    {
      "epoch": 0.08885,
      "grad_norm": 1.2349009939188973,
      "learning_rate": 0.003,
      "loss": 4.0356,
      "step": 8885
    },
    {
      "epoch": 0.08886,
      "grad_norm": 1.1914990238513254,
      "learning_rate": 0.003,
      "loss": 4.0482,
      "step": 8886
    },
    {
      "epoch": 0.08887,
      "grad_norm": 1.0598046198071127,
      "learning_rate": 0.003,
      "loss": 4.0334,
      "step": 8887
    },
    {
      "epoch": 0.08888,
      "grad_norm": 1.1625466358037462,
      "learning_rate": 0.003,
      "loss": 4.036,
      "step": 8888
    },
    {
      "epoch": 0.08889,
      "grad_norm": 0.9052379335151111,
      "learning_rate": 0.003,
      "loss": 4.0256,
      "step": 8889
    },
    {
      "epoch": 0.0889,
      "grad_norm": 0.9819634809947653,
      "learning_rate": 0.003,
      "loss": 4.0308,
      "step": 8890
    },
    {
      "epoch": 0.08891,
      "grad_norm": 1.093718586313721,
      "learning_rate": 0.003,
      "loss": 4.0183,
      "step": 8891
    },
    {
      "epoch": 0.08892,
      "grad_norm": 1.213161423531245,
      "learning_rate": 0.003,
      "loss": 4.0657,
      "step": 8892
    },
    {
      "epoch": 0.08893,
      "grad_norm": 1.0485073867558885,
      "learning_rate": 0.003,
      "loss": 4.0364,
      "step": 8893
    },
    {
      "epoch": 0.08894,
      "grad_norm": 1.2824133272408857,
      "learning_rate": 0.003,
      "loss": 4.0306,
      "step": 8894
    },
    {
      "epoch": 0.08895,
      "grad_norm": 0.8423439384851096,
      "learning_rate": 0.003,
      "loss": 4.0348,
      "step": 8895
    },
    {
      "epoch": 0.08896,
      "grad_norm": 1.0799448562495333,
      "learning_rate": 0.003,
      "loss": 4.0201,
      "step": 8896
    },
    {
      "epoch": 0.08897,
      "grad_norm": 1.4422293408188802,
      "learning_rate": 0.003,
      "loss": 4.0373,
      "step": 8897
    },
    {
      "epoch": 0.08898,
      "grad_norm": 1.032118344307113,
      "learning_rate": 0.003,
      "loss": 4.042,
      "step": 8898
    },
    {
      "epoch": 0.08899,
      "grad_norm": 1.0522805801471267,
      "learning_rate": 0.003,
      "loss": 4.0399,
      "step": 8899
    },
    {
      "epoch": 0.089,
      "grad_norm": 1.0679360633133137,
      "learning_rate": 0.003,
      "loss": 4.0276,
      "step": 8900
    },
    {
      "epoch": 0.08901,
      "grad_norm": 1.2000864304391685,
      "learning_rate": 0.003,
      "loss": 4.044,
      "step": 8901
    },
    {
      "epoch": 0.08902,
      "grad_norm": 1.0038739754064978,
      "learning_rate": 0.003,
      "loss": 4.0407,
      "step": 8902
    },
    {
      "epoch": 0.08903,
      "grad_norm": 1.4580752514981392,
      "learning_rate": 0.003,
      "loss": 4.0378,
      "step": 8903
    },
    {
      "epoch": 0.08904,
      "grad_norm": 0.9501849947581935,
      "learning_rate": 0.003,
      "loss": 4.0431,
      "step": 8904
    },
    {
      "epoch": 0.08905,
      "grad_norm": 1.0865285261488837,
      "learning_rate": 0.003,
      "loss": 4.0578,
      "step": 8905
    },
    {
      "epoch": 0.08906,
      "grad_norm": 1.192560315216147,
      "learning_rate": 0.003,
      "loss": 4.0501,
      "step": 8906
    },
    {
      "epoch": 0.08907,
      "grad_norm": 0.86974732874746,
      "learning_rate": 0.003,
      "loss": 4.0356,
      "step": 8907
    },
    {
      "epoch": 0.08908,
      "grad_norm": 1.1193535519892115,
      "learning_rate": 0.003,
      "loss": 4.0618,
      "step": 8908
    },
    {
      "epoch": 0.08909,
      "grad_norm": 1.053123300357203,
      "learning_rate": 0.003,
      "loss": 4.0552,
      "step": 8909
    },
    {
      "epoch": 0.0891,
      "grad_norm": 1.1339836887698285,
      "learning_rate": 0.003,
      "loss": 4.0369,
      "step": 8910
    },
    {
      "epoch": 0.08911,
      "grad_norm": 0.9277496841752985,
      "learning_rate": 0.003,
      "loss": 4.0399,
      "step": 8911
    },
    {
      "epoch": 0.08912,
      "grad_norm": 0.9315539212514233,
      "learning_rate": 0.003,
      "loss": 4.0368,
      "step": 8912
    },
    {
      "epoch": 0.08913,
      "grad_norm": 1.0576553940280595,
      "learning_rate": 0.003,
      "loss": 4.0592,
      "step": 8913
    },
    {
      "epoch": 0.08914,
      "grad_norm": 1.1640237850058512,
      "learning_rate": 0.003,
      "loss": 4.056,
      "step": 8914
    },
    {
      "epoch": 0.08915,
      "grad_norm": 1.0188304936594577,
      "learning_rate": 0.003,
      "loss": 4.0563,
      "step": 8915
    },
    {
      "epoch": 0.08916,
      "grad_norm": 1.426269112438244,
      "learning_rate": 0.003,
      "loss": 4.0536,
      "step": 8916
    },
    {
      "epoch": 0.08917,
      "grad_norm": 1.0546818705564507,
      "learning_rate": 0.003,
      "loss": 4.0315,
      "step": 8917
    },
    {
      "epoch": 0.08918,
      "grad_norm": 1.2200135436078994,
      "learning_rate": 0.003,
      "loss": 4.0467,
      "step": 8918
    },
    {
      "epoch": 0.08919,
      "grad_norm": 0.8818184266787755,
      "learning_rate": 0.003,
      "loss": 4.0512,
      "step": 8919
    },
    {
      "epoch": 0.0892,
      "grad_norm": 0.9755990420691693,
      "learning_rate": 0.003,
      "loss": 4.0218,
      "step": 8920
    },
    {
      "epoch": 0.08921,
      "grad_norm": 1.2471123516060463,
      "learning_rate": 0.003,
      "loss": 4.0309,
      "step": 8921
    },
    {
      "epoch": 0.08922,
      "grad_norm": 0.8822858234291155,
      "learning_rate": 0.003,
      "loss": 4.0501,
      "step": 8922
    },
    {
      "epoch": 0.08923,
      "grad_norm": 0.9810038940078862,
      "learning_rate": 0.003,
      "loss": 4.0546,
      "step": 8923
    },
    {
      "epoch": 0.08924,
      "grad_norm": 1.2492114317483891,
      "learning_rate": 0.003,
      "loss": 4.0352,
      "step": 8924
    },
    {
      "epoch": 0.08925,
      "grad_norm": 0.9702532902239348,
      "learning_rate": 0.003,
      "loss": 4.0406,
      "step": 8925
    },
    {
      "epoch": 0.08926,
      "grad_norm": 1.2387626568378134,
      "learning_rate": 0.003,
      "loss": 4.0302,
      "step": 8926
    },
    {
      "epoch": 0.08927,
      "grad_norm": 1.0513850982219397,
      "learning_rate": 0.003,
      "loss": 4.0386,
      "step": 8927
    },
    {
      "epoch": 0.08928,
      "grad_norm": 1.1575708697727565,
      "learning_rate": 0.003,
      "loss": 4.0325,
      "step": 8928
    },
    {
      "epoch": 0.08929,
      "grad_norm": 1.1942394751331307,
      "learning_rate": 0.003,
      "loss": 4.0398,
      "step": 8929
    },
    {
      "epoch": 0.0893,
      "grad_norm": 0.9099352819617809,
      "learning_rate": 0.003,
      "loss": 4.0425,
      "step": 8930
    },
    {
      "epoch": 0.08931,
      "grad_norm": 1.1574700090391592,
      "learning_rate": 0.003,
      "loss": 4.0343,
      "step": 8931
    },
    {
      "epoch": 0.08932,
      "grad_norm": 1.1712040522318599,
      "learning_rate": 0.003,
      "loss": 4.0485,
      "step": 8932
    },
    {
      "epoch": 0.08933,
      "grad_norm": 1.1298101748632317,
      "learning_rate": 0.003,
      "loss": 4.0412,
      "step": 8933
    },
    {
      "epoch": 0.08934,
      "grad_norm": 0.8845864177793066,
      "learning_rate": 0.003,
      "loss": 4.0331,
      "step": 8934
    },
    {
      "epoch": 0.08935,
      "grad_norm": 0.9728746053736606,
      "learning_rate": 0.003,
      "loss": 4.0481,
      "step": 8935
    },
    {
      "epoch": 0.08936,
      "grad_norm": 1.1064146202001022,
      "learning_rate": 0.003,
      "loss": 4.0143,
      "step": 8936
    },
    {
      "epoch": 0.08937,
      "grad_norm": 1.1728943500806064,
      "learning_rate": 0.003,
      "loss": 4.0214,
      "step": 8937
    },
    {
      "epoch": 0.08938,
      "grad_norm": 1.1024217274450334,
      "learning_rate": 0.003,
      "loss": 4.0577,
      "step": 8938
    },
    {
      "epoch": 0.08939,
      "grad_norm": 1.0804309602863862,
      "learning_rate": 0.003,
      "loss": 4.0694,
      "step": 8939
    },
    {
      "epoch": 0.0894,
      "grad_norm": 1.1974313951117248,
      "learning_rate": 0.003,
      "loss": 4.0352,
      "step": 8940
    },
    {
      "epoch": 0.08941,
      "grad_norm": 1.0472060105935805,
      "learning_rate": 0.003,
      "loss": 4.0478,
      "step": 8941
    },
    {
      "epoch": 0.08942,
      "grad_norm": 1.1849247191126862,
      "learning_rate": 0.003,
      "loss": 4.0252,
      "step": 8942
    },
    {
      "epoch": 0.08943,
      "grad_norm": 1.066833548679808,
      "learning_rate": 0.003,
      "loss": 4.0447,
      "step": 8943
    },
    {
      "epoch": 0.08944,
      "grad_norm": 1.1269339808223942,
      "learning_rate": 0.003,
      "loss": 4.0182,
      "step": 8944
    },
    {
      "epoch": 0.08945,
      "grad_norm": 1.163904186867617,
      "learning_rate": 0.003,
      "loss": 4.0133,
      "step": 8945
    },
    {
      "epoch": 0.08946,
      "grad_norm": 1.1313006895031748,
      "learning_rate": 0.003,
      "loss": 4.0655,
      "step": 8946
    },
    {
      "epoch": 0.08947,
      "grad_norm": 1.2553855338780335,
      "learning_rate": 0.003,
      "loss": 4.0445,
      "step": 8947
    },
    {
      "epoch": 0.08948,
      "grad_norm": 1.1756270052552653,
      "learning_rate": 0.003,
      "loss": 4.0156,
      "step": 8948
    },
    {
      "epoch": 0.08949,
      "grad_norm": 0.9764941068571328,
      "learning_rate": 0.003,
      "loss": 4.0333,
      "step": 8949
    },
    {
      "epoch": 0.0895,
      "grad_norm": 1.0240642198990701,
      "learning_rate": 0.003,
      "loss": 4.0263,
      "step": 8950
    },
    {
      "epoch": 0.08951,
      "grad_norm": 1.2604446368244644,
      "learning_rate": 0.003,
      "loss": 4.0386,
      "step": 8951
    },
    {
      "epoch": 0.08952,
      "grad_norm": 1.069105761223158,
      "learning_rate": 0.003,
      "loss": 4.0476,
      "step": 8952
    },
    {
      "epoch": 0.08953,
      "grad_norm": 1.2897654616156728,
      "learning_rate": 0.003,
      "loss": 4.0465,
      "step": 8953
    },
    {
      "epoch": 0.08954,
      "grad_norm": 0.8927523209357381,
      "learning_rate": 0.003,
      "loss": 4.0037,
      "step": 8954
    },
    {
      "epoch": 0.08955,
      "grad_norm": 0.7994347598547118,
      "learning_rate": 0.003,
      "loss": 4.0339,
      "step": 8955
    },
    {
      "epoch": 0.08956,
      "grad_norm": 0.7721118918818628,
      "learning_rate": 0.003,
      "loss": 4.0382,
      "step": 8956
    },
    {
      "epoch": 0.08957,
      "grad_norm": 0.888735574661208,
      "learning_rate": 0.003,
      "loss": 4.0468,
      "step": 8957
    },
    {
      "epoch": 0.08958,
      "grad_norm": 1.011303363408731,
      "learning_rate": 0.003,
      "loss": 4.0063,
      "step": 8958
    },
    {
      "epoch": 0.08959,
      "grad_norm": 1.0580277710381667,
      "learning_rate": 0.003,
      "loss": 4.0288,
      "step": 8959
    },
    {
      "epoch": 0.0896,
      "grad_norm": 1.1815138015719915,
      "learning_rate": 0.003,
      "loss": 4.0082,
      "step": 8960
    },
    {
      "epoch": 0.08961,
      "grad_norm": 1.0763278082170449,
      "learning_rate": 0.003,
      "loss": 4.029,
      "step": 8961
    },
    {
      "epoch": 0.08962,
      "grad_norm": 1.1547543247106453,
      "learning_rate": 0.003,
      "loss": 4.0328,
      "step": 8962
    },
    {
      "epoch": 0.08963,
      "grad_norm": 0.9474596852663282,
      "learning_rate": 0.003,
      "loss": 4.0594,
      "step": 8963
    },
    {
      "epoch": 0.08964,
      "grad_norm": 0.9994796332550057,
      "learning_rate": 0.003,
      "loss": 4.0399,
      "step": 8964
    },
    {
      "epoch": 0.08965,
      "grad_norm": 1.0971047941927174,
      "learning_rate": 0.003,
      "loss": 4.0488,
      "step": 8965
    },
    {
      "epoch": 0.08966,
      "grad_norm": 0.9978749462040863,
      "learning_rate": 0.003,
      "loss": 4.018,
      "step": 8966
    },
    {
      "epoch": 0.08967,
      "grad_norm": 1.091462772934639,
      "learning_rate": 0.003,
      "loss": 4.059,
      "step": 8967
    },
    {
      "epoch": 0.08968,
      "grad_norm": 0.9928173167320807,
      "learning_rate": 0.003,
      "loss": 4.0294,
      "step": 8968
    },
    {
      "epoch": 0.08969,
      "grad_norm": 1.2786532555094048,
      "learning_rate": 0.003,
      "loss": 4.035,
      "step": 8969
    },
    {
      "epoch": 0.0897,
      "grad_norm": 0.9304812924425294,
      "learning_rate": 0.003,
      "loss": 4.0399,
      "step": 8970
    },
    {
      "epoch": 0.08971,
      "grad_norm": 1.029273303406794,
      "learning_rate": 0.003,
      "loss": 4.0321,
      "step": 8971
    },
    {
      "epoch": 0.08972,
      "grad_norm": 1.1442621209111814,
      "learning_rate": 0.003,
      "loss": 4.0216,
      "step": 8972
    },
    {
      "epoch": 0.08973,
      "grad_norm": 1.0166706811028725,
      "learning_rate": 0.003,
      "loss": 4.0568,
      "step": 8973
    },
    {
      "epoch": 0.08974,
      "grad_norm": 1.3160111354791268,
      "learning_rate": 0.003,
      "loss": 4.0599,
      "step": 8974
    },
    {
      "epoch": 0.08975,
      "grad_norm": 0.9964973979910691,
      "learning_rate": 0.003,
      "loss": 4.034,
      "step": 8975
    },
    {
      "epoch": 0.08976,
      "grad_norm": 1.1506428299487088,
      "learning_rate": 0.003,
      "loss": 4.0539,
      "step": 8976
    },
    {
      "epoch": 0.08977,
      "grad_norm": 1.0330018963705825,
      "learning_rate": 0.003,
      "loss": 4.0367,
      "step": 8977
    },
    {
      "epoch": 0.08978,
      "grad_norm": 1.0887492374364582,
      "learning_rate": 0.003,
      "loss": 4.0419,
      "step": 8978
    },
    {
      "epoch": 0.08979,
      "grad_norm": 1.1346166358786325,
      "learning_rate": 0.003,
      "loss": 4.024,
      "step": 8979
    },
    {
      "epoch": 0.0898,
      "grad_norm": 1.21619016375306,
      "learning_rate": 0.003,
      "loss": 4.0509,
      "step": 8980
    },
    {
      "epoch": 0.08981,
      "grad_norm": 0.9851538869152199,
      "learning_rate": 0.003,
      "loss": 4.0201,
      "step": 8981
    },
    {
      "epoch": 0.08982,
      "grad_norm": 1.2126645439081598,
      "learning_rate": 0.003,
      "loss": 4.0339,
      "step": 8982
    },
    {
      "epoch": 0.08983,
      "grad_norm": 1.0447731466887387,
      "learning_rate": 0.003,
      "loss": 4.0122,
      "step": 8983
    },
    {
      "epoch": 0.08984,
      "grad_norm": 1.1497594183247177,
      "learning_rate": 0.003,
      "loss": 4.0603,
      "step": 8984
    },
    {
      "epoch": 0.08985,
      "grad_norm": 1.2483832316844437,
      "learning_rate": 0.003,
      "loss": 4.0482,
      "step": 8985
    },
    {
      "epoch": 0.08986,
      "grad_norm": 1.2191689592626789,
      "learning_rate": 0.003,
      "loss": 4.0458,
      "step": 8986
    },
    {
      "epoch": 0.08987,
      "grad_norm": 1.076658986801492,
      "learning_rate": 0.003,
      "loss": 4.0286,
      "step": 8987
    },
    {
      "epoch": 0.08988,
      "grad_norm": 1.2479820133910688,
      "learning_rate": 0.003,
      "loss": 4.0323,
      "step": 8988
    },
    {
      "epoch": 0.08989,
      "grad_norm": 0.9034809767704942,
      "learning_rate": 0.003,
      "loss": 4.0534,
      "step": 8989
    },
    {
      "epoch": 0.0899,
      "grad_norm": 1.1404867965095085,
      "learning_rate": 0.003,
      "loss": 4.0247,
      "step": 8990
    },
    {
      "epoch": 0.08991,
      "grad_norm": 1.073000683119538,
      "learning_rate": 0.003,
      "loss": 4.0346,
      "step": 8991
    },
    {
      "epoch": 0.08992,
      "grad_norm": 1.2207306198770145,
      "learning_rate": 0.003,
      "loss": 4.0248,
      "step": 8992
    },
    {
      "epoch": 0.08993,
      "grad_norm": 1.0842166692368216,
      "learning_rate": 0.003,
      "loss": 4.0333,
      "step": 8993
    },
    {
      "epoch": 0.08994,
      "grad_norm": 1.0045211383021964,
      "learning_rate": 0.003,
      "loss": 4.0298,
      "step": 8994
    },
    {
      "epoch": 0.08995,
      "grad_norm": 1.316656487044687,
      "learning_rate": 0.003,
      "loss": 4.0477,
      "step": 8995
    },
    {
      "epoch": 0.08996,
      "grad_norm": 0.9870905235403588,
      "learning_rate": 0.003,
      "loss": 4.063,
      "step": 8996
    },
    {
      "epoch": 0.08997,
      "grad_norm": 1.1131944813902874,
      "learning_rate": 0.003,
      "loss": 4.0355,
      "step": 8997
    },
    {
      "epoch": 0.08998,
      "grad_norm": 0.9575251947968014,
      "learning_rate": 0.003,
      "loss": 4.0188,
      "step": 8998
    },
    {
      "epoch": 0.08999,
      "grad_norm": 1.079155634238572,
      "learning_rate": 0.003,
      "loss": 4.048,
      "step": 8999
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.3073657014522342,
      "learning_rate": 0.003,
      "loss": 4.0459,
      "step": 9000
    },
    {
      "epoch": 0.09001,
      "grad_norm": 0.9878617942632868,
      "learning_rate": 0.003,
      "loss": 4.036,
      "step": 9001
    },
    {
      "epoch": 0.09002,
      "grad_norm": 1.4475702739335359,
      "learning_rate": 0.003,
      "loss": 4.036,
      "step": 9002
    },
    {
      "epoch": 0.09003,
      "grad_norm": 1.0308588085625798,
      "learning_rate": 0.003,
      "loss": 4.0164,
      "step": 9003
    },
    {
      "epoch": 0.09004,
      "grad_norm": 1.221089293392804,
      "learning_rate": 0.003,
      "loss": 4.0384,
      "step": 9004
    },
    {
      "epoch": 0.09005,
      "grad_norm": 1.0231796835565534,
      "learning_rate": 0.003,
      "loss": 4.0569,
      "step": 9005
    },
    {
      "epoch": 0.09006,
      "grad_norm": 1.0417871456423533,
      "learning_rate": 0.003,
      "loss": 4.0323,
      "step": 9006
    },
    {
      "epoch": 0.09007,
      "grad_norm": 1.1191814093275658,
      "learning_rate": 0.003,
      "loss": 4.0595,
      "step": 9007
    },
    {
      "epoch": 0.09008,
      "grad_norm": 1.1348119020549952,
      "learning_rate": 0.003,
      "loss": 4.0426,
      "step": 9008
    },
    {
      "epoch": 0.09009,
      "grad_norm": 1.1633559345033404,
      "learning_rate": 0.003,
      "loss": 4.0497,
      "step": 9009
    },
    {
      "epoch": 0.0901,
      "grad_norm": 1.1458799262789814,
      "learning_rate": 0.003,
      "loss": 4.0406,
      "step": 9010
    },
    {
      "epoch": 0.09011,
      "grad_norm": 1.0636561316185342,
      "learning_rate": 0.003,
      "loss": 4.0382,
      "step": 9011
    },
    {
      "epoch": 0.09012,
      "grad_norm": 1.1477528884261912,
      "learning_rate": 0.003,
      "loss": 4.0396,
      "step": 9012
    },
    {
      "epoch": 0.09013,
      "grad_norm": 1.0045238113278767,
      "learning_rate": 0.003,
      "loss": 4.027,
      "step": 9013
    },
    {
      "epoch": 0.09014,
      "grad_norm": 1.1254428234025435,
      "learning_rate": 0.003,
      "loss": 4.0657,
      "step": 9014
    },
    {
      "epoch": 0.09015,
      "grad_norm": 1.1052529228915646,
      "learning_rate": 0.003,
      "loss": 4.0764,
      "step": 9015
    },
    {
      "epoch": 0.09016,
      "grad_norm": 1.0143709342989635,
      "learning_rate": 0.003,
      "loss": 4.0547,
      "step": 9016
    },
    {
      "epoch": 0.09017,
      "grad_norm": 1.0995658886429716,
      "learning_rate": 0.003,
      "loss": 4.0607,
      "step": 9017
    },
    {
      "epoch": 0.09018,
      "grad_norm": 1.0029225082537132,
      "learning_rate": 0.003,
      "loss": 4.0508,
      "step": 9018
    },
    {
      "epoch": 0.09019,
      "grad_norm": 1.3935530723752598,
      "learning_rate": 0.003,
      "loss": 4.035,
      "step": 9019
    },
    {
      "epoch": 0.0902,
      "grad_norm": 0.9681733776693029,
      "learning_rate": 0.003,
      "loss": 4.0396,
      "step": 9020
    },
    {
      "epoch": 0.09021,
      "grad_norm": 1.3687701542155535,
      "learning_rate": 0.003,
      "loss": 4.0636,
      "step": 9021
    },
    {
      "epoch": 0.09022,
      "grad_norm": 0.9332102242731483,
      "learning_rate": 0.003,
      "loss": 4.0434,
      "step": 9022
    },
    {
      "epoch": 0.09023,
      "grad_norm": 1.0483328137620929,
      "learning_rate": 0.003,
      "loss": 4.0389,
      "step": 9023
    },
    {
      "epoch": 0.09024,
      "grad_norm": 1.1150169635102571,
      "learning_rate": 0.003,
      "loss": 4.0248,
      "step": 9024
    },
    {
      "epoch": 0.09025,
      "grad_norm": 1.2002769998310732,
      "learning_rate": 0.003,
      "loss": 4.0455,
      "step": 9025
    },
    {
      "epoch": 0.09026,
      "grad_norm": 1.2011034969162588,
      "learning_rate": 0.003,
      "loss": 4.0403,
      "step": 9026
    },
    {
      "epoch": 0.09027,
      "grad_norm": 1.146578370959906,
      "learning_rate": 0.003,
      "loss": 4.0353,
      "step": 9027
    },
    {
      "epoch": 0.09028,
      "grad_norm": 1.2228846480536117,
      "learning_rate": 0.003,
      "loss": 4.0515,
      "step": 9028
    },
    {
      "epoch": 0.09029,
      "grad_norm": 1.037224821411794,
      "learning_rate": 0.003,
      "loss": 4.0376,
      "step": 9029
    },
    {
      "epoch": 0.0903,
      "grad_norm": 0.9844125119573323,
      "learning_rate": 0.003,
      "loss": 4.0647,
      "step": 9030
    },
    {
      "epoch": 0.09031,
      "grad_norm": 1.1495286013814523,
      "learning_rate": 0.003,
      "loss": 4.022,
      "step": 9031
    },
    {
      "epoch": 0.09032,
      "grad_norm": 1.1742914278318812,
      "learning_rate": 0.003,
      "loss": 4.0736,
      "step": 9032
    },
    {
      "epoch": 0.09033,
      "grad_norm": 1.3904421332901014,
      "learning_rate": 0.003,
      "loss": 4.0193,
      "step": 9033
    },
    {
      "epoch": 0.09034,
      "grad_norm": 1.0278814470733673,
      "learning_rate": 0.003,
      "loss": 4.0363,
      "step": 9034
    },
    {
      "epoch": 0.09035,
      "grad_norm": 1.1229673655711763,
      "learning_rate": 0.003,
      "loss": 4.0397,
      "step": 9035
    },
    {
      "epoch": 0.09036,
      "grad_norm": 0.9056875520752794,
      "learning_rate": 0.003,
      "loss": 4.0301,
      "step": 9036
    },
    {
      "epoch": 0.09037,
      "grad_norm": 1.1006054177092766,
      "learning_rate": 0.003,
      "loss": 4.0368,
      "step": 9037
    },
    {
      "epoch": 0.09038,
      "grad_norm": 1.1348621040993052,
      "learning_rate": 0.003,
      "loss": 4.0397,
      "step": 9038
    },
    {
      "epoch": 0.09039,
      "grad_norm": 1.05381794931122,
      "learning_rate": 0.003,
      "loss": 4.0466,
      "step": 9039
    },
    {
      "epoch": 0.0904,
      "grad_norm": 1.0109885109102685,
      "learning_rate": 0.003,
      "loss": 4.0199,
      "step": 9040
    },
    {
      "epoch": 0.09041,
      "grad_norm": 1.123512949286327,
      "learning_rate": 0.003,
      "loss": 4.0177,
      "step": 9041
    },
    {
      "epoch": 0.09042,
      "grad_norm": 1.0917985051699848,
      "learning_rate": 0.003,
      "loss": 4.0193,
      "step": 9042
    },
    {
      "epoch": 0.09043,
      "grad_norm": 1.084941390137943,
      "learning_rate": 0.003,
      "loss": 4.027,
      "step": 9043
    },
    {
      "epoch": 0.09044,
      "grad_norm": 1.12950639714153,
      "learning_rate": 0.003,
      "loss": 4.0279,
      "step": 9044
    },
    {
      "epoch": 0.09045,
      "grad_norm": 0.9435189761788078,
      "learning_rate": 0.003,
      "loss": 4.0313,
      "step": 9045
    },
    {
      "epoch": 0.09046,
      "grad_norm": 1.1205216961300428,
      "learning_rate": 0.003,
      "loss": 4.0083,
      "step": 9046
    },
    {
      "epoch": 0.09047,
      "grad_norm": 1.1101023596152269,
      "learning_rate": 0.003,
      "loss": 4.0294,
      "step": 9047
    },
    {
      "epoch": 0.09048,
      "grad_norm": 1.4711338068553876,
      "learning_rate": 0.003,
      "loss": 4.0375,
      "step": 9048
    },
    {
      "epoch": 0.09049,
      "grad_norm": 0.7958927509366164,
      "learning_rate": 0.003,
      "loss": 4.0539,
      "step": 9049
    },
    {
      "epoch": 0.0905,
      "grad_norm": 0.8137554584809007,
      "learning_rate": 0.003,
      "loss": 4.0287,
      "step": 9050
    },
    {
      "epoch": 0.09051,
      "grad_norm": 0.9025372956999882,
      "learning_rate": 0.003,
      "loss": 4.0261,
      "step": 9051
    },
    {
      "epoch": 0.09052,
      "grad_norm": 0.8871827348097391,
      "learning_rate": 0.003,
      "loss": 4.0347,
      "step": 9052
    },
    {
      "epoch": 0.09053,
      "grad_norm": 1.0593382418378432,
      "learning_rate": 0.003,
      "loss": 4.0198,
      "step": 9053
    },
    {
      "epoch": 0.09054,
      "grad_norm": 1.185043130945484,
      "learning_rate": 0.003,
      "loss": 4.0262,
      "step": 9054
    },
    {
      "epoch": 0.09055,
      "grad_norm": 1.123987691026757,
      "learning_rate": 0.003,
      "loss": 4.0151,
      "step": 9055
    },
    {
      "epoch": 0.09056,
      "grad_norm": 1.0852908797471614,
      "learning_rate": 0.003,
      "loss": 4.0215,
      "step": 9056
    },
    {
      "epoch": 0.09057,
      "grad_norm": 0.9184881520876066,
      "learning_rate": 0.003,
      "loss": 4.0444,
      "step": 9057
    },
    {
      "epoch": 0.09058,
      "grad_norm": 0.8821076958966486,
      "learning_rate": 0.003,
      "loss": 4.0523,
      "step": 9058
    },
    {
      "epoch": 0.09059,
      "grad_norm": 0.9653080945803115,
      "learning_rate": 0.003,
      "loss": 4.0354,
      "step": 9059
    },
    {
      "epoch": 0.0906,
      "grad_norm": 1.157246811985134,
      "learning_rate": 0.003,
      "loss": 4.0565,
      "step": 9060
    },
    {
      "epoch": 0.09061,
      "grad_norm": 1.106358395927521,
      "learning_rate": 0.003,
      "loss": 4.042,
      "step": 9061
    },
    {
      "epoch": 0.09062,
      "grad_norm": 1.1308906707844582,
      "learning_rate": 0.003,
      "loss": 4.0102,
      "step": 9062
    },
    {
      "epoch": 0.09063,
      "grad_norm": 1.3615134157248625,
      "learning_rate": 0.003,
      "loss": 4.0304,
      "step": 9063
    },
    {
      "epoch": 0.09064,
      "grad_norm": 0.9632769176971355,
      "learning_rate": 0.003,
      "loss": 4.0526,
      "step": 9064
    },
    {
      "epoch": 0.09065,
      "grad_norm": 1.193055845335768,
      "learning_rate": 0.003,
      "loss": 4.0487,
      "step": 9065
    },
    {
      "epoch": 0.09066,
      "grad_norm": 1.0217847841679015,
      "learning_rate": 0.003,
      "loss": 4.0362,
      "step": 9066
    },
    {
      "epoch": 0.09067,
      "grad_norm": 1.2708025422078024,
      "learning_rate": 0.003,
      "loss": 4.0368,
      "step": 9067
    },
    {
      "epoch": 0.09068,
      "grad_norm": 0.999091796379082,
      "learning_rate": 0.003,
      "loss": 4.046,
      "step": 9068
    },
    {
      "epoch": 0.09069,
      "grad_norm": 1.1937745015052195,
      "learning_rate": 0.003,
      "loss": 4.0863,
      "step": 9069
    },
    {
      "epoch": 0.0907,
      "grad_norm": 1.2267119910601545,
      "learning_rate": 0.003,
      "loss": 4.0665,
      "step": 9070
    },
    {
      "epoch": 0.09071,
      "grad_norm": 0.9307012025024503,
      "learning_rate": 0.003,
      "loss": 4.0384,
      "step": 9071
    },
    {
      "epoch": 0.09072,
      "grad_norm": 1.0537322841979997,
      "learning_rate": 0.003,
      "loss": 4.0152,
      "step": 9072
    },
    {
      "epoch": 0.09073,
      "grad_norm": 1.2016861411462987,
      "learning_rate": 0.003,
      "loss": 4.0476,
      "step": 9073
    },
    {
      "epoch": 0.09074,
      "grad_norm": 0.8496237355918259,
      "learning_rate": 0.003,
      "loss": 4.0081,
      "step": 9074
    },
    {
      "epoch": 0.09075,
      "grad_norm": 0.9908939634552881,
      "learning_rate": 0.003,
      "loss": 4.0518,
      "step": 9075
    },
    {
      "epoch": 0.09076,
      "grad_norm": 1.1968746368032812,
      "learning_rate": 0.003,
      "loss": 4.0603,
      "step": 9076
    },
    {
      "epoch": 0.09077,
      "grad_norm": 0.970351786121277,
      "learning_rate": 0.003,
      "loss": 4.0532,
      "step": 9077
    },
    {
      "epoch": 0.09078,
      "grad_norm": 1.1497763383817403,
      "learning_rate": 0.003,
      "loss": 4.0363,
      "step": 9078
    },
    {
      "epoch": 0.09079,
      "grad_norm": 1.0830961717463246,
      "learning_rate": 0.003,
      "loss": 4.0462,
      "step": 9079
    },
    {
      "epoch": 0.0908,
      "grad_norm": 0.9609418328913767,
      "learning_rate": 0.003,
      "loss": 4.0222,
      "step": 9080
    },
    {
      "epoch": 0.09081,
      "grad_norm": 1.1000854820573318,
      "learning_rate": 0.003,
      "loss": 4.051,
      "step": 9081
    },
    {
      "epoch": 0.09082,
      "grad_norm": 1.1636952655382402,
      "learning_rate": 0.003,
      "loss": 4.0292,
      "step": 9082
    },
    {
      "epoch": 0.09083,
      "grad_norm": 1.1420961957835405,
      "learning_rate": 0.003,
      "loss": 4.0443,
      "step": 9083
    },
    {
      "epoch": 0.09084,
      "grad_norm": 1.0553826184749373,
      "learning_rate": 0.003,
      "loss": 4.0147,
      "step": 9084
    },
    {
      "epoch": 0.09085,
      "grad_norm": 1.0255413615584303,
      "learning_rate": 0.003,
      "loss": 4.0524,
      "step": 9085
    },
    {
      "epoch": 0.09086,
      "grad_norm": 1.215452301963506,
      "learning_rate": 0.003,
      "loss": 4.0305,
      "step": 9086
    },
    {
      "epoch": 0.09087,
      "grad_norm": 0.9583487655084203,
      "learning_rate": 0.003,
      "loss": 4.0208,
      "step": 9087
    },
    {
      "epoch": 0.09088,
      "grad_norm": 1.232091628428844,
      "learning_rate": 0.003,
      "loss": 4.0254,
      "step": 9088
    },
    {
      "epoch": 0.09089,
      "grad_norm": 1.1137319600932658,
      "learning_rate": 0.003,
      "loss": 4.0395,
      "step": 9089
    },
    {
      "epoch": 0.0909,
      "grad_norm": 1.144755645285606,
      "learning_rate": 0.003,
      "loss": 4.0299,
      "step": 9090
    },
    {
      "epoch": 0.09091,
      "grad_norm": 1.0300971496763878,
      "learning_rate": 0.003,
      "loss": 4.0116,
      "step": 9091
    },
    {
      "epoch": 0.09092,
      "grad_norm": 1.0100476409907833,
      "learning_rate": 0.003,
      "loss": 4.0704,
      "step": 9092
    },
    {
      "epoch": 0.09093,
      "grad_norm": 1.1494952695266714,
      "learning_rate": 0.003,
      "loss": 4.032,
      "step": 9093
    },
    {
      "epoch": 0.09094,
      "grad_norm": 1.0095676827058278,
      "learning_rate": 0.003,
      "loss": 4.0345,
      "step": 9094
    },
    {
      "epoch": 0.09095,
      "grad_norm": 1.2093302711410523,
      "learning_rate": 0.003,
      "loss": 4.0515,
      "step": 9095
    },
    {
      "epoch": 0.09096,
      "grad_norm": 0.976858163705672,
      "learning_rate": 0.003,
      "loss": 4.0244,
      "step": 9096
    },
    {
      "epoch": 0.09097,
      "grad_norm": 1.3853264618148677,
      "learning_rate": 0.003,
      "loss": 4.0478,
      "step": 9097
    },
    {
      "epoch": 0.09098,
      "grad_norm": 1.0766280332726217,
      "learning_rate": 0.003,
      "loss": 4.0168,
      "step": 9098
    },
    {
      "epoch": 0.09099,
      "grad_norm": 1.1136120589991296,
      "learning_rate": 0.003,
      "loss": 4.0278,
      "step": 9099
    },
    {
      "epoch": 0.091,
      "grad_norm": 1.1601021677903895,
      "learning_rate": 0.003,
      "loss": 4.0216,
      "step": 9100
    },
    {
      "epoch": 0.09101,
      "grad_norm": 1.3417195527068442,
      "learning_rate": 0.003,
      "loss": 4.0446,
      "step": 9101
    },
    {
      "epoch": 0.09102,
      "grad_norm": 0.9070301102163859,
      "learning_rate": 0.003,
      "loss": 4.0459,
      "step": 9102
    },
    {
      "epoch": 0.09103,
      "grad_norm": 0.9573335281707542,
      "learning_rate": 0.003,
      "loss": 4.0133,
      "step": 9103
    },
    {
      "epoch": 0.09104,
      "grad_norm": 1.0556076158321603,
      "learning_rate": 0.003,
      "loss": 4.0377,
      "step": 9104
    },
    {
      "epoch": 0.09105,
      "grad_norm": 1.1979973249595275,
      "learning_rate": 0.003,
      "loss": 4.0219,
      "step": 9105
    },
    {
      "epoch": 0.09106,
      "grad_norm": 1.1589318950983944,
      "learning_rate": 0.003,
      "loss": 4.0192,
      "step": 9106
    },
    {
      "epoch": 0.09107,
      "grad_norm": 1.1814885851810335,
      "learning_rate": 0.003,
      "loss": 4.0346,
      "step": 9107
    },
    {
      "epoch": 0.09108,
      "grad_norm": 1.019986448191847,
      "learning_rate": 0.003,
      "loss": 4.0232,
      "step": 9108
    },
    {
      "epoch": 0.09109,
      "grad_norm": 1.057663996654355,
      "learning_rate": 0.003,
      "loss": 4.0227,
      "step": 9109
    },
    {
      "epoch": 0.0911,
      "grad_norm": 1.2256223665855766,
      "learning_rate": 0.003,
      "loss": 4.0364,
      "step": 9110
    },
    {
      "epoch": 0.09111,
      "grad_norm": 0.9461029364575244,
      "learning_rate": 0.003,
      "loss": 4.0456,
      "step": 9111
    },
    {
      "epoch": 0.09112,
      "grad_norm": 1.049429275804046,
      "learning_rate": 0.003,
      "loss": 4.0157,
      "step": 9112
    },
    {
      "epoch": 0.09113,
      "grad_norm": 1.031956399076363,
      "learning_rate": 0.003,
      "loss": 4.0037,
      "step": 9113
    },
    {
      "epoch": 0.09114,
      "grad_norm": 1.0174397153586041,
      "learning_rate": 0.003,
      "loss": 4.0202,
      "step": 9114
    },
    {
      "epoch": 0.09115,
      "grad_norm": 1.2745379192778026,
      "learning_rate": 0.003,
      "loss": 4.0314,
      "step": 9115
    },
    {
      "epoch": 0.09116,
      "grad_norm": 0.9837529257881423,
      "learning_rate": 0.003,
      "loss": 4.0733,
      "step": 9116
    },
    {
      "epoch": 0.09117,
      "grad_norm": 1.1786562942934684,
      "learning_rate": 0.003,
      "loss": 4.0508,
      "step": 9117
    },
    {
      "epoch": 0.09118,
      "grad_norm": 1.0560853969394606,
      "learning_rate": 0.003,
      "loss": 4.037,
      "step": 9118
    },
    {
      "epoch": 0.09119,
      "grad_norm": 1.0377188929161931,
      "learning_rate": 0.003,
      "loss": 4.0309,
      "step": 9119
    },
    {
      "epoch": 0.0912,
      "grad_norm": 1.1207049150632644,
      "learning_rate": 0.003,
      "loss": 4.0493,
      "step": 9120
    },
    {
      "epoch": 0.09121,
      "grad_norm": 1.0193731745757568,
      "learning_rate": 0.003,
      "loss": 4.0677,
      "step": 9121
    },
    {
      "epoch": 0.09122,
      "grad_norm": 1.2624197726748643,
      "learning_rate": 0.003,
      "loss": 4.0654,
      "step": 9122
    },
    {
      "epoch": 0.09123,
      "grad_norm": 1.072056196669728,
      "learning_rate": 0.003,
      "loss": 4.0175,
      "step": 9123
    },
    {
      "epoch": 0.09124,
      "grad_norm": 1.239441100721789,
      "learning_rate": 0.003,
      "loss": 4.039,
      "step": 9124
    },
    {
      "epoch": 0.09125,
      "grad_norm": 1.083356005410815,
      "learning_rate": 0.003,
      "loss": 4.0569,
      "step": 9125
    },
    {
      "epoch": 0.09126,
      "grad_norm": 1.266014219278627,
      "learning_rate": 0.003,
      "loss": 4.0144,
      "step": 9126
    },
    {
      "epoch": 0.09127,
      "grad_norm": 0.8598933039930851,
      "learning_rate": 0.003,
      "loss": 4.0454,
      "step": 9127
    },
    {
      "epoch": 0.09128,
      "grad_norm": 0.89093357601972,
      "learning_rate": 0.003,
      "loss": 4.0358,
      "step": 9128
    },
    {
      "epoch": 0.09129,
      "grad_norm": 1.11080170345324,
      "learning_rate": 0.003,
      "loss": 4.0309,
      "step": 9129
    },
    {
      "epoch": 0.0913,
      "grad_norm": 1.1921225096267718,
      "learning_rate": 0.003,
      "loss": 4.0287,
      "step": 9130
    },
    {
      "epoch": 0.09131,
      "grad_norm": 0.9911325172513188,
      "learning_rate": 0.003,
      "loss": 4.0246,
      "step": 9131
    },
    {
      "epoch": 0.09132,
      "grad_norm": 1.210452388671294,
      "learning_rate": 0.003,
      "loss": 4.0558,
      "step": 9132
    },
    {
      "epoch": 0.09133,
      "grad_norm": 1.1006247067475978,
      "learning_rate": 0.003,
      "loss": 4.0348,
      "step": 9133
    },
    {
      "epoch": 0.09134,
      "grad_norm": 1.192482017694382,
      "learning_rate": 0.003,
      "loss": 4.0409,
      "step": 9134
    },
    {
      "epoch": 0.09135,
      "grad_norm": 1.050325724178467,
      "learning_rate": 0.003,
      "loss": 4.0501,
      "step": 9135
    },
    {
      "epoch": 0.09136,
      "grad_norm": 1.2143867555226864,
      "learning_rate": 0.003,
      "loss": 4.0333,
      "step": 9136
    },
    {
      "epoch": 0.09137,
      "grad_norm": 0.9760810890066407,
      "learning_rate": 0.003,
      "loss": 4.0629,
      "step": 9137
    },
    {
      "epoch": 0.09138,
      "grad_norm": 1.1888855045602282,
      "learning_rate": 0.003,
      "loss": 4.0352,
      "step": 9138
    },
    {
      "epoch": 0.09139,
      "grad_norm": 0.9681065989709288,
      "learning_rate": 0.003,
      "loss": 4.0342,
      "step": 9139
    },
    {
      "epoch": 0.0914,
      "grad_norm": 1.3038699850066278,
      "learning_rate": 0.003,
      "loss": 4.0459,
      "step": 9140
    },
    {
      "epoch": 0.09141,
      "grad_norm": 0.9119873610357526,
      "learning_rate": 0.003,
      "loss": 4.069,
      "step": 9141
    },
    {
      "epoch": 0.09142,
      "grad_norm": 0.97753456917582,
      "learning_rate": 0.003,
      "loss": 4.0566,
      "step": 9142
    },
    {
      "epoch": 0.09143,
      "grad_norm": 1.2011099485139207,
      "learning_rate": 0.003,
      "loss": 4.0125,
      "step": 9143
    },
    {
      "epoch": 0.09144,
      "grad_norm": 0.8653559838348887,
      "learning_rate": 0.003,
      "loss": 4.0298,
      "step": 9144
    },
    {
      "epoch": 0.09145,
      "grad_norm": 0.6804719677612727,
      "learning_rate": 0.003,
      "loss": 4.0486,
      "step": 9145
    },
    {
      "epoch": 0.09146,
      "grad_norm": 0.8281509343825025,
      "learning_rate": 0.003,
      "loss": 4.0448,
      "step": 9146
    },
    {
      "epoch": 0.09147,
      "grad_norm": 1.046932924628029,
      "learning_rate": 0.003,
      "loss": 4.0329,
      "step": 9147
    },
    {
      "epoch": 0.09148,
      "grad_norm": 0.9481937690438011,
      "learning_rate": 0.003,
      "loss": 4.0486,
      "step": 9148
    },
    {
      "epoch": 0.09149,
      "grad_norm": 1.1467040624244416,
      "learning_rate": 0.003,
      "loss": 4.0004,
      "step": 9149
    },
    {
      "epoch": 0.0915,
      "grad_norm": 1.2688544713513863,
      "learning_rate": 0.003,
      "loss": 4.0084,
      "step": 9150
    },
    {
      "epoch": 0.09151,
      "grad_norm": 0.9410854425311685,
      "learning_rate": 0.003,
      "loss": 4.0393,
      "step": 9151
    },
    {
      "epoch": 0.09152,
      "grad_norm": 1.0715331038388722,
      "learning_rate": 0.003,
      "loss": 4.0491,
      "step": 9152
    },
    {
      "epoch": 0.09153,
      "grad_norm": 1.011570665232525,
      "learning_rate": 0.003,
      "loss": 4.0425,
      "step": 9153
    },
    {
      "epoch": 0.09154,
      "grad_norm": 1.0783280541516371,
      "learning_rate": 0.003,
      "loss": 4.048,
      "step": 9154
    },
    {
      "epoch": 0.09155,
      "grad_norm": 1.0855343965090731,
      "learning_rate": 0.003,
      "loss": 4.0503,
      "step": 9155
    },
    {
      "epoch": 0.09156,
      "grad_norm": 1.0564142389909517,
      "learning_rate": 0.003,
      "loss": 4.0477,
      "step": 9156
    },
    {
      "epoch": 0.09157,
      "grad_norm": 1.0945895233993794,
      "learning_rate": 0.003,
      "loss": 4.0027,
      "step": 9157
    },
    {
      "epoch": 0.09158,
      "grad_norm": 1.336449148127849,
      "learning_rate": 0.003,
      "loss": 4.035,
      "step": 9158
    },
    {
      "epoch": 0.09159,
      "grad_norm": 0.991375084782573,
      "learning_rate": 0.003,
      "loss": 4.0452,
      "step": 9159
    },
    {
      "epoch": 0.0916,
      "grad_norm": 1.0591224251327636,
      "learning_rate": 0.003,
      "loss": 4.0174,
      "step": 9160
    },
    {
      "epoch": 0.09161,
      "grad_norm": 1.164704576534139,
      "learning_rate": 0.003,
      "loss": 4.0273,
      "step": 9161
    },
    {
      "epoch": 0.09162,
      "grad_norm": 0.9413438539519919,
      "learning_rate": 0.003,
      "loss": 4.0595,
      "step": 9162
    },
    {
      "epoch": 0.09163,
      "grad_norm": 0.9714944791493136,
      "learning_rate": 0.003,
      "loss": 4.0126,
      "step": 9163
    },
    {
      "epoch": 0.09164,
      "grad_norm": 1.0287059172603712,
      "learning_rate": 0.003,
      "loss": 4.0339,
      "step": 9164
    },
    {
      "epoch": 0.09165,
      "grad_norm": 1.1861303886211818,
      "learning_rate": 0.003,
      "loss": 4.0642,
      "step": 9165
    },
    {
      "epoch": 0.09166,
      "grad_norm": 1.124122240608982,
      "learning_rate": 0.003,
      "loss": 4.0386,
      "step": 9166
    },
    {
      "epoch": 0.09167,
      "grad_norm": 1.1459093885614982,
      "learning_rate": 0.003,
      "loss": 4.0284,
      "step": 9167
    },
    {
      "epoch": 0.09168,
      "grad_norm": 1.119778165627705,
      "learning_rate": 0.003,
      "loss": 4.0185,
      "step": 9168
    },
    {
      "epoch": 0.09169,
      "grad_norm": 1.1111751785567634,
      "learning_rate": 0.003,
      "loss": 4.0496,
      "step": 9169
    },
    {
      "epoch": 0.0917,
      "grad_norm": 1.2226524744645328,
      "learning_rate": 0.003,
      "loss": 4.0296,
      "step": 9170
    },
    {
      "epoch": 0.09171,
      "grad_norm": 0.8656526632807879,
      "learning_rate": 0.003,
      "loss": 4.0597,
      "step": 9171
    },
    {
      "epoch": 0.09172,
      "grad_norm": 1.099267531571083,
      "learning_rate": 0.003,
      "loss": 4.0649,
      "step": 9172
    },
    {
      "epoch": 0.09173,
      "grad_norm": 1.1162934002071323,
      "learning_rate": 0.003,
      "loss": 4.0357,
      "step": 9173
    },
    {
      "epoch": 0.09174,
      "grad_norm": 1.1000518721341896,
      "learning_rate": 0.003,
      "loss": 4.0487,
      "step": 9174
    },
    {
      "epoch": 0.09175,
      "grad_norm": 1.267658430539927,
      "learning_rate": 0.003,
      "loss": 4.0491,
      "step": 9175
    },
    {
      "epoch": 0.09176,
      "grad_norm": 1.0571564301320642,
      "learning_rate": 0.003,
      "loss": 4.0482,
      "step": 9176
    },
    {
      "epoch": 0.09177,
      "grad_norm": 1.1069807499260333,
      "learning_rate": 0.003,
      "loss": 4.002,
      "step": 9177
    },
    {
      "epoch": 0.09178,
      "grad_norm": 1.0241191541435508,
      "learning_rate": 0.003,
      "loss": 4.0513,
      "step": 9178
    },
    {
      "epoch": 0.09179,
      "grad_norm": 1.2215349660778567,
      "learning_rate": 0.003,
      "loss": 4.0428,
      "step": 9179
    },
    {
      "epoch": 0.0918,
      "grad_norm": 0.9063283480014228,
      "learning_rate": 0.003,
      "loss": 4.0357,
      "step": 9180
    },
    {
      "epoch": 0.09181,
      "grad_norm": 0.8608588303075517,
      "learning_rate": 0.003,
      "loss": 4.0227,
      "step": 9181
    },
    {
      "epoch": 0.09182,
      "grad_norm": 1.080015607354298,
      "learning_rate": 0.003,
      "loss": 4.0339,
      "step": 9182
    },
    {
      "epoch": 0.09183,
      "grad_norm": 1.187987053928514,
      "learning_rate": 0.003,
      "loss": 4.0297,
      "step": 9183
    },
    {
      "epoch": 0.09184,
      "grad_norm": 1.0370353123734928,
      "learning_rate": 0.003,
      "loss": 4.0265,
      "step": 9184
    },
    {
      "epoch": 0.09185,
      "grad_norm": 1.1348969491194556,
      "learning_rate": 0.003,
      "loss": 4.0562,
      "step": 9185
    },
    {
      "epoch": 0.09186,
      "grad_norm": 1.2036726155070432,
      "learning_rate": 0.003,
      "loss": 4.0028,
      "step": 9186
    },
    {
      "epoch": 0.09187,
      "grad_norm": 1.0871963616696243,
      "learning_rate": 0.003,
      "loss": 4.0681,
      "step": 9187
    },
    {
      "epoch": 0.09188,
      "grad_norm": 1.1723509075908214,
      "learning_rate": 0.003,
      "loss": 4.0437,
      "step": 9188
    },
    {
      "epoch": 0.09189,
      "grad_norm": 1.1132456681131127,
      "learning_rate": 0.003,
      "loss": 4.0488,
      "step": 9189
    },
    {
      "epoch": 0.0919,
      "grad_norm": 1.044542911903216,
      "learning_rate": 0.003,
      "loss": 4.0571,
      "step": 9190
    },
    {
      "epoch": 0.09191,
      "grad_norm": 1.2056609382294916,
      "learning_rate": 0.003,
      "loss": 4.0365,
      "step": 9191
    },
    {
      "epoch": 0.09192,
      "grad_norm": 1.0206520860594175,
      "learning_rate": 0.003,
      "loss": 4.0177,
      "step": 9192
    },
    {
      "epoch": 0.09193,
      "grad_norm": 1.1522627595760395,
      "learning_rate": 0.003,
      "loss": 4.0139,
      "step": 9193
    },
    {
      "epoch": 0.09194,
      "grad_norm": 1.036571230879693,
      "learning_rate": 0.003,
      "loss": 4.0335,
      "step": 9194
    },
    {
      "epoch": 0.09195,
      "grad_norm": 1.156868074376868,
      "learning_rate": 0.003,
      "loss": 4.0382,
      "step": 9195
    },
    {
      "epoch": 0.09196,
      "grad_norm": 0.9632802843738769,
      "learning_rate": 0.003,
      "loss": 4.0471,
      "step": 9196
    },
    {
      "epoch": 0.09197,
      "grad_norm": 1.2366512084187664,
      "learning_rate": 0.003,
      "loss": 4.0406,
      "step": 9197
    },
    {
      "epoch": 0.09198,
      "grad_norm": 0.9807141780820184,
      "learning_rate": 0.003,
      "loss": 4.0334,
      "step": 9198
    },
    {
      "epoch": 0.09199,
      "grad_norm": 1.0704002219425641,
      "learning_rate": 0.003,
      "loss": 4.0485,
      "step": 9199
    },
    {
      "epoch": 0.092,
      "grad_norm": 1.1110145741263948,
      "learning_rate": 0.003,
      "loss": 4.0474,
      "step": 9200
    },
    {
      "epoch": 0.09201,
      "grad_norm": 1.090835930160533,
      "learning_rate": 0.003,
      "loss": 4.0303,
      "step": 9201
    },
    {
      "epoch": 0.09202,
      "grad_norm": 1.0317362468150382,
      "learning_rate": 0.003,
      "loss": 4.0453,
      "step": 9202
    },
    {
      "epoch": 0.09203,
      "grad_norm": 1.2267741986981442,
      "learning_rate": 0.003,
      "loss": 4.0498,
      "step": 9203
    },
    {
      "epoch": 0.09204,
      "grad_norm": 1.1325105553589676,
      "learning_rate": 0.003,
      "loss": 4.0503,
      "step": 9204
    },
    {
      "epoch": 0.09205,
      "grad_norm": 1.1638105186655754,
      "learning_rate": 0.003,
      "loss": 4.0388,
      "step": 9205
    },
    {
      "epoch": 0.09206,
      "grad_norm": 1.0692805133374446,
      "learning_rate": 0.003,
      "loss": 4.0452,
      "step": 9206
    },
    {
      "epoch": 0.09207,
      "grad_norm": 1.175551262927089,
      "learning_rate": 0.003,
      "loss": 4.0653,
      "step": 9207
    },
    {
      "epoch": 0.09208,
      "grad_norm": 1.0644580288738281,
      "learning_rate": 0.003,
      "loss": 4.0171,
      "step": 9208
    },
    {
      "epoch": 0.09209,
      "grad_norm": 0.9664436871225908,
      "learning_rate": 0.003,
      "loss": 4.0172,
      "step": 9209
    },
    {
      "epoch": 0.0921,
      "grad_norm": 1.1254680317199708,
      "learning_rate": 0.003,
      "loss": 4.0444,
      "step": 9210
    },
    {
      "epoch": 0.09211,
      "grad_norm": 1.200712594293563,
      "learning_rate": 0.003,
      "loss": 4.0261,
      "step": 9211
    },
    {
      "epoch": 0.09212,
      "grad_norm": 0.9683431414690981,
      "learning_rate": 0.003,
      "loss": 4.0449,
      "step": 9212
    },
    {
      "epoch": 0.09213,
      "grad_norm": 0.944968780879836,
      "learning_rate": 0.003,
      "loss": 4.0311,
      "step": 9213
    },
    {
      "epoch": 0.09214,
      "grad_norm": 1.0413526215293398,
      "learning_rate": 0.003,
      "loss": 4.0137,
      "step": 9214
    },
    {
      "epoch": 0.09215,
      "grad_norm": 1.1772555410236571,
      "learning_rate": 0.003,
      "loss": 4.0293,
      "step": 9215
    },
    {
      "epoch": 0.09216,
      "grad_norm": 0.9624083137396001,
      "learning_rate": 0.003,
      "loss": 4.0346,
      "step": 9216
    },
    {
      "epoch": 0.09217,
      "grad_norm": 1.0511031168385416,
      "learning_rate": 0.003,
      "loss": 3.9953,
      "step": 9217
    },
    {
      "epoch": 0.09218,
      "grad_norm": 1.3269317543115233,
      "learning_rate": 0.003,
      "loss": 4.0618,
      "step": 9218
    },
    {
      "epoch": 0.09219,
      "grad_norm": 0.9923317369891772,
      "learning_rate": 0.003,
      "loss": 4.0357,
      "step": 9219
    },
    {
      "epoch": 0.0922,
      "grad_norm": 1.073030298407734,
      "learning_rate": 0.003,
      "loss": 4.0376,
      "step": 9220
    },
    {
      "epoch": 0.09221,
      "grad_norm": 1.0446285156707587,
      "learning_rate": 0.003,
      "loss": 4.0494,
      "step": 9221
    },
    {
      "epoch": 0.09222,
      "grad_norm": 1.1434810207183548,
      "learning_rate": 0.003,
      "loss": 4.0379,
      "step": 9222
    },
    {
      "epoch": 0.09223,
      "grad_norm": 1.1025201940993825,
      "learning_rate": 0.003,
      "loss": 4.0314,
      "step": 9223
    },
    {
      "epoch": 0.09224,
      "grad_norm": 0.9566021361855169,
      "learning_rate": 0.003,
      "loss": 4.0434,
      "step": 9224
    },
    {
      "epoch": 0.09225,
      "grad_norm": 1.2323077112756355,
      "learning_rate": 0.003,
      "loss": 4.0287,
      "step": 9225
    },
    {
      "epoch": 0.09226,
      "grad_norm": 0.899900247722213,
      "learning_rate": 0.003,
      "loss": 4.0138,
      "step": 9226
    },
    {
      "epoch": 0.09227,
      "grad_norm": 1.0849893705736053,
      "learning_rate": 0.003,
      "loss": 4.0336,
      "step": 9227
    },
    {
      "epoch": 0.09228,
      "grad_norm": 1.2542611304862052,
      "learning_rate": 0.003,
      "loss": 4.0387,
      "step": 9228
    },
    {
      "epoch": 0.09229,
      "grad_norm": 0.8407437326641111,
      "learning_rate": 0.003,
      "loss": 4.0374,
      "step": 9229
    },
    {
      "epoch": 0.0923,
      "grad_norm": 1.158954671915461,
      "learning_rate": 0.003,
      "loss": 4.0383,
      "step": 9230
    },
    {
      "epoch": 0.09231,
      "grad_norm": 1.1704792802830555,
      "learning_rate": 0.003,
      "loss": 4.0365,
      "step": 9231
    },
    {
      "epoch": 0.09232,
      "grad_norm": 1.1029452862218423,
      "learning_rate": 0.003,
      "loss": 4.0455,
      "step": 9232
    },
    {
      "epoch": 0.09233,
      "grad_norm": 1.2002856333009415,
      "learning_rate": 0.003,
      "loss": 4.06,
      "step": 9233
    },
    {
      "epoch": 0.09234,
      "grad_norm": 0.8750251942831692,
      "learning_rate": 0.003,
      "loss": 4.0605,
      "step": 9234
    },
    {
      "epoch": 0.09235,
      "grad_norm": 1.0716615659635735,
      "learning_rate": 0.003,
      "loss": 4.0266,
      "step": 9235
    },
    {
      "epoch": 0.09236,
      "grad_norm": 1.5813090203643494,
      "learning_rate": 0.003,
      "loss": 4.0795,
      "step": 9236
    },
    {
      "epoch": 0.09237,
      "grad_norm": 1.0338774462879339,
      "learning_rate": 0.003,
      "loss": 4.0314,
      "step": 9237
    },
    {
      "epoch": 0.09238,
      "grad_norm": 1.135346462252927,
      "learning_rate": 0.003,
      "loss": 4.0201,
      "step": 9238
    },
    {
      "epoch": 0.09239,
      "grad_norm": 1.1394764093548577,
      "learning_rate": 0.003,
      "loss": 4.0243,
      "step": 9239
    },
    {
      "epoch": 0.0924,
      "grad_norm": 1.1428354439905772,
      "learning_rate": 0.003,
      "loss": 4.0333,
      "step": 9240
    },
    {
      "epoch": 0.09241,
      "grad_norm": 1.0203493481299601,
      "learning_rate": 0.003,
      "loss": 4.0383,
      "step": 9241
    },
    {
      "epoch": 0.09242,
      "grad_norm": 0.9451602651492341,
      "learning_rate": 0.003,
      "loss": 4.0515,
      "step": 9242
    },
    {
      "epoch": 0.09243,
      "grad_norm": 1.0754642967463526,
      "learning_rate": 0.003,
      "loss": 4.053,
      "step": 9243
    },
    {
      "epoch": 0.09244,
      "grad_norm": 1.2105994420477935,
      "learning_rate": 0.003,
      "loss": 4.0407,
      "step": 9244
    },
    {
      "epoch": 0.09245,
      "grad_norm": 1.1177071678503647,
      "learning_rate": 0.003,
      "loss": 4.0214,
      "step": 9245
    },
    {
      "epoch": 0.09246,
      "grad_norm": 1.1262767459012595,
      "learning_rate": 0.003,
      "loss": 4.0378,
      "step": 9246
    },
    {
      "epoch": 0.09247,
      "grad_norm": 1.1101169385517686,
      "learning_rate": 0.003,
      "loss": 4.0259,
      "step": 9247
    },
    {
      "epoch": 0.09248,
      "grad_norm": 1.039107535835496,
      "learning_rate": 0.003,
      "loss": 4.0355,
      "step": 9248
    },
    {
      "epoch": 0.09249,
      "grad_norm": 1.051375380829021,
      "learning_rate": 0.003,
      "loss": 4.0832,
      "step": 9249
    },
    {
      "epoch": 0.0925,
      "grad_norm": 1.0818027535404036,
      "learning_rate": 0.003,
      "loss": 4.0546,
      "step": 9250
    },
    {
      "epoch": 0.09251,
      "grad_norm": 1.21441854307002,
      "learning_rate": 0.003,
      "loss": 4.0564,
      "step": 9251
    },
    {
      "epoch": 0.09252,
      "grad_norm": 0.9446590679589214,
      "learning_rate": 0.003,
      "loss": 4.0604,
      "step": 9252
    },
    {
      "epoch": 0.09253,
      "grad_norm": 0.9688539640875207,
      "learning_rate": 0.003,
      "loss": 4.0323,
      "step": 9253
    },
    {
      "epoch": 0.09254,
      "grad_norm": 1.1291024219182868,
      "learning_rate": 0.003,
      "loss": 4.0566,
      "step": 9254
    },
    {
      "epoch": 0.09255,
      "grad_norm": 1.211862634432545,
      "learning_rate": 0.003,
      "loss": 4.016,
      "step": 9255
    },
    {
      "epoch": 0.09256,
      "grad_norm": 1.2218560976542767,
      "learning_rate": 0.003,
      "loss": 4.0505,
      "step": 9256
    },
    {
      "epoch": 0.09257,
      "grad_norm": 1.0643221369589915,
      "learning_rate": 0.003,
      "loss": 4.0421,
      "step": 9257
    },
    {
      "epoch": 0.09258,
      "grad_norm": 1.1527043491418376,
      "learning_rate": 0.003,
      "loss": 4.0322,
      "step": 9258
    },
    {
      "epoch": 0.09259,
      "grad_norm": 0.9926788558086477,
      "learning_rate": 0.003,
      "loss": 4.0492,
      "step": 9259
    },
    {
      "epoch": 0.0926,
      "grad_norm": 1.0433542223940273,
      "learning_rate": 0.003,
      "loss": 4.0393,
      "step": 9260
    },
    {
      "epoch": 0.09261,
      "grad_norm": 1.207804917706118,
      "learning_rate": 0.003,
      "loss": 4.0216,
      "step": 9261
    },
    {
      "epoch": 0.09262,
      "grad_norm": 1.1324209587635743,
      "learning_rate": 0.003,
      "loss": 4.0197,
      "step": 9262
    },
    {
      "epoch": 0.09263,
      "grad_norm": 1.172369074851875,
      "learning_rate": 0.003,
      "loss": 4.0433,
      "step": 9263
    },
    {
      "epoch": 0.09264,
      "grad_norm": 1.1682667843729466,
      "learning_rate": 0.003,
      "loss": 4.073,
      "step": 9264
    },
    {
      "epoch": 0.09265,
      "grad_norm": 1.253090916375967,
      "learning_rate": 0.003,
      "loss": 4.045,
      "step": 9265
    },
    {
      "epoch": 0.09266,
      "grad_norm": 1.1781887620358225,
      "learning_rate": 0.003,
      "loss": 4.0424,
      "step": 9266
    },
    {
      "epoch": 0.09267,
      "grad_norm": 1.0284374738702509,
      "learning_rate": 0.003,
      "loss": 4.0175,
      "step": 9267
    },
    {
      "epoch": 0.09268,
      "grad_norm": 1.1665787828723402,
      "learning_rate": 0.003,
      "loss": 4.027,
      "step": 9268
    },
    {
      "epoch": 0.09269,
      "grad_norm": 1.1264162171369585,
      "learning_rate": 0.003,
      "loss": 4.034,
      "step": 9269
    },
    {
      "epoch": 0.0927,
      "grad_norm": 1.1949132258262425,
      "learning_rate": 0.003,
      "loss": 4.0636,
      "step": 9270
    },
    {
      "epoch": 0.09271,
      "grad_norm": 0.9696066482483353,
      "learning_rate": 0.003,
      "loss": 4.0424,
      "step": 9271
    },
    {
      "epoch": 0.09272,
      "grad_norm": 1.0301396773994387,
      "learning_rate": 0.003,
      "loss": 4.0296,
      "step": 9272
    },
    {
      "epoch": 0.09273,
      "grad_norm": 1.0893059409180028,
      "learning_rate": 0.003,
      "loss": 4.0494,
      "step": 9273
    },
    {
      "epoch": 0.09274,
      "grad_norm": 1.0947192726893542,
      "learning_rate": 0.003,
      "loss": 4.037,
      "step": 9274
    },
    {
      "epoch": 0.09275,
      "grad_norm": 1.0723410183602133,
      "learning_rate": 0.003,
      "loss": 4.0596,
      "step": 9275
    },
    {
      "epoch": 0.09276,
      "grad_norm": 1.1691696906210551,
      "learning_rate": 0.003,
      "loss": 4.05,
      "step": 9276
    },
    {
      "epoch": 0.09277,
      "grad_norm": 1.0647822469371042,
      "learning_rate": 0.003,
      "loss": 4.0432,
      "step": 9277
    },
    {
      "epoch": 0.09278,
      "grad_norm": 1.2167469789912233,
      "learning_rate": 0.003,
      "loss": 4.0214,
      "step": 9278
    },
    {
      "epoch": 0.09279,
      "grad_norm": 0.9896695614661197,
      "learning_rate": 0.003,
      "loss": 4.0407,
      "step": 9279
    },
    {
      "epoch": 0.0928,
      "grad_norm": 1.3634170937343293,
      "learning_rate": 0.003,
      "loss": 4.0437,
      "step": 9280
    },
    {
      "epoch": 0.09281,
      "grad_norm": 0.8620980137769152,
      "learning_rate": 0.003,
      "loss": 4.0415,
      "step": 9281
    },
    {
      "epoch": 0.09282,
      "grad_norm": 1.0427330391887681,
      "learning_rate": 0.003,
      "loss": 4.069,
      "step": 9282
    },
    {
      "epoch": 0.09283,
      "grad_norm": 1.1710140561731026,
      "learning_rate": 0.003,
      "loss": 4.0652,
      "step": 9283
    },
    {
      "epoch": 0.09284,
      "grad_norm": 1.4120083965462074,
      "learning_rate": 0.003,
      "loss": 4.0476,
      "step": 9284
    },
    {
      "epoch": 0.09285,
      "grad_norm": 1.0005243949124853,
      "learning_rate": 0.003,
      "loss": 4.0309,
      "step": 9285
    },
    {
      "epoch": 0.09286,
      "grad_norm": 1.2076261862310649,
      "learning_rate": 0.003,
      "loss": 4.0543,
      "step": 9286
    },
    {
      "epoch": 0.09287,
      "grad_norm": 0.9611404585275731,
      "learning_rate": 0.003,
      "loss": 4.0459,
      "step": 9287
    },
    {
      "epoch": 0.09288,
      "grad_norm": 0.9896962192199752,
      "learning_rate": 0.003,
      "loss": 4.0472,
      "step": 9288
    },
    {
      "epoch": 0.09289,
      "grad_norm": 1.1918897636488415,
      "learning_rate": 0.003,
      "loss": 4.0193,
      "step": 9289
    },
    {
      "epoch": 0.0929,
      "grad_norm": 1.100511638945172,
      "learning_rate": 0.003,
      "loss": 4.045,
      "step": 9290
    },
    {
      "epoch": 0.09291,
      "grad_norm": 0.9953764094766955,
      "learning_rate": 0.003,
      "loss": 4.0331,
      "step": 9291
    },
    {
      "epoch": 0.09292,
      "grad_norm": 0.9663852236515597,
      "learning_rate": 0.003,
      "loss": 4.0521,
      "step": 9292
    },
    {
      "epoch": 0.09293,
      "grad_norm": 1.0635777370604826,
      "learning_rate": 0.003,
      "loss": 4.0149,
      "step": 9293
    },
    {
      "epoch": 0.09294,
      "grad_norm": 1.2456008375763008,
      "learning_rate": 0.003,
      "loss": 4.0359,
      "step": 9294
    },
    {
      "epoch": 0.09295,
      "grad_norm": 0.8653819364739406,
      "learning_rate": 0.003,
      "loss": 4.0358,
      "step": 9295
    },
    {
      "epoch": 0.09296,
      "grad_norm": 1.1680264346725173,
      "learning_rate": 0.003,
      "loss": 4.0221,
      "step": 9296
    },
    {
      "epoch": 0.09297,
      "grad_norm": 1.1901508293685317,
      "learning_rate": 0.003,
      "loss": 4.0427,
      "step": 9297
    },
    {
      "epoch": 0.09298,
      "grad_norm": 1.0915606863010314,
      "learning_rate": 0.003,
      "loss": 4.0398,
      "step": 9298
    },
    {
      "epoch": 0.09299,
      "grad_norm": 1.1592108148961227,
      "learning_rate": 0.003,
      "loss": 4.0375,
      "step": 9299
    },
    {
      "epoch": 0.093,
      "grad_norm": 1.0708244671033804,
      "learning_rate": 0.003,
      "loss": 4.0401,
      "step": 9300
    },
    {
      "epoch": 0.09301,
      "grad_norm": 1.1033798817954517,
      "learning_rate": 0.003,
      "loss": 4.0358,
      "step": 9301
    },
    {
      "epoch": 0.09302,
      "grad_norm": 1.0088796578839236,
      "learning_rate": 0.003,
      "loss": 4.0616,
      "step": 9302
    },
    {
      "epoch": 0.09303,
      "grad_norm": 1.2858796878215408,
      "learning_rate": 0.003,
      "loss": 4.0363,
      "step": 9303
    },
    {
      "epoch": 0.09304,
      "grad_norm": 1.2506744639450122,
      "learning_rate": 0.003,
      "loss": 4.0291,
      "step": 9304
    },
    {
      "epoch": 0.09305,
      "grad_norm": 0.9961386724217615,
      "learning_rate": 0.003,
      "loss": 4.0312,
      "step": 9305
    },
    {
      "epoch": 0.09306,
      "grad_norm": 1.2149212174491897,
      "learning_rate": 0.003,
      "loss": 4.0372,
      "step": 9306
    },
    {
      "epoch": 0.09307,
      "grad_norm": 0.9136578619084738,
      "learning_rate": 0.003,
      "loss": 4.0348,
      "step": 9307
    },
    {
      "epoch": 0.09308,
      "grad_norm": 0.9588826151722423,
      "learning_rate": 0.003,
      "loss": 4.018,
      "step": 9308
    },
    {
      "epoch": 0.09309,
      "grad_norm": 0.957182512469483,
      "learning_rate": 0.003,
      "loss": 4.0433,
      "step": 9309
    },
    {
      "epoch": 0.0931,
      "grad_norm": 1.2935022678548451,
      "learning_rate": 0.003,
      "loss": 4.0576,
      "step": 9310
    },
    {
      "epoch": 0.09311,
      "grad_norm": 0.9790928444088314,
      "learning_rate": 0.003,
      "loss": 4.0349,
      "step": 9311
    },
    {
      "epoch": 0.09312,
      "grad_norm": 1.150857565224749,
      "learning_rate": 0.003,
      "loss": 4.0404,
      "step": 9312
    },
    {
      "epoch": 0.09313,
      "grad_norm": 1.075107605301709,
      "learning_rate": 0.003,
      "loss": 4.0687,
      "step": 9313
    },
    {
      "epoch": 0.09314,
      "grad_norm": 1.1182712741118235,
      "learning_rate": 0.003,
      "loss": 4.036,
      "step": 9314
    },
    {
      "epoch": 0.09315,
      "grad_norm": 1.2529463174245064,
      "learning_rate": 0.003,
      "loss": 4.0417,
      "step": 9315
    },
    {
      "epoch": 0.09316,
      "grad_norm": 0.9771774475983117,
      "learning_rate": 0.003,
      "loss": 4.0367,
      "step": 9316
    },
    {
      "epoch": 0.09317,
      "grad_norm": 1.3760618473803496,
      "learning_rate": 0.003,
      "loss": 4.0644,
      "step": 9317
    },
    {
      "epoch": 0.09318,
      "grad_norm": 0.7429498285526682,
      "learning_rate": 0.003,
      "loss": 4.0225,
      "step": 9318
    },
    {
      "epoch": 0.09319,
      "grad_norm": 0.9792252701782946,
      "learning_rate": 0.003,
      "loss": 4.0663,
      "step": 9319
    },
    {
      "epoch": 0.0932,
      "grad_norm": 1.4975639708650046,
      "learning_rate": 0.003,
      "loss": 4.0513,
      "step": 9320
    },
    {
      "epoch": 0.09321,
      "grad_norm": 0.8902587907079172,
      "learning_rate": 0.003,
      "loss": 4.0318,
      "step": 9321
    },
    {
      "epoch": 0.09322,
      "grad_norm": 0.9093500134430933,
      "learning_rate": 0.003,
      "loss": 4.0064,
      "step": 9322
    },
    {
      "epoch": 0.09323,
      "grad_norm": 1.0993796211404194,
      "learning_rate": 0.003,
      "loss": 4.0385,
      "step": 9323
    },
    {
      "epoch": 0.09324,
      "grad_norm": 1.160680899752396,
      "learning_rate": 0.003,
      "loss": 4.0577,
      "step": 9324
    },
    {
      "epoch": 0.09325,
      "grad_norm": 0.9385567636610579,
      "learning_rate": 0.003,
      "loss": 4.0005,
      "step": 9325
    },
    {
      "epoch": 0.09326,
      "grad_norm": 0.9621843602737219,
      "learning_rate": 0.003,
      "loss": 4.0183,
      "step": 9326
    },
    {
      "epoch": 0.09327,
      "grad_norm": 1.2283368544933888,
      "learning_rate": 0.003,
      "loss": 4.0431,
      "step": 9327
    },
    {
      "epoch": 0.09328,
      "grad_norm": 1.138118155085037,
      "learning_rate": 0.003,
      "loss": 4.0375,
      "step": 9328
    },
    {
      "epoch": 0.09329,
      "grad_norm": 1.3360636810202806,
      "learning_rate": 0.003,
      "loss": 4.0493,
      "step": 9329
    },
    {
      "epoch": 0.0933,
      "grad_norm": 0.997995204861989,
      "learning_rate": 0.003,
      "loss": 4.0558,
      "step": 9330
    },
    {
      "epoch": 0.09331,
      "grad_norm": 1.1201516817051664,
      "learning_rate": 0.003,
      "loss": 4.0287,
      "step": 9331
    },
    {
      "epoch": 0.09332,
      "grad_norm": 1.204525418131745,
      "learning_rate": 0.003,
      "loss": 4.0571,
      "step": 9332
    },
    {
      "epoch": 0.09333,
      "grad_norm": 1.0575312400096248,
      "learning_rate": 0.003,
      "loss": 4.0266,
      "step": 9333
    },
    {
      "epoch": 0.09334,
      "grad_norm": 1.2701572053232315,
      "learning_rate": 0.003,
      "loss": 4.0573,
      "step": 9334
    },
    {
      "epoch": 0.09335,
      "grad_norm": 1.1585842605178802,
      "learning_rate": 0.003,
      "loss": 4.0343,
      "step": 9335
    },
    {
      "epoch": 0.09336,
      "grad_norm": 1.0311564658307473,
      "learning_rate": 0.003,
      "loss": 4.0274,
      "step": 9336
    },
    {
      "epoch": 0.09337,
      "grad_norm": 0.9367345134695763,
      "learning_rate": 0.003,
      "loss": 4.0404,
      "step": 9337
    },
    {
      "epoch": 0.09338,
      "grad_norm": 0.9632251089955324,
      "learning_rate": 0.003,
      "loss": 4.0414,
      "step": 9338
    },
    {
      "epoch": 0.09339,
      "grad_norm": 1.0383186326297353,
      "learning_rate": 0.003,
      "loss": 4.0202,
      "step": 9339
    },
    {
      "epoch": 0.0934,
      "grad_norm": 1.086317533403553,
      "learning_rate": 0.003,
      "loss": 4.0169,
      "step": 9340
    },
    {
      "epoch": 0.09341,
      "grad_norm": 1.066313152390867,
      "learning_rate": 0.003,
      "loss": 4.0412,
      "step": 9341
    },
    {
      "epoch": 0.09342,
      "grad_norm": 1.2845490767261778,
      "learning_rate": 0.003,
      "loss": 4.0361,
      "step": 9342
    },
    {
      "epoch": 0.09343,
      "grad_norm": 1.200116094561525,
      "learning_rate": 0.003,
      "loss": 4.0419,
      "step": 9343
    },
    {
      "epoch": 0.09344,
      "grad_norm": 1.1152409513420605,
      "learning_rate": 0.003,
      "loss": 4.0376,
      "step": 9344
    },
    {
      "epoch": 0.09345,
      "grad_norm": 1.1107749483966356,
      "learning_rate": 0.003,
      "loss": 4.0577,
      "step": 9345
    },
    {
      "epoch": 0.09346,
      "grad_norm": 1.1283130554525544,
      "learning_rate": 0.003,
      "loss": 4.0459,
      "step": 9346
    },
    {
      "epoch": 0.09347,
      "grad_norm": 1.0736955106456132,
      "learning_rate": 0.003,
      "loss": 4.0288,
      "step": 9347
    },
    {
      "epoch": 0.09348,
      "grad_norm": 1.0736643024406574,
      "learning_rate": 0.003,
      "loss": 4.05,
      "step": 9348
    },
    {
      "epoch": 0.09349,
      "grad_norm": 1.1989810783719919,
      "learning_rate": 0.003,
      "loss": 4.0503,
      "step": 9349
    },
    {
      "epoch": 0.0935,
      "grad_norm": 1.0740480563955976,
      "learning_rate": 0.003,
      "loss": 4.0333,
      "step": 9350
    },
    {
      "epoch": 0.09351,
      "grad_norm": 0.9996894673701714,
      "learning_rate": 0.003,
      "loss": 4.0057,
      "step": 9351
    },
    {
      "epoch": 0.09352,
      "grad_norm": 1.2492290188517632,
      "learning_rate": 0.003,
      "loss": 4.0404,
      "step": 9352
    },
    {
      "epoch": 0.09353,
      "grad_norm": 1.0984353858675837,
      "learning_rate": 0.003,
      "loss": 4.0131,
      "step": 9353
    },
    {
      "epoch": 0.09354,
      "grad_norm": 1.0494227900731494,
      "learning_rate": 0.003,
      "loss": 4.0302,
      "step": 9354
    },
    {
      "epoch": 0.09355,
      "grad_norm": 1.0887626536039114,
      "learning_rate": 0.003,
      "loss": 4.0534,
      "step": 9355
    },
    {
      "epoch": 0.09356,
      "grad_norm": 1.1063458069116743,
      "learning_rate": 0.003,
      "loss": 4.0395,
      "step": 9356
    },
    {
      "epoch": 0.09357,
      "grad_norm": 1.1772620726746457,
      "learning_rate": 0.003,
      "loss": 4.0411,
      "step": 9357
    },
    {
      "epoch": 0.09358,
      "grad_norm": 0.8349208876816591,
      "learning_rate": 0.003,
      "loss": 4.035,
      "step": 9358
    },
    {
      "epoch": 0.09359,
      "grad_norm": 0.8649691894837728,
      "learning_rate": 0.003,
      "loss": 4.0461,
      "step": 9359
    },
    {
      "epoch": 0.0936,
      "grad_norm": 0.9462358195573604,
      "learning_rate": 0.003,
      "loss": 4.0387,
      "step": 9360
    },
    {
      "epoch": 0.09361,
      "grad_norm": 1.233886976766547,
      "learning_rate": 0.003,
      "loss": 4.0448,
      "step": 9361
    },
    {
      "epoch": 0.09362,
      "grad_norm": 1.2087100800252286,
      "learning_rate": 0.003,
      "loss": 4.0754,
      "step": 9362
    },
    {
      "epoch": 0.09363,
      "grad_norm": 0.9584661416221599,
      "learning_rate": 0.003,
      "loss": 4.0569,
      "step": 9363
    },
    {
      "epoch": 0.09364,
      "grad_norm": 1.3630115362152988,
      "learning_rate": 0.003,
      "loss": 4.037,
      "step": 9364
    },
    {
      "epoch": 0.09365,
      "grad_norm": 1.0115250367617443,
      "learning_rate": 0.003,
      "loss": 4.0587,
      "step": 9365
    },
    {
      "epoch": 0.09366,
      "grad_norm": 1.410143423428131,
      "learning_rate": 0.003,
      "loss": 4.0124,
      "step": 9366
    },
    {
      "epoch": 0.09367,
      "grad_norm": 1.0026644890567558,
      "learning_rate": 0.003,
      "loss": 4.0406,
      "step": 9367
    },
    {
      "epoch": 0.09368,
      "grad_norm": 1.2102005850211326,
      "learning_rate": 0.003,
      "loss": 4.0521,
      "step": 9368
    },
    {
      "epoch": 0.09369,
      "grad_norm": 1.1309040815939808,
      "learning_rate": 0.003,
      "loss": 4.0202,
      "step": 9369
    },
    {
      "epoch": 0.0937,
      "grad_norm": 0.9973045348674824,
      "learning_rate": 0.003,
      "loss": 4.0577,
      "step": 9370
    },
    {
      "epoch": 0.09371,
      "grad_norm": 1.231370608565574,
      "learning_rate": 0.003,
      "loss": 4.0522,
      "step": 9371
    },
    {
      "epoch": 0.09372,
      "grad_norm": 1.0924523718241692,
      "learning_rate": 0.003,
      "loss": 4.0283,
      "step": 9372
    },
    {
      "epoch": 0.09373,
      "grad_norm": 1.0504664280616463,
      "learning_rate": 0.003,
      "loss": 4.0768,
      "step": 9373
    },
    {
      "epoch": 0.09374,
      "grad_norm": 1.1272188425084024,
      "learning_rate": 0.003,
      "loss": 4.0787,
      "step": 9374
    },
    {
      "epoch": 0.09375,
      "grad_norm": 1.1348460508047482,
      "learning_rate": 0.003,
      "loss": 4.018,
      "step": 9375
    },
    {
      "epoch": 0.09376,
      "grad_norm": 1.1298959210406103,
      "learning_rate": 0.003,
      "loss": 4.0618,
      "step": 9376
    },
    {
      "epoch": 0.09377,
      "grad_norm": 1.186524901763047,
      "learning_rate": 0.003,
      "loss": 4.0437,
      "step": 9377
    },
    {
      "epoch": 0.09378,
      "grad_norm": 1.0314318127210171,
      "learning_rate": 0.003,
      "loss": 4.0498,
      "step": 9378
    },
    {
      "epoch": 0.09379,
      "grad_norm": 1.5680911633070103,
      "learning_rate": 0.003,
      "loss": 4.0282,
      "step": 9379
    },
    {
      "epoch": 0.0938,
      "grad_norm": 0.9013424550963559,
      "learning_rate": 0.003,
      "loss": 4.0202,
      "step": 9380
    },
    {
      "epoch": 0.09381,
      "grad_norm": 0.9574099783748187,
      "learning_rate": 0.003,
      "loss": 4.0237,
      "step": 9381
    },
    {
      "epoch": 0.09382,
      "grad_norm": 1.2086515825142305,
      "learning_rate": 0.003,
      "loss": 4.0502,
      "step": 9382
    },
    {
      "epoch": 0.09383,
      "grad_norm": 1.0350405673887666,
      "learning_rate": 0.003,
      "loss": 4.0327,
      "step": 9383
    },
    {
      "epoch": 0.09384,
      "grad_norm": 1.0680807378772312,
      "learning_rate": 0.003,
      "loss": 4.061,
      "step": 9384
    },
    {
      "epoch": 0.09385,
      "grad_norm": 1.0961146064007061,
      "learning_rate": 0.003,
      "loss": 4.0422,
      "step": 9385
    },
    {
      "epoch": 0.09386,
      "grad_norm": 0.9545455608465853,
      "learning_rate": 0.003,
      "loss": 4.0103,
      "step": 9386
    },
    {
      "epoch": 0.09387,
      "grad_norm": 0.9765112894095569,
      "learning_rate": 0.003,
      "loss": 4.0444,
      "step": 9387
    },
    {
      "epoch": 0.09388,
      "grad_norm": 1.0782991672647486,
      "learning_rate": 0.003,
      "loss": 4.0455,
      "step": 9388
    },
    {
      "epoch": 0.09389,
      "grad_norm": 1.0731997920493421,
      "learning_rate": 0.003,
      "loss": 4.0185,
      "step": 9389
    },
    {
      "epoch": 0.0939,
      "grad_norm": 1.2364650097930605,
      "learning_rate": 0.003,
      "loss": 4.0507,
      "step": 9390
    },
    {
      "epoch": 0.09391,
      "grad_norm": 1.0663640185817809,
      "learning_rate": 0.003,
      "loss": 4.0305,
      "step": 9391
    },
    {
      "epoch": 0.09392,
      "grad_norm": 1.1877737611342285,
      "learning_rate": 0.003,
      "loss": 4.0653,
      "step": 9392
    },
    {
      "epoch": 0.09393,
      "grad_norm": 1.0955254115743633,
      "learning_rate": 0.003,
      "loss": 4.0487,
      "step": 9393
    },
    {
      "epoch": 0.09394,
      "grad_norm": 1.4092921791527784,
      "learning_rate": 0.003,
      "loss": 4.0446,
      "step": 9394
    },
    {
      "epoch": 0.09395,
      "grad_norm": 0.9358321234864352,
      "learning_rate": 0.003,
      "loss": 4.0409,
      "step": 9395
    },
    {
      "epoch": 0.09396,
      "grad_norm": 1.2594661199138193,
      "learning_rate": 0.003,
      "loss": 4.0742,
      "step": 9396
    },
    {
      "epoch": 0.09397,
      "grad_norm": 0.9883524181089541,
      "learning_rate": 0.003,
      "loss": 4.0239,
      "step": 9397
    },
    {
      "epoch": 0.09398,
      "grad_norm": 1.0673063130537923,
      "learning_rate": 0.003,
      "loss": 4.0573,
      "step": 9398
    },
    {
      "epoch": 0.09399,
      "grad_norm": 1.1875700898134038,
      "learning_rate": 0.003,
      "loss": 4.0503,
      "step": 9399
    },
    {
      "epoch": 0.094,
      "grad_norm": 1.0418263510990502,
      "learning_rate": 0.003,
      "loss": 4.0394,
      "step": 9400
    },
    {
      "epoch": 0.09401,
      "grad_norm": 1.1642465528112664,
      "learning_rate": 0.003,
      "loss": 4.0556,
      "step": 9401
    },
    {
      "epoch": 0.09402,
      "grad_norm": 1.2786047972483314,
      "learning_rate": 0.003,
      "loss": 4.0455,
      "step": 9402
    },
    {
      "epoch": 0.09403,
      "grad_norm": 0.8618399669597347,
      "learning_rate": 0.003,
      "loss": 4.0583,
      "step": 9403
    },
    {
      "epoch": 0.09404,
      "grad_norm": 1.102847094348384,
      "learning_rate": 0.003,
      "loss": 4.0231,
      "step": 9404
    },
    {
      "epoch": 0.09405,
      "grad_norm": 1.2834464451783154,
      "learning_rate": 0.003,
      "loss": 4.0619,
      "step": 9405
    },
    {
      "epoch": 0.09406,
      "grad_norm": 0.9871554834956486,
      "learning_rate": 0.003,
      "loss": 4.0556,
      "step": 9406
    },
    {
      "epoch": 0.09407,
      "grad_norm": 1.1610654126184679,
      "learning_rate": 0.003,
      "loss": 4.0437,
      "step": 9407
    },
    {
      "epoch": 0.09408,
      "grad_norm": 1.170917230941906,
      "learning_rate": 0.003,
      "loss": 4.0264,
      "step": 9408
    },
    {
      "epoch": 0.09409,
      "grad_norm": 1.1318228712154021,
      "learning_rate": 0.003,
      "loss": 4.0504,
      "step": 9409
    },
    {
      "epoch": 0.0941,
      "grad_norm": 1.1234390873721771,
      "learning_rate": 0.003,
      "loss": 4.058,
      "step": 9410
    },
    {
      "epoch": 0.09411,
      "grad_norm": 0.8647601508974965,
      "learning_rate": 0.003,
      "loss": 4.0585,
      "step": 9411
    },
    {
      "epoch": 0.09412,
      "grad_norm": 1.0471840870012292,
      "learning_rate": 0.003,
      "loss": 4.0539,
      "step": 9412
    },
    {
      "epoch": 0.09413,
      "grad_norm": 1.2190310827593032,
      "learning_rate": 0.003,
      "loss": 4.0323,
      "step": 9413
    },
    {
      "epoch": 0.09414,
      "grad_norm": 0.8477403736289396,
      "learning_rate": 0.003,
      "loss": 4.0455,
      "step": 9414
    },
    {
      "epoch": 0.09415,
      "grad_norm": 0.9371919042158918,
      "learning_rate": 0.003,
      "loss": 4.0125,
      "step": 9415
    },
    {
      "epoch": 0.09416,
      "grad_norm": 1.1490720658755587,
      "learning_rate": 0.003,
      "loss": 3.9922,
      "step": 9416
    },
    {
      "epoch": 0.09417,
      "grad_norm": 1.1018851741635174,
      "learning_rate": 0.003,
      "loss": 4.0277,
      "step": 9417
    },
    {
      "epoch": 0.09418,
      "grad_norm": 1.2892671070061477,
      "learning_rate": 0.003,
      "loss": 4.0333,
      "step": 9418
    },
    {
      "epoch": 0.09419,
      "grad_norm": 1.1101253491606295,
      "learning_rate": 0.003,
      "loss": 4.0628,
      "step": 9419
    },
    {
      "epoch": 0.0942,
      "grad_norm": 1.3071470583049012,
      "learning_rate": 0.003,
      "loss": 4.0428,
      "step": 9420
    },
    {
      "epoch": 0.09421,
      "grad_norm": 0.863117611253664,
      "learning_rate": 0.003,
      "loss": 4.0319,
      "step": 9421
    },
    {
      "epoch": 0.09422,
      "grad_norm": 1.0738994904421413,
      "learning_rate": 0.003,
      "loss": 4.0343,
      "step": 9422
    },
    {
      "epoch": 0.09423,
      "grad_norm": 1.1740757656465912,
      "learning_rate": 0.003,
      "loss": 3.9883,
      "step": 9423
    },
    {
      "epoch": 0.09424,
      "grad_norm": 0.9999480734746848,
      "learning_rate": 0.003,
      "loss": 4.0379,
      "step": 9424
    },
    {
      "epoch": 0.09425,
      "grad_norm": 1.1947521810325221,
      "learning_rate": 0.003,
      "loss": 4.059,
      "step": 9425
    },
    {
      "epoch": 0.09426,
      "grad_norm": 1.076053138054749,
      "learning_rate": 0.003,
      "loss": 4.0679,
      "step": 9426
    },
    {
      "epoch": 0.09427,
      "grad_norm": 1.2539611359358045,
      "learning_rate": 0.003,
      "loss": 4.0519,
      "step": 9427
    },
    {
      "epoch": 0.09428,
      "grad_norm": 1.2042161531904478,
      "learning_rate": 0.003,
      "loss": 4.0622,
      "step": 9428
    },
    {
      "epoch": 0.09429,
      "grad_norm": 1.2520918442905509,
      "learning_rate": 0.003,
      "loss": 4.0305,
      "step": 9429
    },
    {
      "epoch": 0.0943,
      "grad_norm": 0.8090989376412021,
      "learning_rate": 0.003,
      "loss": 4.034,
      "step": 9430
    },
    {
      "epoch": 0.09431,
      "grad_norm": 0.9501739667787066,
      "learning_rate": 0.003,
      "loss": 4.0454,
      "step": 9431
    },
    {
      "epoch": 0.09432,
      "grad_norm": 1.0564545187269092,
      "learning_rate": 0.003,
      "loss": 4.0319,
      "step": 9432
    },
    {
      "epoch": 0.09433,
      "grad_norm": 1.1105124478676938,
      "learning_rate": 0.003,
      "loss": 4.0343,
      "step": 9433
    },
    {
      "epoch": 0.09434,
      "grad_norm": 1.1365861267709172,
      "learning_rate": 0.003,
      "loss": 4.0128,
      "step": 9434
    },
    {
      "epoch": 0.09435,
      "grad_norm": 1.0626647857526774,
      "learning_rate": 0.003,
      "loss": 4.031,
      "step": 9435
    },
    {
      "epoch": 0.09436,
      "grad_norm": 1.3048334209489638,
      "learning_rate": 0.003,
      "loss": 4.0152,
      "step": 9436
    },
    {
      "epoch": 0.09437,
      "grad_norm": 1.192258709260609,
      "learning_rate": 0.003,
      "loss": 4.0407,
      "step": 9437
    },
    {
      "epoch": 0.09438,
      "grad_norm": 1.141173762966999,
      "learning_rate": 0.003,
      "loss": 4.0567,
      "step": 9438
    },
    {
      "epoch": 0.09439,
      "grad_norm": 0.9959914893100231,
      "learning_rate": 0.003,
      "loss": 4.0454,
      "step": 9439
    },
    {
      "epoch": 0.0944,
      "grad_norm": 1.1812893575099583,
      "learning_rate": 0.003,
      "loss": 4.0276,
      "step": 9440
    },
    {
      "epoch": 0.09441,
      "grad_norm": 0.9472244329470745,
      "learning_rate": 0.003,
      "loss": 4.0285,
      "step": 9441
    },
    {
      "epoch": 0.09442,
      "grad_norm": 1.1629963337515048,
      "learning_rate": 0.003,
      "loss": 4.0287,
      "step": 9442
    },
    {
      "epoch": 0.09443,
      "grad_norm": 0.8487426150195475,
      "learning_rate": 0.003,
      "loss": 4.0505,
      "step": 9443
    },
    {
      "epoch": 0.09444,
      "grad_norm": 0.9207154747911219,
      "learning_rate": 0.003,
      "loss": 4.0403,
      "step": 9444
    },
    {
      "epoch": 0.09445,
      "grad_norm": 1.115602354686498,
      "learning_rate": 0.003,
      "loss": 4.0358,
      "step": 9445
    },
    {
      "epoch": 0.09446,
      "grad_norm": 1.3343852809989445,
      "learning_rate": 0.003,
      "loss": 4.0332,
      "step": 9446
    },
    {
      "epoch": 0.09447,
      "grad_norm": 0.9863278032640667,
      "learning_rate": 0.003,
      "loss": 4.046,
      "step": 9447
    },
    {
      "epoch": 0.09448,
      "grad_norm": 1.1763365235364014,
      "learning_rate": 0.003,
      "loss": 4.0447,
      "step": 9448
    },
    {
      "epoch": 0.09449,
      "grad_norm": 1.105703389589132,
      "learning_rate": 0.003,
      "loss": 4.0364,
      "step": 9449
    },
    {
      "epoch": 0.0945,
      "grad_norm": 1.01551147072131,
      "learning_rate": 0.003,
      "loss": 4.0319,
      "step": 9450
    },
    {
      "epoch": 0.09451,
      "grad_norm": 1.0383763855846733,
      "learning_rate": 0.003,
      "loss": 4.045,
      "step": 9451
    },
    {
      "epoch": 0.09452,
      "grad_norm": 1.0456254521002109,
      "learning_rate": 0.003,
      "loss": 4.0499,
      "step": 9452
    },
    {
      "epoch": 0.09453,
      "grad_norm": 1.3691070796802907,
      "learning_rate": 0.003,
      "loss": 4.0074,
      "step": 9453
    },
    {
      "epoch": 0.09454,
      "grad_norm": 0.9155459999489047,
      "learning_rate": 0.003,
      "loss": 4.0205,
      "step": 9454
    },
    {
      "epoch": 0.09455,
      "grad_norm": 1.0062151085426871,
      "learning_rate": 0.003,
      "loss": 4.032,
      "step": 9455
    },
    {
      "epoch": 0.09456,
      "grad_norm": 1.775196393766159,
      "learning_rate": 0.003,
      "loss": 4.0433,
      "step": 9456
    },
    {
      "epoch": 0.09457,
      "grad_norm": 0.8268537533403297,
      "learning_rate": 0.003,
      "loss": 4.0498,
      "step": 9457
    },
    {
      "epoch": 0.09458,
      "grad_norm": 0.9446756093027355,
      "learning_rate": 0.003,
      "loss": 4.0281,
      "step": 9458
    },
    {
      "epoch": 0.09459,
      "grad_norm": 1.2872893558697978,
      "learning_rate": 0.003,
      "loss": 4.0573,
      "step": 9459
    },
    {
      "epoch": 0.0946,
      "grad_norm": 1.0414735406519482,
      "learning_rate": 0.003,
      "loss": 4.064,
      "step": 9460
    },
    {
      "epoch": 0.09461,
      "grad_norm": 1.2923952872869107,
      "learning_rate": 0.003,
      "loss": 4.0776,
      "step": 9461
    },
    {
      "epoch": 0.09462,
      "grad_norm": 1.0982135647456706,
      "learning_rate": 0.003,
      "loss": 4.0195,
      "step": 9462
    },
    {
      "epoch": 0.09463,
      "grad_norm": 1.195985441134548,
      "learning_rate": 0.003,
      "loss": 4.0327,
      "step": 9463
    },
    {
      "epoch": 0.09464,
      "grad_norm": 1.094216310472828,
      "learning_rate": 0.003,
      "loss": 4.032,
      "step": 9464
    },
    {
      "epoch": 0.09465,
      "grad_norm": 1.1344022479380613,
      "learning_rate": 0.003,
      "loss": 4.0347,
      "step": 9465
    },
    {
      "epoch": 0.09466,
      "grad_norm": 1.1649272025527642,
      "learning_rate": 0.003,
      "loss": 4.0486,
      "step": 9466
    },
    {
      "epoch": 0.09467,
      "grad_norm": 1.1437197614588612,
      "learning_rate": 0.003,
      "loss": 4.0389,
      "step": 9467
    },
    {
      "epoch": 0.09468,
      "grad_norm": 1.134510156394556,
      "learning_rate": 0.003,
      "loss": 4.0474,
      "step": 9468
    },
    {
      "epoch": 0.09469,
      "grad_norm": 1.1456897293699813,
      "learning_rate": 0.003,
      "loss": 4.0158,
      "step": 9469
    },
    {
      "epoch": 0.0947,
      "grad_norm": 0.9370646196560669,
      "learning_rate": 0.003,
      "loss": 4.0243,
      "step": 9470
    },
    {
      "epoch": 0.09471,
      "grad_norm": 1.1605962963311105,
      "learning_rate": 0.003,
      "loss": 4.0614,
      "step": 9471
    },
    {
      "epoch": 0.09472,
      "grad_norm": 1.0220332905341656,
      "learning_rate": 0.003,
      "loss": 4.0359,
      "step": 9472
    },
    {
      "epoch": 0.09473,
      "grad_norm": 1.3119106941290712,
      "learning_rate": 0.003,
      "loss": 4.0382,
      "step": 9473
    },
    {
      "epoch": 0.09474,
      "grad_norm": 1.0696197454349894,
      "learning_rate": 0.003,
      "loss": 4.0315,
      "step": 9474
    },
    {
      "epoch": 0.09475,
      "grad_norm": 1.183557234231492,
      "learning_rate": 0.003,
      "loss": 4.0364,
      "step": 9475
    },
    {
      "epoch": 0.09476,
      "grad_norm": 0.936355154816999,
      "learning_rate": 0.003,
      "loss": 4.0317,
      "step": 9476
    },
    {
      "epoch": 0.09477,
      "grad_norm": 1.0826345363697292,
      "learning_rate": 0.003,
      "loss": 4.0328,
      "step": 9477
    },
    {
      "epoch": 0.09478,
      "grad_norm": 1.2268375814601145,
      "learning_rate": 0.003,
      "loss": 4.0185,
      "step": 9478
    },
    {
      "epoch": 0.09479,
      "grad_norm": 0.9077481557382702,
      "learning_rate": 0.003,
      "loss": 4.0327,
      "step": 9479
    },
    {
      "epoch": 0.0948,
      "grad_norm": 1.1577127173889232,
      "learning_rate": 0.003,
      "loss": 4.0225,
      "step": 9480
    },
    {
      "epoch": 0.09481,
      "grad_norm": 1.164940507783553,
      "learning_rate": 0.003,
      "loss": 4.0643,
      "step": 9481
    },
    {
      "epoch": 0.09482,
      "grad_norm": 1.0376186580785653,
      "learning_rate": 0.003,
      "loss": 4.0208,
      "step": 9482
    },
    {
      "epoch": 0.09483,
      "grad_norm": 1.0668111334949657,
      "learning_rate": 0.003,
      "loss": 4.0037,
      "step": 9483
    },
    {
      "epoch": 0.09484,
      "grad_norm": 0.9544000956467396,
      "learning_rate": 0.003,
      "loss": 4.0486,
      "step": 9484
    },
    {
      "epoch": 0.09485,
      "grad_norm": 1.3563985618454755,
      "learning_rate": 0.003,
      "loss": 4.0384,
      "step": 9485
    },
    {
      "epoch": 0.09486,
      "grad_norm": 0.9907132939829558,
      "learning_rate": 0.003,
      "loss": 4.046,
      "step": 9486
    },
    {
      "epoch": 0.09487,
      "grad_norm": 1.140513002856383,
      "learning_rate": 0.003,
      "loss": 4.0355,
      "step": 9487
    },
    {
      "epoch": 0.09488,
      "grad_norm": 1.2992557947553103,
      "learning_rate": 0.003,
      "loss": 4.0848,
      "step": 9488
    },
    {
      "epoch": 0.09489,
      "grad_norm": 0.9910288087212725,
      "learning_rate": 0.003,
      "loss": 4.0457,
      "step": 9489
    },
    {
      "epoch": 0.0949,
      "grad_norm": 1.281341022406897,
      "learning_rate": 0.003,
      "loss": 4.0446,
      "step": 9490
    },
    {
      "epoch": 0.09491,
      "grad_norm": 1.0178254732217256,
      "learning_rate": 0.003,
      "loss": 4.0237,
      "step": 9491
    },
    {
      "epoch": 0.09492,
      "grad_norm": 1.0599592630101493,
      "learning_rate": 0.003,
      "loss": 4.0338,
      "step": 9492
    },
    {
      "epoch": 0.09493,
      "grad_norm": 1.0707052087473112,
      "learning_rate": 0.003,
      "loss": 4.0114,
      "step": 9493
    },
    {
      "epoch": 0.09494,
      "grad_norm": 1.184811169391914,
      "learning_rate": 0.003,
      "loss": 4.0531,
      "step": 9494
    },
    {
      "epoch": 0.09495,
      "grad_norm": 1.1050306365464493,
      "learning_rate": 0.003,
      "loss": 4.0401,
      "step": 9495
    },
    {
      "epoch": 0.09496,
      "grad_norm": 1.060159425305823,
      "learning_rate": 0.003,
      "loss": 4.0543,
      "step": 9496
    },
    {
      "epoch": 0.09497,
      "grad_norm": 1.0825941688780625,
      "learning_rate": 0.003,
      "loss": 4.0381,
      "step": 9497
    },
    {
      "epoch": 0.09498,
      "grad_norm": 0.9601571426627881,
      "learning_rate": 0.003,
      "loss": 4.011,
      "step": 9498
    },
    {
      "epoch": 0.09499,
      "grad_norm": 1.0590750166016245,
      "learning_rate": 0.003,
      "loss": 4.0224,
      "step": 9499
    },
    {
      "epoch": 0.095,
      "grad_norm": 1.2786584082845458,
      "learning_rate": 0.003,
      "loss": 4.0735,
      "step": 9500
    },
    {
      "epoch": 0.09501,
      "grad_norm": 1.1290326342601904,
      "learning_rate": 0.003,
      "loss": 4.0346,
      "step": 9501
    },
    {
      "epoch": 0.09502,
      "grad_norm": 0.9912457750329704,
      "learning_rate": 0.003,
      "loss": 4.0292,
      "step": 9502
    },
    {
      "epoch": 0.09503,
      "grad_norm": 1.152658098138242,
      "learning_rate": 0.003,
      "loss": 3.9938,
      "step": 9503
    },
    {
      "epoch": 0.09504,
      "grad_norm": 1.0919690729103646,
      "learning_rate": 0.003,
      "loss": 4.0455,
      "step": 9504
    },
    {
      "epoch": 0.09505,
      "grad_norm": 1.1519487718748778,
      "learning_rate": 0.003,
      "loss": 4.0328,
      "step": 9505
    },
    {
      "epoch": 0.09506,
      "grad_norm": 1.262006574899147,
      "learning_rate": 0.003,
      "loss": 4.0526,
      "step": 9506
    },
    {
      "epoch": 0.09507,
      "grad_norm": 1.0711373212381228,
      "learning_rate": 0.003,
      "loss": 4.0603,
      "step": 9507
    },
    {
      "epoch": 0.09508,
      "grad_norm": 1.1239192938184404,
      "learning_rate": 0.003,
      "loss": 4.0323,
      "step": 9508
    },
    {
      "epoch": 0.09509,
      "grad_norm": 1.3389515718167913,
      "learning_rate": 0.003,
      "loss": 4.0356,
      "step": 9509
    },
    {
      "epoch": 0.0951,
      "grad_norm": 0.8958060198263552,
      "learning_rate": 0.003,
      "loss": 4.036,
      "step": 9510
    },
    {
      "epoch": 0.09511,
      "grad_norm": 1.0719956101779906,
      "learning_rate": 0.003,
      "loss": 4.026,
      "step": 9511
    },
    {
      "epoch": 0.09512,
      "grad_norm": 1.305626481744099,
      "learning_rate": 0.003,
      "loss": 4.0343,
      "step": 9512
    },
    {
      "epoch": 0.09513,
      "grad_norm": 0.9176719274352512,
      "learning_rate": 0.003,
      "loss": 4.0157,
      "step": 9513
    },
    {
      "epoch": 0.09514,
      "grad_norm": 0.9499238409052401,
      "learning_rate": 0.003,
      "loss": 4.0255,
      "step": 9514
    },
    {
      "epoch": 0.09515,
      "grad_norm": 1.063622459645422,
      "learning_rate": 0.003,
      "loss": 4.0396,
      "step": 9515
    },
    {
      "epoch": 0.09516,
      "grad_norm": 1.1652384446120476,
      "learning_rate": 0.003,
      "loss": 4.0341,
      "step": 9516
    },
    {
      "epoch": 0.09517,
      "grad_norm": 1.178061476075233,
      "learning_rate": 0.003,
      "loss": 4.0317,
      "step": 9517
    },
    {
      "epoch": 0.09518,
      "grad_norm": 1.3678980044323894,
      "learning_rate": 0.003,
      "loss": 4.0563,
      "step": 9518
    },
    {
      "epoch": 0.09519,
      "grad_norm": 1.046217095028117,
      "learning_rate": 0.003,
      "loss": 4.0595,
      "step": 9519
    },
    {
      "epoch": 0.0952,
      "grad_norm": 1.1818557546526918,
      "learning_rate": 0.003,
      "loss": 4.0062,
      "step": 9520
    },
    {
      "epoch": 0.09521,
      "grad_norm": 1.1304356171833927,
      "learning_rate": 0.003,
      "loss": 4.0175,
      "step": 9521
    },
    {
      "epoch": 0.09522,
      "grad_norm": 1.120019603471895,
      "learning_rate": 0.003,
      "loss": 4.0239,
      "step": 9522
    },
    {
      "epoch": 0.09523,
      "grad_norm": 1.104879448193623,
      "learning_rate": 0.003,
      "loss": 4.0292,
      "step": 9523
    },
    {
      "epoch": 0.09524,
      "grad_norm": 0.873368086596089,
      "learning_rate": 0.003,
      "loss": 4.0157,
      "step": 9524
    },
    {
      "epoch": 0.09525,
      "grad_norm": 0.9961985148368994,
      "learning_rate": 0.003,
      "loss": 4.0362,
      "step": 9525
    },
    {
      "epoch": 0.09526,
      "grad_norm": 1.2676375654165073,
      "learning_rate": 0.003,
      "loss": 4.0368,
      "step": 9526
    },
    {
      "epoch": 0.09527,
      "grad_norm": 1.1529943279598172,
      "learning_rate": 0.003,
      "loss": 4.0349,
      "step": 9527
    },
    {
      "epoch": 0.09528,
      "grad_norm": 1.131678617939542,
      "learning_rate": 0.003,
      "loss": 4.0542,
      "step": 9528
    },
    {
      "epoch": 0.09529,
      "grad_norm": 1.0147106663564123,
      "learning_rate": 0.003,
      "loss": 4.0101,
      "step": 9529
    },
    {
      "epoch": 0.0953,
      "grad_norm": 1.2878762896528653,
      "learning_rate": 0.003,
      "loss": 4.0403,
      "step": 9530
    },
    {
      "epoch": 0.09531,
      "grad_norm": 1.0096287930269905,
      "learning_rate": 0.003,
      "loss": 4.0209,
      "step": 9531
    },
    {
      "epoch": 0.09532,
      "grad_norm": 1.278489534695974,
      "learning_rate": 0.003,
      "loss": 4.0221,
      "step": 9532
    },
    {
      "epoch": 0.09533,
      "grad_norm": 1.068314907618256,
      "learning_rate": 0.003,
      "loss": 4.0137,
      "step": 9533
    },
    {
      "epoch": 0.09534,
      "grad_norm": 1.0541105462925286,
      "learning_rate": 0.003,
      "loss": 4.0179,
      "step": 9534
    },
    {
      "epoch": 0.09535,
      "grad_norm": 1.0542416896874893,
      "learning_rate": 0.003,
      "loss": 4.0297,
      "step": 9535
    },
    {
      "epoch": 0.09536,
      "grad_norm": 1.203990932473719,
      "learning_rate": 0.003,
      "loss": 4.0479,
      "step": 9536
    },
    {
      "epoch": 0.09537,
      "grad_norm": 1.0609650874854466,
      "learning_rate": 0.003,
      "loss": 4.0571,
      "step": 9537
    },
    {
      "epoch": 0.09538,
      "grad_norm": 1.0566099485690075,
      "learning_rate": 0.003,
      "loss": 4.0353,
      "step": 9538
    },
    {
      "epoch": 0.09539,
      "grad_norm": 1.1711495828610736,
      "learning_rate": 0.003,
      "loss": 4.0565,
      "step": 9539
    },
    {
      "epoch": 0.0954,
      "grad_norm": 1.0370146448146371,
      "learning_rate": 0.003,
      "loss": 4.0139,
      "step": 9540
    },
    {
      "epoch": 0.09541,
      "grad_norm": 1.0159923428799353,
      "learning_rate": 0.003,
      "loss": 4.0424,
      "step": 9541
    },
    {
      "epoch": 0.09542,
      "grad_norm": 1.1293172537885205,
      "learning_rate": 0.003,
      "loss": 4.0148,
      "step": 9542
    },
    {
      "epoch": 0.09543,
      "grad_norm": 1.0450117640491212,
      "learning_rate": 0.003,
      "loss": 4.0254,
      "step": 9543
    },
    {
      "epoch": 0.09544,
      "grad_norm": 1.1603566723882397,
      "learning_rate": 0.003,
      "loss": 4.0219,
      "step": 9544
    },
    {
      "epoch": 0.09545,
      "grad_norm": 1.0447933808065626,
      "learning_rate": 0.003,
      "loss": 4.0353,
      "step": 9545
    },
    {
      "epoch": 0.09546,
      "grad_norm": 0.9458484110007981,
      "learning_rate": 0.003,
      "loss": 4.0467,
      "step": 9546
    },
    {
      "epoch": 0.09547,
      "grad_norm": 1.0876570915875363,
      "learning_rate": 0.003,
      "loss": 4.0358,
      "step": 9547
    },
    {
      "epoch": 0.09548,
      "grad_norm": 1.1875507966969954,
      "learning_rate": 0.003,
      "loss": 4.0327,
      "step": 9548
    },
    {
      "epoch": 0.09549,
      "grad_norm": 0.8814604626466929,
      "learning_rate": 0.003,
      "loss": 4.063,
      "step": 9549
    },
    {
      "epoch": 0.0955,
      "grad_norm": 1.0246586128840154,
      "learning_rate": 0.003,
      "loss": 4.0062,
      "step": 9550
    },
    {
      "epoch": 0.09551,
      "grad_norm": 1.2387660010338577,
      "learning_rate": 0.003,
      "loss": 4.0221,
      "step": 9551
    },
    {
      "epoch": 0.09552,
      "grad_norm": 0.9754198125538579,
      "learning_rate": 0.003,
      "loss": 4.0826,
      "step": 9552
    },
    {
      "epoch": 0.09553,
      "grad_norm": 1.1577414898591938,
      "learning_rate": 0.003,
      "loss": 4.0435,
      "step": 9553
    },
    {
      "epoch": 0.09554,
      "grad_norm": 1.0570088924704193,
      "learning_rate": 0.003,
      "loss": 4.0148,
      "step": 9554
    },
    {
      "epoch": 0.09555,
      "grad_norm": 1.2106521594790243,
      "learning_rate": 0.003,
      "loss": 4.0234,
      "step": 9555
    },
    {
      "epoch": 0.09556,
      "grad_norm": 1.0733450373625477,
      "learning_rate": 0.003,
      "loss": 4.0348,
      "step": 9556
    },
    {
      "epoch": 0.09557,
      "grad_norm": 1.1919871564102347,
      "learning_rate": 0.003,
      "loss": 4.0546,
      "step": 9557
    },
    {
      "epoch": 0.09558,
      "grad_norm": 1.146997350654266,
      "learning_rate": 0.003,
      "loss": 4.0441,
      "step": 9558
    },
    {
      "epoch": 0.09559,
      "grad_norm": 1.168927128415256,
      "learning_rate": 0.003,
      "loss": 4.0657,
      "step": 9559
    },
    {
      "epoch": 0.0956,
      "grad_norm": 1.11230556582203,
      "learning_rate": 0.003,
      "loss": 4.0285,
      "step": 9560
    },
    {
      "epoch": 0.09561,
      "grad_norm": 1.0881575661118905,
      "learning_rate": 0.003,
      "loss": 4.0542,
      "step": 9561
    },
    {
      "epoch": 0.09562,
      "grad_norm": 1.3389741513893663,
      "learning_rate": 0.003,
      "loss": 4.0088,
      "step": 9562
    },
    {
      "epoch": 0.09563,
      "grad_norm": 1.1779078269635281,
      "learning_rate": 0.003,
      "loss": 4.0709,
      "step": 9563
    },
    {
      "epoch": 0.09564,
      "grad_norm": 1.2633874376703453,
      "learning_rate": 0.003,
      "loss": 4.0607,
      "step": 9564
    },
    {
      "epoch": 0.09565,
      "grad_norm": 0.8461819044779642,
      "learning_rate": 0.003,
      "loss": 4.0181,
      "step": 9565
    },
    {
      "epoch": 0.09566,
      "grad_norm": 0.9430171254625663,
      "learning_rate": 0.003,
      "loss": 4.0318,
      "step": 9566
    },
    {
      "epoch": 0.09567,
      "grad_norm": 1.0543925802682974,
      "learning_rate": 0.003,
      "loss": 4.033,
      "step": 9567
    },
    {
      "epoch": 0.09568,
      "grad_norm": 1.2147511761418746,
      "learning_rate": 0.003,
      "loss": 4.0403,
      "step": 9568
    },
    {
      "epoch": 0.09569,
      "grad_norm": 1.139812511049965,
      "learning_rate": 0.003,
      "loss": 4.0433,
      "step": 9569
    },
    {
      "epoch": 0.0957,
      "grad_norm": 1.2066618266931726,
      "learning_rate": 0.003,
      "loss": 4.0448,
      "step": 9570
    },
    {
      "epoch": 0.09571,
      "grad_norm": 0.9969088728201041,
      "learning_rate": 0.003,
      "loss": 4.0405,
      "step": 9571
    },
    {
      "epoch": 0.09572,
      "grad_norm": 1.521564608038327,
      "learning_rate": 0.003,
      "loss": 4.0454,
      "step": 9572
    },
    {
      "epoch": 0.09573,
      "grad_norm": 0.767016893061818,
      "learning_rate": 0.003,
      "loss": 4.0353,
      "step": 9573
    },
    {
      "epoch": 0.09574,
      "grad_norm": 0.9077964583623702,
      "learning_rate": 0.003,
      "loss": 4.0367,
      "step": 9574
    },
    {
      "epoch": 0.09575,
      "grad_norm": 0.9789511548929908,
      "learning_rate": 0.003,
      "loss": 4.0219,
      "step": 9575
    },
    {
      "epoch": 0.09576,
      "grad_norm": 1.1272087126973815,
      "learning_rate": 0.003,
      "loss": 4.0228,
      "step": 9576
    },
    {
      "epoch": 0.09577,
      "grad_norm": 1.2010384957358553,
      "learning_rate": 0.003,
      "loss": 4.0128,
      "step": 9577
    },
    {
      "epoch": 0.09578,
      "grad_norm": 1.1758915609790765,
      "learning_rate": 0.003,
      "loss": 4.0102,
      "step": 9578
    },
    {
      "epoch": 0.09579,
      "grad_norm": 1.1677535297765045,
      "learning_rate": 0.003,
      "loss": 4.0194,
      "step": 9579
    },
    {
      "epoch": 0.0958,
      "grad_norm": 1.1752106864410679,
      "learning_rate": 0.003,
      "loss": 4.0497,
      "step": 9580
    },
    {
      "epoch": 0.09581,
      "grad_norm": 1.0390685668953878,
      "learning_rate": 0.003,
      "loss": 4.0799,
      "step": 9581
    },
    {
      "epoch": 0.09582,
      "grad_norm": 0.9448991333037483,
      "learning_rate": 0.003,
      "loss": 4.0186,
      "step": 9582
    },
    {
      "epoch": 0.09583,
      "grad_norm": 1.0699012101296939,
      "learning_rate": 0.003,
      "loss": 4.0127,
      "step": 9583
    },
    {
      "epoch": 0.09584,
      "grad_norm": 1.1114200647456693,
      "learning_rate": 0.003,
      "loss": 4.0345,
      "step": 9584
    },
    {
      "epoch": 0.09585,
      "grad_norm": 1.1110522535404246,
      "learning_rate": 0.003,
      "loss": 4.0172,
      "step": 9585
    },
    {
      "epoch": 0.09586,
      "grad_norm": 1.0693682178529131,
      "learning_rate": 0.003,
      "loss": 4.0489,
      "step": 9586
    },
    {
      "epoch": 0.09587,
      "grad_norm": 1.0907022200921574,
      "learning_rate": 0.003,
      "loss": 4.0018,
      "step": 9587
    },
    {
      "epoch": 0.09588,
      "grad_norm": 1.3052673040023657,
      "learning_rate": 0.003,
      "loss": 4.037,
      "step": 9588
    },
    {
      "epoch": 0.09589,
      "grad_norm": 1.1273974285859623,
      "learning_rate": 0.003,
      "loss": 4.0397,
      "step": 9589
    },
    {
      "epoch": 0.0959,
      "grad_norm": 0.9575377458395997,
      "learning_rate": 0.003,
      "loss": 4.0474,
      "step": 9590
    },
    {
      "epoch": 0.09591,
      "grad_norm": 1.2408320836823072,
      "learning_rate": 0.003,
      "loss": 4.0813,
      "step": 9591
    },
    {
      "epoch": 0.09592,
      "grad_norm": 1.0230179861054507,
      "learning_rate": 0.003,
      "loss": 4.0441,
      "step": 9592
    },
    {
      "epoch": 0.09593,
      "grad_norm": 1.2007397484572089,
      "learning_rate": 0.003,
      "loss": 4.0598,
      "step": 9593
    },
    {
      "epoch": 0.09594,
      "grad_norm": 0.9589259749600783,
      "learning_rate": 0.003,
      "loss": 4.0188,
      "step": 9594
    },
    {
      "epoch": 0.09595,
      "grad_norm": 1.0812271670511917,
      "learning_rate": 0.003,
      "loss": 4.0239,
      "step": 9595
    },
    {
      "epoch": 0.09596,
      "grad_norm": 1.215276423755771,
      "learning_rate": 0.003,
      "loss": 4.0375,
      "step": 9596
    },
    {
      "epoch": 0.09597,
      "grad_norm": 1.055514060891504,
      "learning_rate": 0.003,
      "loss": 4.0307,
      "step": 9597
    },
    {
      "epoch": 0.09598,
      "grad_norm": 1.0847458195628048,
      "learning_rate": 0.003,
      "loss": 4.0415,
      "step": 9598
    },
    {
      "epoch": 0.09599,
      "grad_norm": 1.3389087793122878,
      "learning_rate": 0.003,
      "loss": 4.0282,
      "step": 9599
    },
    {
      "epoch": 0.096,
      "grad_norm": 0.9055383982261854,
      "learning_rate": 0.003,
      "loss": 4.0234,
      "step": 9600
    },
    {
      "epoch": 0.09601,
      "grad_norm": 0.9811940695731026,
      "learning_rate": 0.003,
      "loss": 4.0306,
      "step": 9601
    },
    {
      "epoch": 0.09602,
      "grad_norm": 1.0789527786902395,
      "learning_rate": 0.003,
      "loss": 4.0641,
      "step": 9602
    },
    {
      "epoch": 0.09603,
      "grad_norm": 1.1727478141730425,
      "learning_rate": 0.003,
      "loss": 4.0413,
      "step": 9603
    },
    {
      "epoch": 0.09604,
      "grad_norm": 1.023524788242411,
      "learning_rate": 0.003,
      "loss": 4.0031,
      "step": 9604
    },
    {
      "epoch": 0.09605,
      "grad_norm": 1.2858580097981922,
      "learning_rate": 0.003,
      "loss": 4.0521,
      "step": 9605
    },
    {
      "epoch": 0.09606,
      "grad_norm": 0.8845738717710074,
      "learning_rate": 0.003,
      "loss": 4.0423,
      "step": 9606
    },
    {
      "epoch": 0.09607,
      "grad_norm": 0.9417694564288732,
      "learning_rate": 0.003,
      "loss": 4.0527,
      "step": 9607
    },
    {
      "epoch": 0.09608,
      "grad_norm": 1.328516456493561,
      "learning_rate": 0.003,
      "loss": 4.0717,
      "step": 9608
    },
    {
      "epoch": 0.09609,
      "grad_norm": 1.2629011203498186,
      "learning_rate": 0.003,
      "loss": 4.0411,
      "step": 9609
    },
    {
      "epoch": 0.0961,
      "grad_norm": 1.1729883232988318,
      "learning_rate": 0.003,
      "loss": 4.0427,
      "step": 9610
    },
    {
      "epoch": 0.09611,
      "grad_norm": 1.393118460283264,
      "learning_rate": 0.003,
      "loss": 4.0425,
      "step": 9611
    },
    {
      "epoch": 0.09612,
      "grad_norm": 1.0698096910097765,
      "learning_rate": 0.003,
      "loss": 4.0378,
      "step": 9612
    },
    {
      "epoch": 0.09613,
      "grad_norm": 1.007696738311522,
      "learning_rate": 0.003,
      "loss": 4.051,
      "step": 9613
    },
    {
      "epoch": 0.09614,
      "grad_norm": 1.4803986138018956,
      "learning_rate": 0.003,
      "loss": 4.054,
      "step": 9614
    },
    {
      "epoch": 0.09615,
      "grad_norm": 1.0559026629823391,
      "learning_rate": 0.003,
      "loss": 4.0191,
      "step": 9615
    },
    {
      "epoch": 0.09616,
      "grad_norm": 1.1782897391468559,
      "learning_rate": 0.003,
      "loss": 4.0498,
      "step": 9616
    },
    {
      "epoch": 0.09617,
      "grad_norm": 1.0127744559386622,
      "learning_rate": 0.003,
      "loss": 4.0198,
      "step": 9617
    },
    {
      "epoch": 0.09618,
      "grad_norm": 1.2681610401673449,
      "learning_rate": 0.003,
      "loss": 4.0457,
      "step": 9618
    },
    {
      "epoch": 0.09619,
      "grad_norm": 0.9294594817912581,
      "learning_rate": 0.003,
      "loss": 4.0352,
      "step": 9619
    },
    {
      "epoch": 0.0962,
      "grad_norm": 0.9905671912694861,
      "learning_rate": 0.003,
      "loss": 4.0249,
      "step": 9620
    },
    {
      "epoch": 0.09621,
      "grad_norm": 1.3373852398607728,
      "learning_rate": 0.003,
      "loss": 4.0521,
      "step": 9621
    },
    {
      "epoch": 0.09622,
      "grad_norm": 1.1541351242021758,
      "learning_rate": 0.003,
      "loss": 4.0344,
      "step": 9622
    },
    {
      "epoch": 0.09623,
      "grad_norm": 1.112301616615713,
      "learning_rate": 0.003,
      "loss": 4.0288,
      "step": 9623
    },
    {
      "epoch": 0.09624,
      "grad_norm": 1.3430937174739355,
      "learning_rate": 0.003,
      "loss": 4.0476,
      "step": 9624
    },
    {
      "epoch": 0.09625,
      "grad_norm": 1.080047687143114,
      "learning_rate": 0.003,
      "loss": 4.0205,
      "step": 9625
    },
    {
      "epoch": 0.09626,
      "grad_norm": 1.0694079912518233,
      "learning_rate": 0.003,
      "loss": 4.03,
      "step": 9626
    },
    {
      "epoch": 0.09627,
      "grad_norm": 1.139432118757624,
      "learning_rate": 0.003,
      "loss": 4.0252,
      "step": 9627
    },
    {
      "epoch": 0.09628,
      "grad_norm": 0.958627843592087,
      "learning_rate": 0.003,
      "loss": 4.0266,
      "step": 9628
    },
    {
      "epoch": 0.09629,
      "grad_norm": 1.0662067967300446,
      "learning_rate": 0.003,
      "loss": 4.0442,
      "step": 9629
    },
    {
      "epoch": 0.0963,
      "grad_norm": 1.0412238973539465,
      "learning_rate": 0.003,
      "loss": 4.0334,
      "step": 9630
    },
    {
      "epoch": 0.09631,
      "grad_norm": 1.0125046957428312,
      "learning_rate": 0.003,
      "loss": 4.0658,
      "step": 9631
    },
    {
      "epoch": 0.09632,
      "grad_norm": 1.3555424752709917,
      "learning_rate": 0.003,
      "loss": 4.0095,
      "step": 9632
    },
    {
      "epoch": 0.09633,
      "grad_norm": 1.063757740872899,
      "learning_rate": 0.003,
      "loss": 4.0226,
      "step": 9633
    },
    {
      "epoch": 0.09634,
      "grad_norm": 1.20136927846276,
      "learning_rate": 0.003,
      "loss": 4.0121,
      "step": 9634
    },
    {
      "epoch": 0.09635,
      "grad_norm": 1.1423655529057926,
      "learning_rate": 0.003,
      "loss": 4.0522,
      "step": 9635
    },
    {
      "epoch": 0.09636,
      "grad_norm": 1.3277021817787018,
      "learning_rate": 0.003,
      "loss": 4.0606,
      "step": 9636
    },
    {
      "epoch": 0.09637,
      "grad_norm": 0.916236476287278,
      "learning_rate": 0.003,
      "loss": 4.0239,
      "step": 9637
    },
    {
      "epoch": 0.09638,
      "grad_norm": 1.1712812625089448,
      "learning_rate": 0.003,
      "loss": 4.0211,
      "step": 9638
    },
    {
      "epoch": 0.09639,
      "grad_norm": 1.1016378675492433,
      "learning_rate": 0.003,
      "loss": 4.0248,
      "step": 9639
    },
    {
      "epoch": 0.0964,
      "grad_norm": 1.0341626312817283,
      "learning_rate": 0.003,
      "loss": 4.0674,
      "step": 9640
    },
    {
      "epoch": 0.09641,
      "grad_norm": 1.1660677439533391,
      "learning_rate": 0.003,
      "loss": 4.0083,
      "step": 9641
    },
    {
      "epoch": 0.09642,
      "grad_norm": 0.9807879097387026,
      "learning_rate": 0.003,
      "loss": 4.0559,
      "step": 9642
    },
    {
      "epoch": 0.09643,
      "grad_norm": 1.2769541125682597,
      "learning_rate": 0.003,
      "loss": 4.0352,
      "step": 9643
    },
    {
      "epoch": 0.09644,
      "grad_norm": 0.9284721817809056,
      "learning_rate": 0.003,
      "loss": 4.0505,
      "step": 9644
    },
    {
      "epoch": 0.09645,
      "grad_norm": 1.2692913801981347,
      "learning_rate": 0.003,
      "loss": 4.05,
      "step": 9645
    },
    {
      "epoch": 0.09646,
      "grad_norm": 0.9226038318267903,
      "learning_rate": 0.003,
      "loss": 4.0399,
      "step": 9646
    },
    {
      "epoch": 0.09647,
      "grad_norm": 1.1716846999519674,
      "learning_rate": 0.003,
      "loss": 4.0031,
      "step": 9647
    },
    {
      "epoch": 0.09648,
      "grad_norm": 1.0984624781628638,
      "learning_rate": 0.003,
      "loss": 4.0797,
      "step": 9648
    },
    {
      "epoch": 0.09649,
      "grad_norm": 1.032713451324886,
      "learning_rate": 0.003,
      "loss": 4.0118,
      "step": 9649
    },
    {
      "epoch": 0.0965,
      "grad_norm": 1.2893129243430874,
      "learning_rate": 0.003,
      "loss": 4.0299,
      "step": 9650
    },
    {
      "epoch": 0.09651,
      "grad_norm": 1.2914240519466096,
      "learning_rate": 0.003,
      "loss": 4.0503,
      "step": 9651
    },
    {
      "epoch": 0.09652,
      "grad_norm": 1.0304038397431154,
      "learning_rate": 0.003,
      "loss": 4.0333,
      "step": 9652
    },
    {
      "epoch": 0.09653,
      "grad_norm": 1.1558865874046644,
      "learning_rate": 0.003,
      "loss": 4.0609,
      "step": 9653
    },
    {
      "epoch": 0.09654,
      "grad_norm": 1.0769712394558766,
      "learning_rate": 0.003,
      "loss": 4.013,
      "step": 9654
    },
    {
      "epoch": 0.09655,
      "grad_norm": 1.0046687966997174,
      "learning_rate": 0.003,
      "loss": 4.0427,
      "step": 9655
    },
    {
      "epoch": 0.09656,
      "grad_norm": 1.208629121263327,
      "learning_rate": 0.003,
      "loss": 4.0589,
      "step": 9656
    },
    {
      "epoch": 0.09657,
      "grad_norm": 1.0524663213041456,
      "learning_rate": 0.003,
      "loss": 4.0534,
      "step": 9657
    },
    {
      "epoch": 0.09658,
      "grad_norm": 1.1446885277300836,
      "learning_rate": 0.003,
      "loss": 4.031,
      "step": 9658
    },
    {
      "epoch": 0.09659,
      "grad_norm": 1.1799030583312893,
      "learning_rate": 0.003,
      "loss": 4.0627,
      "step": 9659
    },
    {
      "epoch": 0.0966,
      "grad_norm": 1.1396175467308287,
      "learning_rate": 0.003,
      "loss": 4.054,
      "step": 9660
    },
    {
      "epoch": 0.09661,
      "grad_norm": 1.2031365613861558,
      "learning_rate": 0.003,
      "loss": 4.0248,
      "step": 9661
    },
    {
      "epoch": 0.09662,
      "grad_norm": 1.0450126941262252,
      "learning_rate": 0.003,
      "loss": 4.0269,
      "step": 9662
    },
    {
      "epoch": 0.09663,
      "grad_norm": 1.1872140736780354,
      "learning_rate": 0.003,
      "loss": 4.059,
      "step": 9663
    },
    {
      "epoch": 0.09664,
      "grad_norm": 1.10816766795799,
      "learning_rate": 0.003,
      "loss": 4.0531,
      "step": 9664
    },
    {
      "epoch": 0.09665,
      "grad_norm": 1.0920304192442827,
      "learning_rate": 0.003,
      "loss": 4.0328,
      "step": 9665
    },
    {
      "epoch": 0.09666,
      "grad_norm": 1.0614448386527346,
      "learning_rate": 0.003,
      "loss": 4.0478,
      "step": 9666
    },
    {
      "epoch": 0.09667,
      "grad_norm": 1.222612568204848,
      "learning_rate": 0.003,
      "loss": 4.0526,
      "step": 9667
    },
    {
      "epoch": 0.09668,
      "grad_norm": 1.0545430433180965,
      "learning_rate": 0.003,
      "loss": 4.0222,
      "step": 9668
    },
    {
      "epoch": 0.09669,
      "grad_norm": 1.0956783811426631,
      "learning_rate": 0.003,
      "loss": 4.0295,
      "step": 9669
    },
    {
      "epoch": 0.0967,
      "grad_norm": 0.9610892180277101,
      "learning_rate": 0.003,
      "loss": 4.0449,
      "step": 9670
    },
    {
      "epoch": 0.09671,
      "grad_norm": 1.36766207674131,
      "learning_rate": 0.003,
      "loss": 4.0319,
      "step": 9671
    },
    {
      "epoch": 0.09672,
      "grad_norm": 0.9365680632039836,
      "learning_rate": 0.003,
      "loss": 4.0558,
      "step": 9672
    },
    {
      "epoch": 0.09673,
      "grad_norm": 1.0029650441296576,
      "learning_rate": 0.003,
      "loss": 4.0531,
      "step": 9673
    },
    {
      "epoch": 0.09674,
      "grad_norm": 1.1463824116993444,
      "learning_rate": 0.003,
      "loss": 4.0862,
      "step": 9674
    },
    {
      "epoch": 0.09675,
      "grad_norm": 1.058735123681513,
      "learning_rate": 0.003,
      "loss": 4.0194,
      "step": 9675
    },
    {
      "epoch": 0.09676,
      "grad_norm": 1.0766125342365733,
      "learning_rate": 0.003,
      "loss": 4.0394,
      "step": 9676
    },
    {
      "epoch": 0.09677,
      "grad_norm": 1.1203720989393562,
      "learning_rate": 0.003,
      "loss": 4.0015,
      "step": 9677
    },
    {
      "epoch": 0.09678,
      "grad_norm": 1.0951111296319187,
      "learning_rate": 0.003,
      "loss": 4.0651,
      "step": 9678
    },
    {
      "epoch": 0.09679,
      "grad_norm": 1.0249113062787711,
      "learning_rate": 0.003,
      "loss": 4.051,
      "step": 9679
    },
    {
      "epoch": 0.0968,
      "grad_norm": 1.057027117497527,
      "learning_rate": 0.003,
      "loss": 4.0427,
      "step": 9680
    },
    {
      "epoch": 0.09681,
      "grad_norm": 1.2583816959197518,
      "learning_rate": 0.003,
      "loss": 4.0522,
      "step": 9681
    },
    {
      "epoch": 0.09682,
      "grad_norm": 1.2002373803491195,
      "learning_rate": 0.003,
      "loss": 4.0379,
      "step": 9682
    },
    {
      "epoch": 0.09683,
      "grad_norm": 0.9521536845012109,
      "learning_rate": 0.003,
      "loss": 4.0268,
      "step": 9683
    },
    {
      "epoch": 0.09684,
      "grad_norm": 1.176715675692431,
      "learning_rate": 0.003,
      "loss": 4.0581,
      "step": 9684
    },
    {
      "epoch": 0.09685,
      "grad_norm": 1.2536000297265582,
      "learning_rate": 0.003,
      "loss": 4.0399,
      "step": 9685
    },
    {
      "epoch": 0.09686,
      "grad_norm": 1.2657578935031644,
      "learning_rate": 0.003,
      "loss": 4.0475,
      "step": 9686
    },
    {
      "epoch": 0.09687,
      "grad_norm": 1.045426925577382,
      "learning_rate": 0.003,
      "loss": 4.0442,
      "step": 9687
    },
    {
      "epoch": 0.09688,
      "grad_norm": 1.1541282081642033,
      "learning_rate": 0.003,
      "loss": 4.0731,
      "step": 9688
    },
    {
      "epoch": 0.09689,
      "grad_norm": 1.0905131544606357,
      "learning_rate": 0.003,
      "loss": 4.0249,
      "step": 9689
    },
    {
      "epoch": 0.0969,
      "grad_norm": 1.2174044997009796,
      "learning_rate": 0.003,
      "loss": 4.0436,
      "step": 9690
    },
    {
      "epoch": 0.09691,
      "grad_norm": 0.9988922116041921,
      "learning_rate": 0.003,
      "loss": 4.0481,
      "step": 9691
    },
    {
      "epoch": 0.09692,
      "grad_norm": 1.425363077395206,
      "learning_rate": 0.003,
      "loss": 4.0585,
      "step": 9692
    },
    {
      "epoch": 0.09693,
      "grad_norm": 0.9614291217845489,
      "learning_rate": 0.003,
      "loss": 4.0117,
      "step": 9693
    },
    {
      "epoch": 0.09694,
      "grad_norm": 1.1263744616665738,
      "learning_rate": 0.003,
      "loss": 4.0355,
      "step": 9694
    },
    {
      "epoch": 0.09695,
      "grad_norm": 0.9927369724604576,
      "learning_rate": 0.003,
      "loss": 4.0415,
      "step": 9695
    },
    {
      "epoch": 0.09696,
      "grad_norm": 1.204799634186295,
      "learning_rate": 0.003,
      "loss": 4.0299,
      "step": 9696
    },
    {
      "epoch": 0.09697,
      "grad_norm": 1.1019920635375835,
      "learning_rate": 0.003,
      "loss": 4.0378,
      "step": 9697
    },
    {
      "epoch": 0.09698,
      "grad_norm": 1.3083890446725224,
      "learning_rate": 0.003,
      "loss": 4.054,
      "step": 9698
    },
    {
      "epoch": 0.09699,
      "grad_norm": 0.8673207887854278,
      "learning_rate": 0.003,
      "loss": 4.0176,
      "step": 9699
    },
    {
      "epoch": 0.097,
      "grad_norm": 1.0937709937276758,
      "learning_rate": 0.003,
      "loss": 4.0585,
      "step": 9700
    },
    {
      "epoch": 0.09701,
      "grad_norm": 1.1096124083859713,
      "learning_rate": 0.003,
      "loss": 4.0336,
      "step": 9701
    },
    {
      "epoch": 0.09702,
      "grad_norm": 1.0470332399648397,
      "learning_rate": 0.003,
      "loss": 4.0154,
      "step": 9702
    },
    {
      "epoch": 0.09703,
      "grad_norm": 1.116033751774405,
      "learning_rate": 0.003,
      "loss": 4.0285,
      "step": 9703
    },
    {
      "epoch": 0.09704,
      "grad_norm": 1.322938797815828,
      "learning_rate": 0.003,
      "loss": 4.0362,
      "step": 9704
    },
    {
      "epoch": 0.09705,
      "grad_norm": 1.1674145052754212,
      "learning_rate": 0.003,
      "loss": 4.0204,
      "step": 9705
    },
    {
      "epoch": 0.09706,
      "grad_norm": 1.080731872467136,
      "learning_rate": 0.003,
      "loss": 4.0391,
      "step": 9706
    },
    {
      "epoch": 0.09707,
      "grad_norm": 1.0930211810900559,
      "learning_rate": 0.003,
      "loss": 4.0044,
      "step": 9707
    },
    {
      "epoch": 0.09708,
      "grad_norm": 1.1779520488029591,
      "learning_rate": 0.003,
      "loss": 4.0047,
      "step": 9708
    },
    {
      "epoch": 0.09709,
      "grad_norm": 1.1874834288052714,
      "learning_rate": 0.003,
      "loss": 4.0396,
      "step": 9709
    },
    {
      "epoch": 0.0971,
      "grad_norm": 1.070110334968628,
      "learning_rate": 0.003,
      "loss": 4.0282,
      "step": 9710
    },
    {
      "epoch": 0.09711,
      "grad_norm": 0.9584275425436,
      "learning_rate": 0.003,
      "loss": 4.0325,
      "step": 9711
    },
    {
      "epoch": 0.09712,
      "grad_norm": 1.2305623246651165,
      "learning_rate": 0.003,
      "loss": 4.0779,
      "step": 9712
    },
    {
      "epoch": 0.09713,
      "grad_norm": 1.0142149218985719,
      "learning_rate": 0.003,
      "loss": 3.9958,
      "step": 9713
    },
    {
      "epoch": 0.09714,
      "grad_norm": 1.0721750280658653,
      "learning_rate": 0.003,
      "loss": 4.0164,
      "step": 9714
    },
    {
      "epoch": 0.09715,
      "grad_norm": 1.0682520259093737,
      "learning_rate": 0.003,
      "loss": 4.0407,
      "step": 9715
    },
    {
      "epoch": 0.09716,
      "grad_norm": 1.0242254335123158,
      "learning_rate": 0.003,
      "loss": 4.0074,
      "step": 9716
    },
    {
      "epoch": 0.09717,
      "grad_norm": 1.1149187761150097,
      "learning_rate": 0.003,
      "loss": 4.0653,
      "step": 9717
    },
    {
      "epoch": 0.09718,
      "grad_norm": 0.9890961402875876,
      "learning_rate": 0.003,
      "loss": 4.0325,
      "step": 9718
    },
    {
      "epoch": 0.09719,
      "grad_norm": 1.0299139105443698,
      "learning_rate": 0.003,
      "loss": 4.0201,
      "step": 9719
    },
    {
      "epoch": 0.0972,
      "grad_norm": 1.1558417735093496,
      "learning_rate": 0.003,
      "loss": 4.0219,
      "step": 9720
    },
    {
      "epoch": 0.09721,
      "grad_norm": 0.9555209453551541,
      "learning_rate": 0.003,
      "loss": 4.0562,
      "step": 9721
    },
    {
      "epoch": 0.09722,
      "grad_norm": 1.1785724250641767,
      "learning_rate": 0.003,
      "loss": 4.038,
      "step": 9722
    },
    {
      "epoch": 0.09723,
      "grad_norm": 0.9949897584530741,
      "learning_rate": 0.003,
      "loss": 4.0003,
      "step": 9723
    },
    {
      "epoch": 0.09724,
      "grad_norm": 1.2568210621473228,
      "learning_rate": 0.003,
      "loss": 4.0306,
      "step": 9724
    },
    {
      "epoch": 0.09725,
      "grad_norm": 1.332241847275785,
      "learning_rate": 0.003,
      "loss": 3.9916,
      "step": 9725
    },
    {
      "epoch": 0.09726,
      "grad_norm": 1.2144800862045018,
      "learning_rate": 0.003,
      "loss": 4.0489,
      "step": 9726
    },
    {
      "epoch": 0.09727,
      "grad_norm": 1.011363287246841,
      "learning_rate": 0.003,
      "loss": 4.0351,
      "step": 9727
    },
    {
      "epoch": 0.09728,
      "grad_norm": 1.2120085901786424,
      "learning_rate": 0.003,
      "loss": 4.0308,
      "step": 9728
    },
    {
      "epoch": 0.09729,
      "grad_norm": 0.867741516356202,
      "learning_rate": 0.003,
      "loss": 4.0264,
      "step": 9729
    },
    {
      "epoch": 0.0973,
      "grad_norm": 0.8878505496669983,
      "learning_rate": 0.003,
      "loss": 4.0376,
      "step": 9730
    },
    {
      "epoch": 0.09731,
      "grad_norm": 1.0837727849054648,
      "learning_rate": 0.003,
      "loss": 3.9926,
      "step": 9731
    },
    {
      "epoch": 0.09732,
      "grad_norm": 1.2212446559927213,
      "learning_rate": 0.003,
      "loss": 4.043,
      "step": 9732
    },
    {
      "epoch": 0.09733,
      "grad_norm": 1.2393726295211063,
      "learning_rate": 0.003,
      "loss": 4.0276,
      "step": 9733
    },
    {
      "epoch": 0.09734,
      "grad_norm": 1.188214463071944,
      "learning_rate": 0.003,
      "loss": 4.0605,
      "step": 9734
    },
    {
      "epoch": 0.09735,
      "grad_norm": 0.9872370881140842,
      "learning_rate": 0.003,
      "loss": 4.0504,
      "step": 9735
    },
    {
      "epoch": 0.09736,
      "grad_norm": 1.2580153206408196,
      "learning_rate": 0.003,
      "loss": 4.0333,
      "step": 9736
    },
    {
      "epoch": 0.09737,
      "grad_norm": 0.8841844781969095,
      "learning_rate": 0.003,
      "loss": 3.9805,
      "step": 9737
    },
    {
      "epoch": 0.09738,
      "grad_norm": 0.976812400102225,
      "learning_rate": 0.003,
      "loss": 4.0472,
      "step": 9738
    },
    {
      "epoch": 0.09739,
      "grad_norm": 1.1508306667326322,
      "learning_rate": 0.003,
      "loss": 4.048,
      "step": 9739
    },
    {
      "epoch": 0.0974,
      "grad_norm": 1.196271566840404,
      "learning_rate": 0.003,
      "loss": 4.0357,
      "step": 9740
    },
    {
      "epoch": 0.09741,
      "grad_norm": 1.3082102267631772,
      "learning_rate": 0.003,
      "loss": 4.0845,
      "step": 9741
    },
    {
      "epoch": 0.09742,
      "grad_norm": 0.8994765241690306,
      "learning_rate": 0.003,
      "loss": 4.0156,
      "step": 9742
    },
    {
      "epoch": 0.09743,
      "grad_norm": 1.1682198162883546,
      "learning_rate": 0.003,
      "loss": 4.0486,
      "step": 9743
    },
    {
      "epoch": 0.09744,
      "grad_norm": 1.1469753871425274,
      "learning_rate": 0.003,
      "loss": 4.0123,
      "step": 9744
    },
    {
      "epoch": 0.09745,
      "grad_norm": 1.0422496922123192,
      "learning_rate": 0.003,
      "loss": 4.0645,
      "step": 9745
    },
    {
      "epoch": 0.09746,
      "grad_norm": 1.1368341501666346,
      "learning_rate": 0.003,
      "loss": 4.0263,
      "step": 9746
    },
    {
      "epoch": 0.09747,
      "grad_norm": 1.0608575439585237,
      "learning_rate": 0.003,
      "loss": 4.036,
      "step": 9747
    },
    {
      "epoch": 0.09748,
      "grad_norm": 1.1013061298997167,
      "learning_rate": 0.003,
      "loss": 4.0687,
      "step": 9748
    },
    {
      "epoch": 0.09749,
      "grad_norm": 1.0678436420733588,
      "learning_rate": 0.003,
      "loss": 4.0106,
      "step": 9749
    },
    {
      "epoch": 0.0975,
      "grad_norm": 1.2786918164008214,
      "learning_rate": 0.003,
      "loss": 4.0254,
      "step": 9750
    },
    {
      "epoch": 0.09751,
      "grad_norm": 1.28463107870462,
      "learning_rate": 0.003,
      "loss": 4.0028,
      "step": 9751
    },
    {
      "epoch": 0.09752,
      "grad_norm": 1.0428808856437923,
      "learning_rate": 0.003,
      "loss": 4.0461,
      "step": 9752
    },
    {
      "epoch": 0.09753,
      "grad_norm": 1.192666194400153,
      "learning_rate": 0.003,
      "loss": 3.9985,
      "step": 9753
    },
    {
      "epoch": 0.09754,
      "grad_norm": 0.9605862543799322,
      "learning_rate": 0.003,
      "loss": 4.0428,
      "step": 9754
    },
    {
      "epoch": 0.09755,
      "grad_norm": 0.9697232496137559,
      "learning_rate": 0.003,
      "loss": 4.0152,
      "step": 9755
    },
    {
      "epoch": 0.09756,
      "grad_norm": 1.224040574050268,
      "learning_rate": 0.003,
      "loss": 4.0271,
      "step": 9756
    },
    {
      "epoch": 0.09757,
      "grad_norm": 1.1631102520521532,
      "learning_rate": 0.003,
      "loss": 4.036,
      "step": 9757
    },
    {
      "epoch": 0.09758,
      "grad_norm": 1.2505629557746405,
      "learning_rate": 0.003,
      "loss": 4.0342,
      "step": 9758
    },
    {
      "epoch": 0.09759,
      "grad_norm": 0.9191423900203707,
      "learning_rate": 0.003,
      "loss": 4.028,
      "step": 9759
    },
    {
      "epoch": 0.0976,
      "grad_norm": 1.1424671614519222,
      "learning_rate": 0.003,
      "loss": 4.0381,
      "step": 9760
    },
    {
      "epoch": 0.09761,
      "grad_norm": 1.102658611997917,
      "learning_rate": 0.003,
      "loss": 4.0292,
      "step": 9761
    },
    {
      "epoch": 0.09762,
      "grad_norm": 1.1444051134380946,
      "learning_rate": 0.003,
      "loss": 4.0489,
      "step": 9762
    },
    {
      "epoch": 0.09763,
      "grad_norm": 0.9264161068395702,
      "learning_rate": 0.003,
      "loss": 3.9995,
      "step": 9763
    },
    {
      "epoch": 0.09764,
      "grad_norm": 1.1546408453955734,
      "learning_rate": 0.003,
      "loss": 4.0017,
      "step": 9764
    },
    {
      "epoch": 0.09765,
      "grad_norm": 1.0603790276325908,
      "learning_rate": 0.003,
      "loss": 4.0603,
      "step": 9765
    },
    {
      "epoch": 0.09766,
      "grad_norm": 1.1751196941995863,
      "learning_rate": 0.003,
      "loss": 4.0349,
      "step": 9766
    },
    {
      "epoch": 0.09767,
      "grad_norm": 1.0177083794650912,
      "learning_rate": 0.003,
      "loss": 4.046,
      "step": 9767
    },
    {
      "epoch": 0.09768,
      "grad_norm": 1.273731379095236,
      "learning_rate": 0.003,
      "loss": 4.0246,
      "step": 9768
    },
    {
      "epoch": 0.09769,
      "grad_norm": 1.0996220193558155,
      "learning_rate": 0.003,
      "loss": 4.054,
      "step": 9769
    },
    {
      "epoch": 0.0977,
      "grad_norm": 1.0840911902806654,
      "learning_rate": 0.003,
      "loss": 4.0522,
      "step": 9770
    },
    {
      "epoch": 0.09771,
      "grad_norm": 1.2555935788729693,
      "learning_rate": 0.003,
      "loss": 4.0252,
      "step": 9771
    },
    {
      "epoch": 0.09772,
      "grad_norm": 1.0357209296089733,
      "learning_rate": 0.003,
      "loss": 4.0157,
      "step": 9772
    },
    {
      "epoch": 0.09773,
      "grad_norm": 1.1082243796424802,
      "learning_rate": 0.003,
      "loss": 4.0361,
      "step": 9773
    },
    {
      "epoch": 0.09774,
      "grad_norm": 1.1536368515267816,
      "learning_rate": 0.003,
      "loss": 4.0561,
      "step": 9774
    },
    {
      "epoch": 0.09775,
      "grad_norm": 1.1867161847229521,
      "learning_rate": 0.003,
      "loss": 4.0533,
      "step": 9775
    },
    {
      "epoch": 0.09776,
      "grad_norm": 0.9452459057379259,
      "learning_rate": 0.003,
      "loss": 4.0194,
      "step": 9776
    },
    {
      "epoch": 0.09777,
      "grad_norm": 1.302622907890426,
      "learning_rate": 0.003,
      "loss": 4.0577,
      "step": 9777
    },
    {
      "epoch": 0.09778,
      "grad_norm": 0.9532508643293588,
      "learning_rate": 0.003,
      "loss": 4.0086,
      "step": 9778
    },
    {
      "epoch": 0.09779,
      "grad_norm": 1.1242482678635257,
      "learning_rate": 0.003,
      "loss": 4.0233,
      "step": 9779
    },
    {
      "epoch": 0.0978,
      "grad_norm": 0.9839145612462165,
      "learning_rate": 0.003,
      "loss": 4.0225,
      "step": 9780
    },
    {
      "epoch": 0.09781,
      "grad_norm": 1.1498232001451654,
      "learning_rate": 0.003,
      "loss": 4.0402,
      "step": 9781
    },
    {
      "epoch": 0.09782,
      "grad_norm": 1.271609091807339,
      "learning_rate": 0.003,
      "loss": 4.0452,
      "step": 9782
    },
    {
      "epoch": 0.09783,
      "grad_norm": 1.058335316998838,
      "learning_rate": 0.003,
      "loss": 4.0152,
      "step": 9783
    },
    {
      "epoch": 0.09784,
      "grad_norm": 1.37604134703034,
      "learning_rate": 0.003,
      "loss": 4.0016,
      "step": 9784
    },
    {
      "epoch": 0.09785,
      "grad_norm": 1.1945900295219143,
      "learning_rate": 0.003,
      "loss": 4.0399,
      "step": 9785
    },
    {
      "epoch": 0.09786,
      "grad_norm": 1.1892035335129034,
      "learning_rate": 0.003,
      "loss": 4.0274,
      "step": 9786
    },
    {
      "epoch": 0.09787,
      "grad_norm": 0.9198710416706617,
      "learning_rate": 0.003,
      "loss": 4.0208,
      "step": 9787
    },
    {
      "epoch": 0.09788,
      "grad_norm": 1.0963105756254876,
      "learning_rate": 0.003,
      "loss": 4.0428,
      "step": 9788
    },
    {
      "epoch": 0.09789,
      "grad_norm": 1.0434621221623521,
      "learning_rate": 0.003,
      "loss": 4.0109,
      "step": 9789
    },
    {
      "epoch": 0.0979,
      "grad_norm": 1.1550849940577819,
      "learning_rate": 0.003,
      "loss": 4.0417,
      "step": 9790
    },
    {
      "epoch": 0.09791,
      "grad_norm": 1.357987097806882,
      "learning_rate": 0.003,
      "loss": 4.0531,
      "step": 9791
    },
    {
      "epoch": 0.09792,
      "grad_norm": 1.042044201245543,
      "learning_rate": 0.003,
      "loss": 4.0172,
      "step": 9792
    },
    {
      "epoch": 0.09793,
      "grad_norm": 1.1919512358344675,
      "learning_rate": 0.003,
      "loss": 4.0301,
      "step": 9793
    },
    {
      "epoch": 0.09794,
      "grad_norm": 1.2241595549926594,
      "learning_rate": 0.003,
      "loss": 4.0452,
      "step": 9794
    },
    {
      "epoch": 0.09795,
      "grad_norm": 1.0586631623737053,
      "learning_rate": 0.003,
      "loss": 4.0508,
      "step": 9795
    },
    {
      "epoch": 0.09796,
      "grad_norm": 1.1914802085441085,
      "learning_rate": 0.003,
      "loss": 4.0284,
      "step": 9796
    },
    {
      "epoch": 0.09797,
      "grad_norm": 1.1115787515349747,
      "learning_rate": 0.003,
      "loss": 4.0345,
      "step": 9797
    },
    {
      "epoch": 0.09798,
      "grad_norm": 0.9816749943466317,
      "learning_rate": 0.003,
      "loss": 4.0263,
      "step": 9798
    },
    {
      "epoch": 0.09799,
      "grad_norm": 1.1660406158959165,
      "learning_rate": 0.003,
      "loss": 4.0616,
      "step": 9799
    },
    {
      "epoch": 0.098,
      "grad_norm": 1.0709459229028373,
      "learning_rate": 0.003,
      "loss": 4.0334,
      "step": 9800
    },
    {
      "epoch": 0.09801,
      "grad_norm": 1.1990212956387598,
      "learning_rate": 0.003,
      "loss": 4.0449,
      "step": 9801
    },
    {
      "epoch": 0.09802,
      "grad_norm": 1.0567665044317882,
      "learning_rate": 0.003,
      "loss": 4.0296,
      "step": 9802
    },
    {
      "epoch": 0.09803,
      "grad_norm": 1.0778757992404968,
      "learning_rate": 0.003,
      "loss": 4.0198,
      "step": 9803
    },
    {
      "epoch": 0.09804,
      "grad_norm": 1.0525175964682807,
      "learning_rate": 0.003,
      "loss": 4.0235,
      "step": 9804
    },
    {
      "epoch": 0.09805,
      "grad_norm": 1.241338187112226,
      "learning_rate": 0.003,
      "loss": 4.0193,
      "step": 9805
    },
    {
      "epoch": 0.09806,
      "grad_norm": 1.0640857294444321,
      "learning_rate": 0.003,
      "loss": 4.0118,
      "step": 9806
    },
    {
      "epoch": 0.09807,
      "grad_norm": 1.1456311229706166,
      "learning_rate": 0.003,
      "loss": 4.0271,
      "step": 9807
    },
    {
      "epoch": 0.09808,
      "grad_norm": 1.1124715383770616,
      "learning_rate": 0.003,
      "loss": 4.0605,
      "step": 9808
    },
    {
      "epoch": 0.09809,
      "grad_norm": 1.0282435668770187,
      "learning_rate": 0.003,
      "loss": 4.0248,
      "step": 9809
    },
    {
      "epoch": 0.0981,
      "grad_norm": 1.0398795512010792,
      "learning_rate": 0.003,
      "loss": 4.0563,
      "step": 9810
    },
    {
      "epoch": 0.09811,
      "grad_norm": 0.9279075100819835,
      "learning_rate": 0.003,
      "loss": 4.0531,
      "step": 9811
    },
    {
      "epoch": 0.09812,
      "grad_norm": 1.0782186992787437,
      "learning_rate": 0.003,
      "loss": 4.0246,
      "step": 9812
    },
    {
      "epoch": 0.09813,
      "grad_norm": 1.3085300578739432,
      "learning_rate": 0.003,
      "loss": 4.0437,
      "step": 9813
    },
    {
      "epoch": 0.09814,
      "grad_norm": 1.1101588548323373,
      "learning_rate": 0.003,
      "loss": 4.0235,
      "step": 9814
    },
    {
      "epoch": 0.09815,
      "grad_norm": 1.1431391579353671,
      "learning_rate": 0.003,
      "loss": 3.9915,
      "step": 9815
    },
    {
      "epoch": 0.09816,
      "grad_norm": 1.164627516774537,
      "learning_rate": 0.003,
      "loss": 3.9982,
      "step": 9816
    },
    {
      "epoch": 0.09817,
      "grad_norm": 1.1241257621100056,
      "learning_rate": 0.003,
      "loss": 4.0224,
      "step": 9817
    },
    {
      "epoch": 0.09818,
      "grad_norm": 1.1105822418561284,
      "learning_rate": 0.003,
      "loss": 4.0456,
      "step": 9818
    },
    {
      "epoch": 0.09819,
      "grad_norm": 1.2260148961373039,
      "learning_rate": 0.003,
      "loss": 4.046,
      "step": 9819
    },
    {
      "epoch": 0.0982,
      "grad_norm": 1.0867720780444274,
      "learning_rate": 0.003,
      "loss": 4.0293,
      "step": 9820
    },
    {
      "epoch": 0.09821,
      "grad_norm": 0.925875521249806,
      "learning_rate": 0.003,
      "loss": 4.017,
      "step": 9821
    },
    {
      "epoch": 0.09822,
      "grad_norm": 1.4487529076721064,
      "learning_rate": 0.003,
      "loss": 4.0513,
      "step": 9822
    },
    {
      "epoch": 0.09823,
      "grad_norm": 0.8294595735585626,
      "learning_rate": 0.003,
      "loss": 4.0496,
      "step": 9823
    },
    {
      "epoch": 0.09824,
      "grad_norm": 1.073309157510469,
      "learning_rate": 0.003,
      "loss": 4.0428,
      "step": 9824
    },
    {
      "epoch": 0.09825,
      "grad_norm": 1.279144134720854,
      "learning_rate": 0.003,
      "loss": 4.0372,
      "step": 9825
    },
    {
      "epoch": 0.09826,
      "grad_norm": 1.1476433885186508,
      "learning_rate": 0.003,
      "loss": 4.0141,
      "step": 9826
    },
    {
      "epoch": 0.09827,
      "grad_norm": 1.043838880236461,
      "learning_rate": 0.003,
      "loss": 4.0179,
      "step": 9827
    },
    {
      "epoch": 0.09828,
      "grad_norm": 1.1968693614717298,
      "learning_rate": 0.003,
      "loss": 4.0402,
      "step": 9828
    },
    {
      "epoch": 0.09829,
      "grad_norm": 1.0457138129997414,
      "learning_rate": 0.003,
      "loss": 4.0275,
      "step": 9829
    },
    {
      "epoch": 0.0983,
      "grad_norm": 1.5123833188618667,
      "learning_rate": 0.003,
      "loss": 4.0458,
      "step": 9830
    },
    {
      "epoch": 0.09831,
      "grad_norm": 0.8711665475933555,
      "learning_rate": 0.003,
      "loss": 4.0456,
      "step": 9831
    },
    {
      "epoch": 0.09832,
      "grad_norm": 1.144386517076489,
      "learning_rate": 0.003,
      "loss": 4.0093,
      "step": 9832
    },
    {
      "epoch": 0.09833,
      "grad_norm": 1.246511769111947,
      "learning_rate": 0.003,
      "loss": 4.0599,
      "step": 9833
    },
    {
      "epoch": 0.09834,
      "grad_norm": 1.13349377739888,
      "learning_rate": 0.003,
      "loss": 4.0412,
      "step": 9834
    },
    {
      "epoch": 0.09835,
      "grad_norm": 1.0894251468551766,
      "learning_rate": 0.003,
      "loss": 4.0385,
      "step": 9835
    },
    {
      "epoch": 0.09836,
      "grad_norm": 1.1354298533553584,
      "learning_rate": 0.003,
      "loss": 4.0041,
      "step": 9836
    },
    {
      "epoch": 0.09837,
      "grad_norm": 1.1000173143853216,
      "learning_rate": 0.003,
      "loss": 3.996,
      "step": 9837
    },
    {
      "epoch": 0.09838,
      "grad_norm": 1.119210791465233,
      "learning_rate": 0.003,
      "loss": 4.0269,
      "step": 9838
    },
    {
      "epoch": 0.09839,
      "grad_norm": 0.9524635366093233,
      "learning_rate": 0.003,
      "loss": 4.0565,
      "step": 9839
    },
    {
      "epoch": 0.0984,
      "grad_norm": 1.0797554846674096,
      "learning_rate": 0.003,
      "loss": 4.0297,
      "step": 9840
    },
    {
      "epoch": 0.09841,
      "grad_norm": 1.1411790679575085,
      "learning_rate": 0.003,
      "loss": 4.045,
      "step": 9841
    },
    {
      "epoch": 0.09842,
      "grad_norm": 1.1281791126318816,
      "learning_rate": 0.003,
      "loss": 4.0219,
      "step": 9842
    },
    {
      "epoch": 0.09843,
      "grad_norm": 0.9134744520495551,
      "learning_rate": 0.003,
      "loss": 4.0235,
      "step": 9843
    },
    {
      "epoch": 0.09844,
      "grad_norm": 1.0160022299473974,
      "learning_rate": 0.003,
      "loss": 4.0062,
      "step": 9844
    },
    {
      "epoch": 0.09845,
      "grad_norm": 1.3428743136543202,
      "learning_rate": 0.003,
      "loss": 4.0307,
      "step": 9845
    },
    {
      "epoch": 0.09846,
      "grad_norm": 1.0189991251253638,
      "learning_rate": 0.003,
      "loss": 4.011,
      "step": 9846
    },
    {
      "epoch": 0.09847,
      "grad_norm": 1.1704243872927704,
      "learning_rate": 0.003,
      "loss": 4.0374,
      "step": 9847
    },
    {
      "epoch": 0.09848,
      "grad_norm": 0.9081125793690473,
      "learning_rate": 0.003,
      "loss": 4.0108,
      "step": 9848
    },
    {
      "epoch": 0.09849,
      "grad_norm": 1.0336695774661442,
      "learning_rate": 0.003,
      "loss": 4.0274,
      "step": 9849
    },
    {
      "epoch": 0.0985,
      "grad_norm": 1.185375263926777,
      "learning_rate": 0.003,
      "loss": 4.0593,
      "step": 9850
    },
    {
      "epoch": 0.09851,
      "grad_norm": 1.1305392937955534,
      "learning_rate": 0.003,
      "loss": 4.0613,
      "step": 9851
    },
    {
      "epoch": 0.09852,
      "grad_norm": 0.9581592333628471,
      "learning_rate": 0.003,
      "loss": 4.0315,
      "step": 9852
    },
    {
      "epoch": 0.09853,
      "grad_norm": 1.2114861648250734,
      "learning_rate": 0.003,
      "loss": 4.0301,
      "step": 9853
    },
    {
      "epoch": 0.09854,
      "grad_norm": 1.0284507718101719,
      "learning_rate": 0.003,
      "loss": 4.0066,
      "step": 9854
    },
    {
      "epoch": 0.09855,
      "grad_norm": 1.2232319748941225,
      "learning_rate": 0.003,
      "loss": 4.0038,
      "step": 9855
    },
    {
      "epoch": 0.09856,
      "grad_norm": 1.0855515860790759,
      "learning_rate": 0.003,
      "loss": 4.062,
      "step": 9856
    },
    {
      "epoch": 0.09857,
      "grad_norm": 1.0753853243294782,
      "learning_rate": 0.003,
      "loss": 4.0313,
      "step": 9857
    },
    {
      "epoch": 0.09858,
      "grad_norm": 1.206054994842722,
      "learning_rate": 0.003,
      "loss": 4.0289,
      "step": 9858
    },
    {
      "epoch": 0.09859,
      "grad_norm": 1.098563875256706,
      "learning_rate": 0.003,
      "loss": 4.0698,
      "step": 9859
    },
    {
      "epoch": 0.0986,
      "grad_norm": 1.1139961945407197,
      "learning_rate": 0.003,
      "loss": 4.0267,
      "step": 9860
    },
    {
      "epoch": 0.09861,
      "grad_norm": 1.1965232138904764,
      "learning_rate": 0.003,
      "loss": 4.0368,
      "step": 9861
    },
    {
      "epoch": 0.09862,
      "grad_norm": 1.1112915398055838,
      "learning_rate": 0.003,
      "loss": 4.0504,
      "step": 9862
    },
    {
      "epoch": 0.09863,
      "grad_norm": 1.1765592866838956,
      "learning_rate": 0.003,
      "loss": 4.0581,
      "step": 9863
    },
    {
      "epoch": 0.09864,
      "grad_norm": 1.281604965271072,
      "learning_rate": 0.003,
      "loss": 4.0622,
      "step": 9864
    },
    {
      "epoch": 0.09865,
      "grad_norm": 0.9474747599256009,
      "learning_rate": 0.003,
      "loss": 4.0343,
      "step": 9865
    },
    {
      "epoch": 0.09866,
      "grad_norm": 1.2526292758671924,
      "learning_rate": 0.003,
      "loss": 4.0278,
      "step": 9866
    },
    {
      "epoch": 0.09867,
      "grad_norm": 0.9782340542182837,
      "learning_rate": 0.003,
      "loss": 4.0073,
      "step": 9867
    },
    {
      "epoch": 0.09868,
      "grad_norm": 1.2384674527105715,
      "learning_rate": 0.003,
      "loss": 4.0223,
      "step": 9868
    },
    {
      "epoch": 0.09869,
      "grad_norm": 1.1272829215817646,
      "learning_rate": 0.003,
      "loss": 4.0233,
      "step": 9869
    },
    {
      "epoch": 0.0987,
      "grad_norm": 1.0396668913063039,
      "learning_rate": 0.003,
      "loss": 4.0147,
      "step": 9870
    },
    {
      "epoch": 0.09871,
      "grad_norm": 1.037702753803307,
      "learning_rate": 0.003,
      "loss": 4.0904,
      "step": 9871
    },
    {
      "epoch": 0.09872,
      "grad_norm": 1.1864414611980871,
      "learning_rate": 0.003,
      "loss": 4.0479,
      "step": 9872
    },
    {
      "epoch": 0.09873,
      "grad_norm": 1.0076047434777218,
      "learning_rate": 0.003,
      "loss": 4.0341,
      "step": 9873
    },
    {
      "epoch": 0.09874,
      "grad_norm": 1.1197613528576318,
      "learning_rate": 0.003,
      "loss": 4.0555,
      "step": 9874
    },
    {
      "epoch": 0.09875,
      "grad_norm": 0.9686098749546355,
      "learning_rate": 0.003,
      "loss": 4.0267,
      "step": 9875
    },
    {
      "epoch": 0.09876,
      "grad_norm": 1.04278415670397,
      "learning_rate": 0.003,
      "loss": 4.0323,
      "step": 9876
    },
    {
      "epoch": 0.09877,
      "grad_norm": 1.402750496107436,
      "learning_rate": 0.003,
      "loss": 4.0345,
      "step": 9877
    },
    {
      "epoch": 0.09878,
      "grad_norm": 1.2330935372023277,
      "learning_rate": 0.003,
      "loss": 4.0173,
      "step": 9878
    },
    {
      "epoch": 0.09879,
      "grad_norm": 1.025634526463866,
      "learning_rate": 0.003,
      "loss": 4.0219,
      "step": 9879
    },
    {
      "epoch": 0.0988,
      "grad_norm": 1.0537640601114184,
      "learning_rate": 0.003,
      "loss": 4.0131,
      "step": 9880
    },
    {
      "epoch": 0.09881,
      "grad_norm": 1.2290027208808174,
      "learning_rate": 0.003,
      "loss": 4.0288,
      "step": 9881
    },
    {
      "epoch": 0.09882,
      "grad_norm": 0.8930767525332688,
      "learning_rate": 0.003,
      "loss": 4.0448,
      "step": 9882
    },
    {
      "epoch": 0.09883,
      "grad_norm": 1.0530103820851344,
      "learning_rate": 0.003,
      "loss": 4.0304,
      "step": 9883
    },
    {
      "epoch": 0.09884,
      "grad_norm": 1.2191490701919565,
      "learning_rate": 0.003,
      "loss": 4.0172,
      "step": 9884
    },
    {
      "epoch": 0.09885,
      "grad_norm": 1.0218706297933426,
      "learning_rate": 0.003,
      "loss": 4.0265,
      "step": 9885
    },
    {
      "epoch": 0.09886,
      "grad_norm": 1.150967177807416,
      "learning_rate": 0.003,
      "loss": 4.0171,
      "step": 9886
    },
    {
      "epoch": 0.09887,
      "grad_norm": 1.129373196803706,
      "learning_rate": 0.003,
      "loss": 4.0498,
      "step": 9887
    },
    {
      "epoch": 0.09888,
      "grad_norm": 1.149004087106683,
      "learning_rate": 0.003,
      "loss": 4.0685,
      "step": 9888
    },
    {
      "epoch": 0.09889,
      "grad_norm": 1.2702764689102495,
      "learning_rate": 0.003,
      "loss": 4.0185,
      "step": 9889
    },
    {
      "epoch": 0.0989,
      "grad_norm": 1.2676550107001645,
      "learning_rate": 0.003,
      "loss": 4.0413,
      "step": 9890
    },
    {
      "epoch": 0.09891,
      "grad_norm": 1.2615471554776427,
      "learning_rate": 0.003,
      "loss": 4.0491,
      "step": 9891
    },
    {
      "epoch": 0.09892,
      "grad_norm": 1.1167316738746,
      "learning_rate": 0.003,
      "loss": 4.0055,
      "step": 9892
    },
    {
      "epoch": 0.09893,
      "grad_norm": 0.9054671662434778,
      "learning_rate": 0.003,
      "loss": 4.0314,
      "step": 9893
    },
    {
      "epoch": 0.09894,
      "grad_norm": 0.9236484052010723,
      "learning_rate": 0.003,
      "loss": 4.0437,
      "step": 9894
    },
    {
      "epoch": 0.09895,
      "grad_norm": 1.208985347287342,
      "learning_rate": 0.003,
      "loss": 4.0508,
      "step": 9895
    },
    {
      "epoch": 0.09896,
      "grad_norm": 1.061049368371955,
      "learning_rate": 0.003,
      "loss": 4.0129,
      "step": 9896
    },
    {
      "epoch": 0.09897,
      "grad_norm": 1.329238894639678,
      "learning_rate": 0.003,
      "loss": 4.0314,
      "step": 9897
    },
    {
      "epoch": 0.09898,
      "grad_norm": 0.9855278608052852,
      "learning_rate": 0.003,
      "loss": 4.0247,
      "step": 9898
    },
    {
      "epoch": 0.09899,
      "grad_norm": 1.1077024624762888,
      "learning_rate": 0.003,
      "loss": 4.0213,
      "step": 9899
    },
    {
      "epoch": 0.099,
      "grad_norm": 1.206281945713825,
      "learning_rate": 0.003,
      "loss": 4.0618,
      "step": 9900
    },
    {
      "epoch": 0.09901,
      "grad_norm": 1.1299933707769678,
      "learning_rate": 0.003,
      "loss": 4.0394,
      "step": 9901
    },
    {
      "epoch": 0.09902,
      "grad_norm": 1.195651589757622,
      "learning_rate": 0.003,
      "loss": 4.038,
      "step": 9902
    },
    {
      "epoch": 0.09903,
      "grad_norm": 0.9114394972258246,
      "learning_rate": 0.003,
      "loss": 4.0231,
      "step": 9903
    },
    {
      "epoch": 0.09904,
      "grad_norm": 0.8585107487801131,
      "learning_rate": 0.003,
      "loss": 4.0026,
      "step": 9904
    },
    {
      "epoch": 0.09905,
      "grad_norm": 0.8579026965268124,
      "learning_rate": 0.003,
      "loss": 4.0214,
      "step": 9905
    },
    {
      "epoch": 0.09906,
      "grad_norm": 0.9985171119339783,
      "learning_rate": 0.003,
      "loss": 4.0377,
      "step": 9906
    },
    {
      "epoch": 0.09907,
      "grad_norm": 1.1354189296859807,
      "learning_rate": 0.003,
      "loss": 3.9969,
      "step": 9907
    },
    {
      "epoch": 0.09908,
      "grad_norm": 1.0976033717968405,
      "learning_rate": 0.003,
      "loss": 4.0439,
      "step": 9908
    },
    {
      "epoch": 0.09909,
      "grad_norm": 1.1043513109244145,
      "learning_rate": 0.003,
      "loss": 4.0425,
      "step": 9909
    },
    {
      "epoch": 0.0991,
      "grad_norm": 1.2913134989756772,
      "learning_rate": 0.003,
      "loss": 4.0422,
      "step": 9910
    },
    {
      "epoch": 0.09911,
      "grad_norm": 1.0885873929556769,
      "learning_rate": 0.003,
      "loss": 4.0426,
      "step": 9911
    },
    {
      "epoch": 0.09912,
      "grad_norm": 1.439636649962891,
      "learning_rate": 0.003,
      "loss": 4.0356,
      "step": 9912
    },
    {
      "epoch": 0.09913,
      "grad_norm": 0.9927728387511651,
      "learning_rate": 0.003,
      "loss": 4.054,
      "step": 9913
    },
    {
      "epoch": 0.09914,
      "grad_norm": 1.224689926683272,
      "learning_rate": 0.003,
      "loss": 4.0141,
      "step": 9914
    },
    {
      "epoch": 0.09915,
      "grad_norm": 1.0185052815824782,
      "learning_rate": 0.003,
      "loss": 4.0226,
      "step": 9915
    },
    {
      "epoch": 0.09916,
      "grad_norm": 1.1039015304713733,
      "learning_rate": 0.003,
      "loss": 4.047,
      "step": 9916
    },
    {
      "epoch": 0.09917,
      "grad_norm": 0.9560283744143093,
      "learning_rate": 0.003,
      "loss": 4.0484,
      "step": 9917
    },
    {
      "epoch": 0.09918,
      "grad_norm": 1.249009304246015,
      "learning_rate": 0.003,
      "loss": 4.0134,
      "step": 9918
    },
    {
      "epoch": 0.09919,
      "grad_norm": 1.0276598807835757,
      "learning_rate": 0.003,
      "loss": 4.0587,
      "step": 9919
    },
    {
      "epoch": 0.0992,
      "grad_norm": 1.4384067882969795,
      "learning_rate": 0.003,
      "loss": 4.053,
      "step": 9920
    },
    {
      "epoch": 0.09921,
      "grad_norm": 1.0855607508489593,
      "learning_rate": 0.003,
      "loss": 3.9945,
      "step": 9921
    },
    {
      "epoch": 0.09922,
      "grad_norm": 1.3281753668853509,
      "learning_rate": 0.003,
      "loss": 4.0425,
      "step": 9922
    },
    {
      "epoch": 0.09923,
      "grad_norm": 1.3210796402306015,
      "learning_rate": 0.003,
      "loss": 4.0473,
      "step": 9923
    },
    {
      "epoch": 0.09924,
      "grad_norm": 0.9624480778672281,
      "learning_rate": 0.003,
      "loss": 4.0023,
      "step": 9924
    },
    {
      "epoch": 0.09925,
      "grad_norm": 1.0010419815015703,
      "learning_rate": 0.003,
      "loss": 4.0071,
      "step": 9925
    },
    {
      "epoch": 0.09926,
      "grad_norm": 1.481720653662113,
      "learning_rate": 0.003,
      "loss": 4.0484,
      "step": 9926
    },
    {
      "epoch": 0.09927,
      "grad_norm": 0.9737956838613526,
      "learning_rate": 0.003,
      "loss": 4.0682,
      "step": 9927
    },
    {
      "epoch": 0.09928,
      "grad_norm": 1.0743465025663415,
      "learning_rate": 0.003,
      "loss": 4.0279,
      "step": 9928
    },
    {
      "epoch": 0.09929,
      "grad_norm": 1.0978928933680199,
      "learning_rate": 0.003,
      "loss": 4.0414,
      "step": 9929
    },
    {
      "epoch": 0.0993,
      "grad_norm": 1.154918711721139,
      "learning_rate": 0.003,
      "loss": 4.0245,
      "step": 9930
    },
    {
      "epoch": 0.09931,
      "grad_norm": 1.292932588277131,
      "learning_rate": 0.003,
      "loss": 4.0365,
      "step": 9931
    },
    {
      "epoch": 0.09932,
      "grad_norm": 1.068387070754382,
      "learning_rate": 0.003,
      "loss": 4.0412,
      "step": 9932
    },
    {
      "epoch": 0.09933,
      "grad_norm": 1.0193039042669239,
      "learning_rate": 0.003,
      "loss": 4.0344,
      "step": 9933
    },
    {
      "epoch": 0.09934,
      "grad_norm": 1.0548178768539755,
      "learning_rate": 0.003,
      "loss": 4.0425,
      "step": 9934
    },
    {
      "epoch": 0.09935,
      "grad_norm": 1.1606839398396906,
      "learning_rate": 0.003,
      "loss": 4.0233,
      "step": 9935
    },
    {
      "epoch": 0.09936,
      "grad_norm": 1.2692975261466353,
      "learning_rate": 0.003,
      "loss": 4.0406,
      "step": 9936
    },
    {
      "epoch": 0.09937,
      "grad_norm": 1.0782042799196752,
      "learning_rate": 0.003,
      "loss": 4.0072,
      "step": 9937
    },
    {
      "epoch": 0.09938,
      "grad_norm": 1.127980051105687,
      "learning_rate": 0.003,
      "loss": 4.0448,
      "step": 9938
    },
    {
      "epoch": 0.09939,
      "grad_norm": 1.0287441783269569,
      "learning_rate": 0.003,
      "loss": 4.0386,
      "step": 9939
    },
    {
      "epoch": 0.0994,
      "grad_norm": 1.2132424703891527,
      "learning_rate": 0.003,
      "loss": 4.0629,
      "step": 9940
    },
    {
      "epoch": 0.09941,
      "grad_norm": 0.9732597157956879,
      "learning_rate": 0.003,
      "loss": 4.0271,
      "step": 9941
    },
    {
      "epoch": 0.09942,
      "grad_norm": 1.1389068598359773,
      "learning_rate": 0.003,
      "loss": 4.0301,
      "step": 9942
    },
    {
      "epoch": 0.09943,
      "grad_norm": 1.102208282726071,
      "learning_rate": 0.003,
      "loss": 3.9767,
      "step": 9943
    },
    {
      "epoch": 0.09944,
      "grad_norm": 1.1127584490971916,
      "learning_rate": 0.003,
      "loss": 4.0593,
      "step": 9944
    },
    {
      "epoch": 0.09945,
      "grad_norm": 1.1635207514619523,
      "learning_rate": 0.003,
      "loss": 3.9994,
      "step": 9945
    },
    {
      "epoch": 0.09946,
      "grad_norm": 1.0595853146121823,
      "learning_rate": 0.003,
      "loss": 4.0503,
      "step": 9946
    },
    {
      "epoch": 0.09947,
      "grad_norm": 1.2408973624096764,
      "learning_rate": 0.003,
      "loss": 4.0408,
      "step": 9947
    },
    {
      "epoch": 0.09948,
      "grad_norm": 1.2344813666227283,
      "learning_rate": 0.003,
      "loss": 4.0459,
      "step": 9948
    },
    {
      "epoch": 0.09949,
      "grad_norm": 1.1766030065853175,
      "learning_rate": 0.003,
      "loss": 4.0395,
      "step": 9949
    },
    {
      "epoch": 0.0995,
      "grad_norm": 1.0973536641516977,
      "learning_rate": 0.003,
      "loss": 4.0322,
      "step": 9950
    },
    {
      "epoch": 0.09951,
      "grad_norm": 1.0236399693187321,
      "learning_rate": 0.003,
      "loss": 4.008,
      "step": 9951
    },
    {
      "epoch": 0.09952,
      "grad_norm": 0.9915712705437711,
      "learning_rate": 0.003,
      "loss": 3.9982,
      "step": 9952
    },
    {
      "epoch": 0.09953,
      "grad_norm": 1.1963753411506206,
      "learning_rate": 0.003,
      "loss": 4.0516,
      "step": 9953
    },
    {
      "epoch": 0.09954,
      "grad_norm": 1.104880815250887,
      "learning_rate": 0.003,
      "loss": 4.0037,
      "step": 9954
    },
    {
      "epoch": 0.09955,
      "grad_norm": 1.1708956908608728,
      "learning_rate": 0.003,
      "loss": 4.0137,
      "step": 9955
    },
    {
      "epoch": 0.09956,
      "grad_norm": 0.9866423739533496,
      "learning_rate": 0.003,
      "loss": 4.0199,
      "step": 9956
    },
    {
      "epoch": 0.09957,
      "grad_norm": 1.3165503592770573,
      "learning_rate": 0.003,
      "loss": 4.0434,
      "step": 9957
    },
    {
      "epoch": 0.09958,
      "grad_norm": 0.9536837415813932,
      "learning_rate": 0.003,
      "loss": 4.0459,
      "step": 9958
    },
    {
      "epoch": 0.09959,
      "grad_norm": 1.3409977002707099,
      "learning_rate": 0.003,
      "loss": 4.0601,
      "step": 9959
    },
    {
      "epoch": 0.0996,
      "grad_norm": 0.9814138686721887,
      "learning_rate": 0.003,
      "loss": 4.0346,
      "step": 9960
    },
    {
      "epoch": 0.09961,
      "grad_norm": 1.3803176304264348,
      "learning_rate": 0.003,
      "loss": 4.0515,
      "step": 9961
    },
    {
      "epoch": 0.09962,
      "grad_norm": 1.1780995573672939,
      "learning_rate": 0.003,
      "loss": 4.053,
      "step": 9962
    },
    {
      "epoch": 0.09963,
      "grad_norm": 1.0505789057253156,
      "learning_rate": 0.003,
      "loss": 4.0475,
      "step": 9963
    },
    {
      "epoch": 0.09964,
      "grad_norm": 1.2577481781152686,
      "learning_rate": 0.003,
      "loss": 4.0175,
      "step": 9964
    },
    {
      "epoch": 0.09965,
      "grad_norm": 0.8750383187391458,
      "learning_rate": 0.003,
      "loss": 4.0221,
      "step": 9965
    },
    {
      "epoch": 0.09966,
      "grad_norm": 0.9324526260136522,
      "learning_rate": 0.003,
      "loss": 4.0416,
      "step": 9966
    },
    {
      "epoch": 0.09967,
      "grad_norm": 1.3064826835022834,
      "learning_rate": 0.003,
      "loss": 4.0442,
      "step": 9967
    },
    {
      "epoch": 0.09968,
      "grad_norm": 1.0488845951972847,
      "learning_rate": 0.003,
      "loss": 4.0303,
      "step": 9968
    },
    {
      "epoch": 0.09969,
      "grad_norm": 1.2753549754302325,
      "learning_rate": 0.003,
      "loss": 4.0362,
      "step": 9969
    },
    {
      "epoch": 0.0997,
      "grad_norm": 1.039234075567994,
      "learning_rate": 0.003,
      "loss": 4.0232,
      "step": 9970
    },
    {
      "epoch": 0.09971,
      "grad_norm": 1.2214075051221316,
      "learning_rate": 0.003,
      "loss": 4.035,
      "step": 9971
    },
    {
      "epoch": 0.09972,
      "grad_norm": 1.1623111884041941,
      "learning_rate": 0.003,
      "loss": 4.0218,
      "step": 9972
    },
    {
      "epoch": 0.09973,
      "grad_norm": 1.1088110650963057,
      "learning_rate": 0.003,
      "loss": 4.0304,
      "step": 9973
    },
    {
      "epoch": 0.09974,
      "grad_norm": 1.0426875423604298,
      "learning_rate": 0.003,
      "loss": 4.0263,
      "step": 9974
    },
    {
      "epoch": 0.09975,
      "grad_norm": 1.259426553612719,
      "learning_rate": 0.003,
      "loss": 4.0416,
      "step": 9975
    },
    {
      "epoch": 0.09976,
      "grad_norm": 1.0298690402873982,
      "learning_rate": 0.003,
      "loss": 4.0132,
      "step": 9976
    },
    {
      "epoch": 0.09977,
      "grad_norm": 1.3610931313558177,
      "learning_rate": 0.003,
      "loss": 4.0102,
      "step": 9977
    },
    {
      "epoch": 0.09978,
      "grad_norm": 0.9132158967205964,
      "learning_rate": 0.003,
      "loss": 4.0121,
      "step": 9978
    },
    {
      "epoch": 0.09979,
      "grad_norm": 1.219498068275462,
      "learning_rate": 0.003,
      "loss": 4.0316,
      "step": 9979
    },
    {
      "epoch": 0.0998,
      "grad_norm": 1.1274470393928808,
      "learning_rate": 0.003,
      "loss": 4.0237,
      "step": 9980
    },
    {
      "epoch": 0.09981,
      "grad_norm": 1.1231505756772355,
      "learning_rate": 0.003,
      "loss": 4.0324,
      "step": 9981
    },
    {
      "epoch": 0.09982,
      "grad_norm": 1.1169231509650528,
      "learning_rate": 0.003,
      "loss": 4.0359,
      "step": 9982
    },
    {
      "epoch": 0.09983,
      "grad_norm": 1.1018331890490325,
      "learning_rate": 0.003,
      "loss": 4.0201,
      "step": 9983
    },
    {
      "epoch": 0.09984,
      "grad_norm": 1.0714964487278968,
      "learning_rate": 0.003,
      "loss": 4.0483,
      "step": 9984
    },
    {
      "epoch": 0.09985,
      "grad_norm": 1.1863401109530898,
      "learning_rate": 0.003,
      "loss": 4.0459,
      "step": 9985
    },
    {
      "epoch": 0.09986,
      "grad_norm": 1.042279602365793,
      "learning_rate": 0.003,
      "loss": 4.0465,
      "step": 9986
    },
    {
      "epoch": 0.09987,
      "grad_norm": 1.2665835602618778,
      "learning_rate": 0.003,
      "loss": 4.0233,
      "step": 9987
    },
    {
      "epoch": 0.09988,
      "grad_norm": 0.89345425739561,
      "learning_rate": 0.003,
      "loss": 4.0526,
      "step": 9988
    },
    {
      "epoch": 0.09989,
      "grad_norm": 1.1833757769860112,
      "learning_rate": 0.003,
      "loss": 4.027,
      "step": 9989
    },
    {
      "epoch": 0.0999,
      "grad_norm": 1.2356695097233596,
      "learning_rate": 0.003,
      "loss": 4.0352,
      "step": 9990
    },
    {
      "epoch": 0.09991,
      "grad_norm": 1.1575667617820786,
      "learning_rate": 0.003,
      "loss": 4.0544,
      "step": 9991
    },
    {
      "epoch": 0.09992,
      "grad_norm": 1.0547400553533788,
      "learning_rate": 0.003,
      "loss": 4.0115,
      "step": 9992
    },
    {
      "epoch": 0.09993,
      "grad_norm": 1.1235377413439607,
      "learning_rate": 0.003,
      "loss": 4.0291,
      "step": 9993
    },
    {
      "epoch": 0.09994,
      "grad_norm": 1.0785414820906185,
      "learning_rate": 0.003,
      "loss": 4.044,
      "step": 9994
    },
    {
      "epoch": 0.09995,
      "grad_norm": 1.2688086844276698,
      "learning_rate": 0.003,
      "loss": 4.0454,
      "step": 9995
    },
    {
      "epoch": 0.09996,
      "grad_norm": 0.9896774509795319,
      "learning_rate": 0.003,
      "loss": 4.0221,
      "step": 9996
    },
    {
      "epoch": 0.09997,
      "grad_norm": 1.4769420952220298,
      "learning_rate": 0.003,
      "loss": 4.0404,
      "step": 9997
    },
    {
      "epoch": 0.09998,
      "grad_norm": 1.1514423885404736,
      "learning_rate": 0.003,
      "loss": 4.0395,
      "step": 9998
    },
    {
      "epoch": 0.09999,
      "grad_norm": 0.9975467451325027,
      "learning_rate": 0.003,
      "loss": 4.0335,
      "step": 9999
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0688722481962825,
      "learning_rate": 0.003,
      "loss": 4.0401,
      "step": 10000
    },
    {
      "epoch": 0.10001,
      "grad_norm": 1.0376217486708665,
      "learning_rate": 0.003,
      "loss": 4.0187,
      "step": 10001
    },
    {
      "epoch": 0.10002,
      "grad_norm": 1.1705774677906358,
      "learning_rate": 0.003,
      "loss": 4.0483,
      "step": 10002
    },
    {
      "epoch": 0.10003,
      "grad_norm": 0.9861302809355035,
      "learning_rate": 0.003,
      "loss": 4.0219,
      "step": 10003
    },
    {
      "epoch": 0.10004,
      "grad_norm": 1.1789334001801655,
      "learning_rate": 0.003,
      "loss": 4.0201,
      "step": 10004
    },
    {
      "epoch": 0.10005,
      "grad_norm": 1.1989313194786173,
      "learning_rate": 0.003,
      "loss": 4.0336,
      "step": 10005
    },
    {
      "epoch": 0.10006,
      "grad_norm": 0.9685441038364853,
      "learning_rate": 0.003,
      "loss": 4.0312,
      "step": 10006
    },
    {
      "epoch": 0.10007,
      "grad_norm": 1.1406363380895599,
      "learning_rate": 0.003,
      "loss": 4.013,
      "step": 10007
    },
    {
      "epoch": 0.10008,
      "grad_norm": 1.1401310833512326,
      "learning_rate": 0.003,
      "loss": 4.0422,
      "step": 10008
    },
    {
      "epoch": 0.10009,
      "grad_norm": 1.2874194973352402,
      "learning_rate": 0.003,
      "loss": 4.0303,
      "step": 10009
    },
    {
      "epoch": 0.1001,
      "grad_norm": 0.9395473569554981,
      "learning_rate": 0.003,
      "loss": 4.0442,
      "step": 10010
    },
    {
      "epoch": 0.10011,
      "grad_norm": 1.1533792238419305,
      "learning_rate": 0.003,
      "loss": 4.0321,
      "step": 10011
    },
    {
      "epoch": 0.10012,
      "grad_norm": 1.1011717780365395,
      "learning_rate": 0.003,
      "loss": 4.0344,
      "step": 10012
    },
    {
      "epoch": 0.10013,
      "grad_norm": 1.1016950112639643,
      "learning_rate": 0.003,
      "loss": 4.0531,
      "step": 10013
    },
    {
      "epoch": 0.10014,
      "grad_norm": 1.1527773791821763,
      "learning_rate": 0.003,
      "loss": 4.0453,
      "step": 10014
    },
    {
      "epoch": 0.10015,
      "grad_norm": 1.1942046889073201,
      "learning_rate": 0.003,
      "loss": 4.0643,
      "step": 10015
    },
    {
      "epoch": 0.10016,
      "grad_norm": 0.957925342866834,
      "learning_rate": 0.003,
      "loss": 4.0205,
      "step": 10016
    },
    {
      "epoch": 0.10017,
      "grad_norm": 1.15252875423962,
      "learning_rate": 0.003,
      "loss": 4.0451,
      "step": 10017
    },
    {
      "epoch": 0.10018,
      "grad_norm": 1.0374379123877617,
      "learning_rate": 0.003,
      "loss": 4.0459,
      "step": 10018
    },
    {
      "epoch": 0.10019,
      "grad_norm": 1.1794252200816104,
      "learning_rate": 0.003,
      "loss": 4.0414,
      "step": 10019
    },
    {
      "epoch": 0.1002,
      "grad_norm": 1.5203356193802966,
      "learning_rate": 0.003,
      "loss": 4.0191,
      "step": 10020
    },
    {
      "epoch": 0.10021,
      "grad_norm": 1.0029056619089192,
      "learning_rate": 0.003,
      "loss": 4.0475,
      "step": 10021
    },
    {
      "epoch": 0.10022,
      "grad_norm": 1.3936100699293918,
      "learning_rate": 0.003,
      "loss": 4.0404,
      "step": 10022
    },
    {
      "epoch": 0.10023,
      "grad_norm": 0.9821509836904537,
      "learning_rate": 0.003,
      "loss": 4.0392,
      "step": 10023
    },
    {
      "epoch": 0.10024,
      "grad_norm": 1.0077875906773899,
      "learning_rate": 0.003,
      "loss": 4.0469,
      "step": 10024
    },
    {
      "epoch": 0.10025,
      "grad_norm": 1.2413891542340003,
      "learning_rate": 0.003,
      "loss": 4.0422,
      "step": 10025
    },
    {
      "epoch": 0.10026,
      "grad_norm": 0.9367455732537656,
      "learning_rate": 0.003,
      "loss": 4.0161,
      "step": 10026
    },
    {
      "epoch": 0.10027,
      "grad_norm": 1.0593921888120654,
      "learning_rate": 0.003,
      "loss": 4.0129,
      "step": 10027
    },
    {
      "epoch": 0.10028,
      "grad_norm": 1.167695592578088,
      "learning_rate": 0.003,
      "loss": 4.0697,
      "step": 10028
    },
    {
      "epoch": 0.10029,
      "grad_norm": 1.1890588826725337,
      "learning_rate": 0.003,
      "loss": 4.0336,
      "step": 10029
    },
    {
      "epoch": 0.1003,
      "grad_norm": 1.1200344528615116,
      "learning_rate": 0.003,
      "loss": 4.0434,
      "step": 10030
    },
    {
      "epoch": 0.10031,
      "grad_norm": 1.1417105447998874,
      "learning_rate": 0.003,
      "loss": 4.0708,
      "step": 10031
    },
    {
      "epoch": 0.10032,
      "grad_norm": 1.0185250099652587,
      "learning_rate": 0.003,
      "loss": 4.0223,
      "step": 10032
    },
    {
      "epoch": 0.10033,
      "grad_norm": 1.370599313867179,
      "learning_rate": 0.003,
      "loss": 4.0345,
      "step": 10033
    },
    {
      "epoch": 0.10034,
      "grad_norm": 1.1853235683781365,
      "learning_rate": 0.003,
      "loss": 4.0207,
      "step": 10034
    },
    {
      "epoch": 0.10035,
      "grad_norm": 1.063599029412153,
      "learning_rate": 0.003,
      "loss": 4.0266,
      "step": 10035
    },
    {
      "epoch": 0.10036,
      "grad_norm": 1.3175134033811957,
      "learning_rate": 0.003,
      "loss": 4.0032,
      "step": 10036
    },
    {
      "epoch": 0.10037,
      "grad_norm": 1.0960419587396677,
      "learning_rate": 0.003,
      "loss": 4.0199,
      "step": 10037
    },
    {
      "epoch": 0.10038,
      "grad_norm": 1.0317353100182982,
      "learning_rate": 0.003,
      "loss": 4.0501,
      "step": 10038
    },
    {
      "epoch": 0.10039,
      "grad_norm": 1.0757103090572837,
      "learning_rate": 0.003,
      "loss": 4.029,
      "step": 10039
    },
    {
      "epoch": 0.1004,
      "grad_norm": 1.087521733469096,
      "learning_rate": 0.003,
      "loss": 4.0342,
      "step": 10040
    },
    {
      "epoch": 0.10041,
      "grad_norm": 1.2084731171411833,
      "learning_rate": 0.003,
      "loss": 4.0444,
      "step": 10041
    },
    {
      "epoch": 0.10042,
      "grad_norm": 0.9946783035892525,
      "learning_rate": 0.003,
      "loss": 4.0609,
      "step": 10042
    },
    {
      "epoch": 0.10043,
      "grad_norm": 1.4298803409305711,
      "learning_rate": 0.003,
      "loss": 4.0693,
      "step": 10043
    },
    {
      "epoch": 0.10044,
      "grad_norm": 0.9427138046048616,
      "learning_rate": 0.003,
      "loss": 4.0441,
      "step": 10044
    },
    {
      "epoch": 0.10045,
      "grad_norm": 1.1440424909635767,
      "learning_rate": 0.003,
      "loss": 4.0226,
      "step": 10045
    },
    {
      "epoch": 0.10046,
      "grad_norm": 1.2208645128728925,
      "learning_rate": 0.003,
      "loss": 4.0501,
      "step": 10046
    },
    {
      "epoch": 0.10047,
      "grad_norm": 1.1520779920124797,
      "learning_rate": 0.003,
      "loss": 4.0412,
      "step": 10047
    },
    {
      "epoch": 0.10048,
      "grad_norm": 1.1859885677535336,
      "learning_rate": 0.003,
      "loss": 4.0402,
      "step": 10048
    },
    {
      "epoch": 0.10049,
      "grad_norm": 1.1159990063667586,
      "learning_rate": 0.003,
      "loss": 4.0078,
      "step": 10049
    },
    {
      "epoch": 0.1005,
      "grad_norm": 1.1289846456525583,
      "learning_rate": 0.003,
      "loss": 4.0402,
      "step": 10050
    },
    {
      "epoch": 0.10051,
      "grad_norm": 1.113938198451074,
      "learning_rate": 0.003,
      "loss": 4.0318,
      "step": 10051
    },
    {
      "epoch": 0.10052,
      "grad_norm": 1.108899705969844,
      "learning_rate": 0.003,
      "loss": 4.022,
      "step": 10052
    },
    {
      "epoch": 0.10053,
      "grad_norm": 1.3756042460285796,
      "learning_rate": 0.003,
      "loss": 4.0196,
      "step": 10053
    },
    {
      "epoch": 0.10054,
      "grad_norm": 0.9418379581640134,
      "learning_rate": 0.003,
      "loss": 4.0423,
      "step": 10054
    },
    {
      "epoch": 0.10055,
      "grad_norm": 1.1739862813921886,
      "learning_rate": 0.003,
      "loss": 4.0485,
      "step": 10055
    },
    {
      "epoch": 0.10056,
      "grad_norm": 0.9555340144478537,
      "learning_rate": 0.003,
      "loss": 4.0536,
      "step": 10056
    },
    {
      "epoch": 0.10057,
      "grad_norm": 1.1683761393937124,
      "learning_rate": 0.003,
      "loss": 4.044,
      "step": 10057
    },
    {
      "epoch": 0.10058,
      "grad_norm": 0.9254542092215158,
      "learning_rate": 0.003,
      "loss": 4.0366,
      "step": 10058
    },
    {
      "epoch": 0.10059,
      "grad_norm": 1.0185610648270182,
      "learning_rate": 0.003,
      "loss": 4.0194,
      "step": 10059
    },
    {
      "epoch": 0.1006,
      "grad_norm": 1.1877134215630414,
      "learning_rate": 0.003,
      "loss": 4.0411,
      "step": 10060
    },
    {
      "epoch": 0.10061,
      "grad_norm": 1.1455483826531043,
      "learning_rate": 0.003,
      "loss": 4.0172,
      "step": 10061
    },
    {
      "epoch": 0.10062,
      "grad_norm": 1.2513080657957403,
      "learning_rate": 0.003,
      "loss": 4.0191,
      "step": 10062
    },
    {
      "epoch": 0.10063,
      "grad_norm": 1.2124912360540205,
      "learning_rate": 0.003,
      "loss": 4.0197,
      "step": 10063
    },
    {
      "epoch": 0.10064,
      "grad_norm": 1.1274860749990432,
      "learning_rate": 0.003,
      "loss": 4.0321,
      "step": 10064
    },
    {
      "epoch": 0.10065,
      "grad_norm": 1.2298310787584599,
      "learning_rate": 0.003,
      "loss": 4.0579,
      "step": 10065
    },
    {
      "epoch": 0.10066,
      "grad_norm": 0.9103543254343075,
      "learning_rate": 0.003,
      "loss": 4.0243,
      "step": 10066
    },
    {
      "epoch": 0.10067,
      "grad_norm": 1.0030930500020137,
      "learning_rate": 0.003,
      "loss": 4.037,
      "step": 10067
    },
    {
      "epoch": 0.10068,
      "grad_norm": 1.2779301791387994,
      "learning_rate": 0.003,
      "loss": 4.0386,
      "step": 10068
    },
    {
      "epoch": 0.10069,
      "grad_norm": 0.8818316506876382,
      "learning_rate": 0.003,
      "loss": 4.0189,
      "step": 10069
    },
    {
      "epoch": 0.1007,
      "grad_norm": 0.9710635888596929,
      "learning_rate": 0.003,
      "loss": 4.0113,
      "step": 10070
    },
    {
      "epoch": 0.10071,
      "grad_norm": 1.2723709892951096,
      "learning_rate": 0.003,
      "loss": 4.0307,
      "step": 10071
    },
    {
      "epoch": 0.10072,
      "grad_norm": 1.0605681039514947,
      "learning_rate": 0.003,
      "loss": 4.0278,
      "step": 10072
    },
    {
      "epoch": 0.10073,
      "grad_norm": 1.2166085354095868,
      "learning_rate": 0.003,
      "loss": 4.0435,
      "step": 10073
    },
    {
      "epoch": 0.10074,
      "grad_norm": 1.2473625383962492,
      "learning_rate": 0.003,
      "loss": 4.0126,
      "step": 10074
    },
    {
      "epoch": 0.10075,
      "grad_norm": 1.2226275619264804,
      "learning_rate": 0.003,
      "loss": 4.0637,
      "step": 10075
    },
    {
      "epoch": 0.10076,
      "grad_norm": 1.229825772234811,
      "learning_rate": 0.003,
      "loss": 4.0411,
      "step": 10076
    },
    {
      "epoch": 0.10077,
      "grad_norm": 0.9456387474821364,
      "learning_rate": 0.003,
      "loss": 4.0533,
      "step": 10077
    },
    {
      "epoch": 0.10078,
      "grad_norm": 1.1385609146582953,
      "learning_rate": 0.003,
      "loss": 4.0091,
      "step": 10078
    },
    {
      "epoch": 0.10079,
      "grad_norm": 1.2697352120922631,
      "learning_rate": 0.003,
      "loss": 4.0369,
      "step": 10079
    },
    {
      "epoch": 0.1008,
      "grad_norm": 1.4025860537648713,
      "learning_rate": 0.003,
      "loss": 4.0341,
      "step": 10080
    },
    {
      "epoch": 0.10081,
      "grad_norm": 1.0216197686884323,
      "learning_rate": 0.003,
      "loss": 4.0515,
      "step": 10081
    },
    {
      "epoch": 0.10082,
      "grad_norm": 1.132424109176119,
      "learning_rate": 0.003,
      "loss": 4.0318,
      "step": 10082
    },
    {
      "epoch": 0.10083,
      "grad_norm": 1.021553444751305,
      "learning_rate": 0.003,
      "loss": 4.0412,
      "step": 10083
    },
    {
      "epoch": 0.10084,
      "grad_norm": 1.148442579664875,
      "learning_rate": 0.003,
      "loss": 4.0007,
      "step": 10084
    },
    {
      "epoch": 0.10085,
      "grad_norm": 1.0104285734886607,
      "learning_rate": 0.003,
      "loss": 4.0135,
      "step": 10085
    },
    {
      "epoch": 0.10086,
      "grad_norm": 1.1379868586708077,
      "learning_rate": 0.003,
      "loss": 4.0372,
      "step": 10086
    },
    {
      "epoch": 0.10087,
      "grad_norm": 1.186592736125496,
      "learning_rate": 0.003,
      "loss": 4.0504,
      "step": 10087
    },
    {
      "epoch": 0.10088,
      "grad_norm": 1.0060310853482595,
      "learning_rate": 0.003,
      "loss": 4.0217,
      "step": 10088
    },
    {
      "epoch": 0.10089,
      "grad_norm": 1.0640640849166343,
      "learning_rate": 0.003,
      "loss": 4.0309,
      "step": 10089
    },
    {
      "epoch": 0.1009,
      "grad_norm": 1.3560044661047932,
      "learning_rate": 0.003,
      "loss": 4.0622,
      "step": 10090
    },
    {
      "epoch": 0.10091,
      "grad_norm": 1.1078241142090288,
      "learning_rate": 0.003,
      "loss": 4.0276,
      "step": 10091
    },
    {
      "epoch": 0.10092,
      "grad_norm": 1.0068291505885563,
      "learning_rate": 0.003,
      "loss": 4.0395,
      "step": 10092
    },
    {
      "epoch": 0.10093,
      "grad_norm": 1.0599705107020405,
      "learning_rate": 0.003,
      "loss": 4.032,
      "step": 10093
    },
    {
      "epoch": 0.10094,
      "grad_norm": 1.0876529083055502,
      "learning_rate": 0.003,
      "loss": 4.0208,
      "step": 10094
    },
    {
      "epoch": 0.10095,
      "grad_norm": 1.0961661282027626,
      "learning_rate": 0.003,
      "loss": 4.0103,
      "step": 10095
    },
    {
      "epoch": 0.10096,
      "grad_norm": 1.374941789482069,
      "learning_rate": 0.003,
      "loss": 4.0361,
      "step": 10096
    },
    {
      "epoch": 0.10097,
      "grad_norm": 0.9082696715295778,
      "learning_rate": 0.003,
      "loss": 4.0248,
      "step": 10097
    },
    {
      "epoch": 0.10098,
      "grad_norm": 1.0771725339779925,
      "learning_rate": 0.003,
      "loss": 4.0244,
      "step": 10098
    },
    {
      "epoch": 0.10099,
      "grad_norm": 1.0182992509469744,
      "learning_rate": 0.003,
      "loss": 4.0269,
      "step": 10099
    },
    {
      "epoch": 0.101,
      "grad_norm": 1.1428363685009697,
      "learning_rate": 0.003,
      "loss": 4.0449,
      "step": 10100
    },
    {
      "epoch": 0.10101,
      "grad_norm": 0.9694039996712733,
      "learning_rate": 0.003,
      "loss": 4.0213,
      "step": 10101
    },
    {
      "epoch": 0.10102,
      "grad_norm": 1.0472023173402798,
      "learning_rate": 0.003,
      "loss": 4.0476,
      "step": 10102
    },
    {
      "epoch": 0.10103,
      "grad_norm": 1.312075296718054,
      "learning_rate": 0.003,
      "loss": 4.0155,
      "step": 10103
    },
    {
      "epoch": 0.10104,
      "grad_norm": 1.069956052695916,
      "learning_rate": 0.003,
      "loss": 4.0389,
      "step": 10104
    },
    {
      "epoch": 0.10105,
      "grad_norm": 1.2931704494456024,
      "learning_rate": 0.003,
      "loss": 4.0378,
      "step": 10105
    },
    {
      "epoch": 0.10106,
      "grad_norm": 1.1208723791794828,
      "learning_rate": 0.003,
      "loss": 4.0381,
      "step": 10106
    },
    {
      "epoch": 0.10107,
      "grad_norm": 1.1935123431071022,
      "learning_rate": 0.003,
      "loss": 4.0402,
      "step": 10107
    },
    {
      "epoch": 0.10108,
      "grad_norm": 1.1350366451944691,
      "learning_rate": 0.003,
      "loss": 4.0467,
      "step": 10108
    },
    {
      "epoch": 0.10109,
      "grad_norm": 1.3607305740575226,
      "learning_rate": 0.003,
      "loss": 4.019,
      "step": 10109
    },
    {
      "epoch": 0.1011,
      "grad_norm": 0.9949435252832233,
      "learning_rate": 0.003,
      "loss": 4.0519,
      "step": 10110
    },
    {
      "epoch": 0.10111,
      "grad_norm": 1.2015497083429867,
      "learning_rate": 0.003,
      "loss": 4.0519,
      "step": 10111
    },
    {
      "epoch": 0.10112,
      "grad_norm": 1.0762531833294637,
      "learning_rate": 0.003,
      "loss": 4.0285,
      "step": 10112
    },
    {
      "epoch": 0.10113,
      "grad_norm": 1.3607307522223708,
      "learning_rate": 0.003,
      "loss": 4.0098,
      "step": 10113
    },
    {
      "epoch": 0.10114,
      "grad_norm": 0.8372866022166425,
      "learning_rate": 0.003,
      "loss": 4.0048,
      "step": 10114
    },
    {
      "epoch": 0.10115,
      "grad_norm": 1.169337588013949,
      "learning_rate": 0.003,
      "loss": 4.0338,
      "step": 10115
    },
    {
      "epoch": 0.10116,
      "grad_norm": 1.1177277939964916,
      "learning_rate": 0.003,
      "loss": 4.0275,
      "step": 10116
    },
    {
      "epoch": 0.10117,
      "grad_norm": 1.1778175104666457,
      "learning_rate": 0.003,
      "loss": 4.0183,
      "step": 10117
    },
    {
      "epoch": 0.10118,
      "grad_norm": 1.120506464897047,
      "learning_rate": 0.003,
      "loss": 4.0573,
      "step": 10118
    },
    {
      "epoch": 0.10119,
      "grad_norm": 1.1474574001683169,
      "learning_rate": 0.003,
      "loss": 4.0203,
      "step": 10119
    },
    {
      "epoch": 0.1012,
      "grad_norm": 0.8783084850811431,
      "learning_rate": 0.003,
      "loss": 4.015,
      "step": 10120
    },
    {
      "epoch": 0.10121,
      "grad_norm": 0.9780743863814633,
      "learning_rate": 0.003,
      "loss": 4.0344,
      "step": 10121
    },
    {
      "epoch": 0.10122,
      "grad_norm": 1.235259685072935,
      "learning_rate": 0.003,
      "loss": 4.0108,
      "step": 10122
    },
    {
      "epoch": 0.10123,
      "grad_norm": 0.8532144497138836,
      "learning_rate": 0.003,
      "loss": 4.0197,
      "step": 10123
    },
    {
      "epoch": 0.10124,
      "grad_norm": 1.0588961816914821,
      "learning_rate": 0.003,
      "loss": 4.0539,
      "step": 10124
    },
    {
      "epoch": 0.10125,
      "grad_norm": 1.0997888688267687,
      "learning_rate": 0.003,
      "loss": 4.0438,
      "step": 10125
    },
    {
      "epoch": 0.10126,
      "grad_norm": 1.012973351895914,
      "learning_rate": 0.003,
      "loss": 4.0218,
      "step": 10126
    },
    {
      "epoch": 0.10127,
      "grad_norm": 1.1601701911571694,
      "learning_rate": 0.003,
      "loss": 4.0525,
      "step": 10127
    },
    {
      "epoch": 0.10128,
      "grad_norm": 0.8661155150916877,
      "learning_rate": 0.003,
      "loss": 4.0413,
      "step": 10128
    },
    {
      "epoch": 0.10129,
      "grad_norm": 1.01187696037196,
      "learning_rate": 0.003,
      "loss": 4.0457,
      "step": 10129
    },
    {
      "epoch": 0.1013,
      "grad_norm": 1.377560868899849,
      "learning_rate": 0.003,
      "loss": 4.008,
      "step": 10130
    },
    {
      "epoch": 0.10131,
      "grad_norm": 1.2225045415187876,
      "learning_rate": 0.003,
      "loss": 4.0281,
      "step": 10131
    },
    {
      "epoch": 0.10132,
      "grad_norm": 1.0805752380760862,
      "learning_rate": 0.003,
      "loss": 4.043,
      "step": 10132
    },
    {
      "epoch": 0.10133,
      "grad_norm": 1.1689334091154489,
      "learning_rate": 0.003,
      "loss": 4.0215,
      "step": 10133
    },
    {
      "epoch": 0.10134,
      "grad_norm": 1.3581198813392088,
      "learning_rate": 0.003,
      "loss": 4.019,
      "step": 10134
    },
    {
      "epoch": 0.10135,
      "grad_norm": 0.7676340885206456,
      "learning_rate": 0.003,
      "loss": 4.0264,
      "step": 10135
    },
    {
      "epoch": 0.10136,
      "grad_norm": 1.0234587289832506,
      "learning_rate": 0.003,
      "loss": 4.0083,
      "step": 10136
    },
    {
      "epoch": 0.10137,
      "grad_norm": 1.301339315671966,
      "learning_rate": 0.003,
      "loss": 4.0251,
      "step": 10137
    },
    {
      "epoch": 0.10138,
      "grad_norm": 0.9786141586414955,
      "learning_rate": 0.003,
      "loss": 4.0333,
      "step": 10138
    },
    {
      "epoch": 0.10139,
      "grad_norm": 1.227946401625274,
      "learning_rate": 0.003,
      "loss": 4.0004,
      "step": 10139
    },
    {
      "epoch": 0.1014,
      "grad_norm": 1.1398058430097253,
      "learning_rate": 0.003,
      "loss": 4.0602,
      "step": 10140
    },
    {
      "epoch": 0.10141,
      "grad_norm": 1.2511523411004795,
      "learning_rate": 0.003,
      "loss": 4.0254,
      "step": 10141
    },
    {
      "epoch": 0.10142,
      "grad_norm": 1.0530250525908387,
      "learning_rate": 0.003,
      "loss": 4.017,
      "step": 10142
    },
    {
      "epoch": 0.10143,
      "grad_norm": 1.2429388861453106,
      "learning_rate": 0.003,
      "loss": 4.0149,
      "step": 10143
    },
    {
      "epoch": 0.10144,
      "grad_norm": 0.8865877007140897,
      "learning_rate": 0.003,
      "loss": 4.0396,
      "step": 10144
    },
    {
      "epoch": 0.10145,
      "grad_norm": 1.0163852178819597,
      "learning_rate": 0.003,
      "loss": 4.0256,
      "step": 10145
    },
    {
      "epoch": 0.10146,
      "grad_norm": 1.3657614044980302,
      "learning_rate": 0.003,
      "loss": 4.0142,
      "step": 10146
    },
    {
      "epoch": 0.10147,
      "grad_norm": 0.9950226203446018,
      "learning_rate": 0.003,
      "loss": 4.036,
      "step": 10147
    },
    {
      "epoch": 0.10148,
      "grad_norm": 1.1980345153961933,
      "learning_rate": 0.003,
      "loss": 4.0368,
      "step": 10148
    },
    {
      "epoch": 0.10149,
      "grad_norm": 1.019226564296152,
      "learning_rate": 0.003,
      "loss": 3.9982,
      "step": 10149
    },
    {
      "epoch": 0.1015,
      "grad_norm": 0.965993025404742,
      "learning_rate": 0.003,
      "loss": 4.0191,
      "step": 10150
    },
    {
      "epoch": 0.10151,
      "grad_norm": 1.3679875752682615,
      "learning_rate": 0.003,
      "loss": 4.0387,
      "step": 10151
    },
    {
      "epoch": 0.10152,
      "grad_norm": 1.0149437487586224,
      "learning_rate": 0.003,
      "loss": 4.0335,
      "step": 10152
    },
    {
      "epoch": 0.10153,
      "grad_norm": 1.3162415560624259,
      "learning_rate": 0.003,
      "loss": 4.0441,
      "step": 10153
    },
    {
      "epoch": 0.10154,
      "grad_norm": 1.0889175982737318,
      "learning_rate": 0.003,
      "loss": 4.0451,
      "step": 10154
    },
    {
      "epoch": 0.10155,
      "grad_norm": 1.108256381768691,
      "learning_rate": 0.003,
      "loss": 4.0396,
      "step": 10155
    },
    {
      "epoch": 0.10156,
      "grad_norm": 1.0449850400339091,
      "learning_rate": 0.003,
      "loss": 4.0147,
      "step": 10156
    },
    {
      "epoch": 0.10157,
      "grad_norm": 1.2857069143842972,
      "learning_rate": 0.003,
      "loss": 4.0226,
      "step": 10157
    },
    {
      "epoch": 0.10158,
      "grad_norm": 0.96497871026518,
      "learning_rate": 0.003,
      "loss": 4.0324,
      "step": 10158
    },
    {
      "epoch": 0.10159,
      "grad_norm": 1.2412464612862566,
      "learning_rate": 0.003,
      "loss": 4.0124,
      "step": 10159
    },
    {
      "epoch": 0.1016,
      "grad_norm": 1.0649011421878327,
      "learning_rate": 0.003,
      "loss": 4.0416,
      "step": 10160
    },
    {
      "epoch": 0.10161,
      "grad_norm": 1.3828387996970612,
      "learning_rate": 0.003,
      "loss": 4.0232,
      "step": 10161
    },
    {
      "epoch": 0.10162,
      "grad_norm": 1.0268495952079426,
      "learning_rate": 0.003,
      "loss": 4.0318,
      "step": 10162
    },
    {
      "epoch": 0.10163,
      "grad_norm": 1.2209399315758185,
      "learning_rate": 0.003,
      "loss": 4.0362,
      "step": 10163
    },
    {
      "epoch": 0.10164,
      "grad_norm": 1.0382424289978558,
      "learning_rate": 0.003,
      "loss": 4.0519,
      "step": 10164
    },
    {
      "epoch": 0.10165,
      "grad_norm": 1.1431393692734733,
      "learning_rate": 0.003,
      "loss": 4.0231,
      "step": 10165
    },
    {
      "epoch": 0.10166,
      "grad_norm": 1.025676794642058,
      "learning_rate": 0.003,
      "loss": 4.0431,
      "step": 10166
    },
    {
      "epoch": 0.10167,
      "grad_norm": 1.0773528056179977,
      "learning_rate": 0.003,
      "loss": 4.0345,
      "step": 10167
    },
    {
      "epoch": 0.10168,
      "grad_norm": 0.9799372640648593,
      "learning_rate": 0.003,
      "loss": 4.0016,
      "step": 10168
    },
    {
      "epoch": 0.10169,
      "grad_norm": 1.3573925168119307,
      "learning_rate": 0.003,
      "loss": 4.0399,
      "step": 10169
    },
    {
      "epoch": 0.1017,
      "grad_norm": 1.069207482050234,
      "learning_rate": 0.003,
      "loss": 4.0379,
      "step": 10170
    },
    {
      "epoch": 0.10171,
      "grad_norm": 1.1246476134224932,
      "learning_rate": 0.003,
      "loss": 4.0173,
      "step": 10171
    },
    {
      "epoch": 0.10172,
      "grad_norm": 1.0443602726524843,
      "learning_rate": 0.003,
      "loss": 4.0431,
      "step": 10172
    },
    {
      "epoch": 0.10173,
      "grad_norm": 1.2312653223111745,
      "learning_rate": 0.003,
      "loss": 4.0441,
      "step": 10173
    },
    {
      "epoch": 0.10174,
      "grad_norm": 1.1122257381493827,
      "learning_rate": 0.003,
      "loss": 4.024,
      "step": 10174
    },
    {
      "epoch": 0.10175,
      "grad_norm": 1.3749066864648898,
      "learning_rate": 0.003,
      "loss": 4.0258,
      "step": 10175
    },
    {
      "epoch": 0.10176,
      "grad_norm": 0.9339307181884096,
      "learning_rate": 0.003,
      "loss": 4.031,
      "step": 10176
    },
    {
      "epoch": 0.10177,
      "grad_norm": 0.8986097272545992,
      "learning_rate": 0.003,
      "loss": 4.0191,
      "step": 10177
    },
    {
      "epoch": 0.10178,
      "grad_norm": 0.9946058914616095,
      "learning_rate": 0.003,
      "loss": 4.035,
      "step": 10178
    },
    {
      "epoch": 0.10179,
      "grad_norm": 1.499674078146875,
      "learning_rate": 0.003,
      "loss": 4.0478,
      "step": 10179
    },
    {
      "epoch": 0.1018,
      "grad_norm": 0.9496966180142791,
      "learning_rate": 0.003,
      "loss": 4.0294,
      "step": 10180
    },
    {
      "epoch": 0.10181,
      "grad_norm": 1.311618816110452,
      "learning_rate": 0.003,
      "loss": 4.041,
      "step": 10181
    },
    {
      "epoch": 0.10182,
      "grad_norm": 1.0708634479960368,
      "learning_rate": 0.003,
      "loss": 4.049,
      "step": 10182
    },
    {
      "epoch": 0.10183,
      "grad_norm": 1.1113870081016295,
      "learning_rate": 0.003,
      "loss": 4.0503,
      "step": 10183
    },
    {
      "epoch": 0.10184,
      "grad_norm": 1.1178353224836102,
      "learning_rate": 0.003,
      "loss": 4.0584,
      "step": 10184
    },
    {
      "epoch": 0.10185,
      "grad_norm": 1.1689044556545598,
      "learning_rate": 0.003,
      "loss": 4.0185,
      "step": 10185
    },
    {
      "epoch": 0.10186,
      "grad_norm": 1.419581610460463,
      "learning_rate": 0.003,
      "loss": 4.052,
      "step": 10186
    },
    {
      "epoch": 0.10187,
      "grad_norm": 1.1733245395221399,
      "learning_rate": 0.003,
      "loss": 4.0332,
      "step": 10187
    },
    {
      "epoch": 0.10188,
      "grad_norm": 0.9580355128633431,
      "learning_rate": 0.003,
      "loss": 4.0219,
      "step": 10188
    },
    {
      "epoch": 0.10189,
      "grad_norm": 1.1722566739996738,
      "learning_rate": 0.003,
      "loss": 3.9995,
      "step": 10189
    },
    {
      "epoch": 0.1019,
      "grad_norm": 1.1612042034180012,
      "learning_rate": 0.003,
      "loss": 4.0377,
      "step": 10190
    },
    {
      "epoch": 0.10191,
      "grad_norm": 1.195329589772822,
      "learning_rate": 0.003,
      "loss": 4.0465,
      "step": 10191
    },
    {
      "epoch": 0.10192,
      "grad_norm": 1.0846595164302155,
      "learning_rate": 0.003,
      "loss": 4.0241,
      "step": 10192
    },
    {
      "epoch": 0.10193,
      "grad_norm": 1.1488860249777917,
      "learning_rate": 0.003,
      "loss": 4.0335,
      "step": 10193
    },
    {
      "epoch": 0.10194,
      "grad_norm": 1.1895044545007407,
      "learning_rate": 0.003,
      "loss": 4.046,
      "step": 10194
    },
    {
      "epoch": 0.10195,
      "grad_norm": 0.9769595430999226,
      "learning_rate": 0.003,
      "loss": 4.0428,
      "step": 10195
    },
    {
      "epoch": 0.10196,
      "grad_norm": 1.1017689182766344,
      "learning_rate": 0.003,
      "loss": 4.0408,
      "step": 10196
    },
    {
      "epoch": 0.10197,
      "grad_norm": 1.0793074132780305,
      "learning_rate": 0.003,
      "loss": 4.0393,
      "step": 10197
    },
    {
      "epoch": 0.10198,
      "grad_norm": 1.0506607464798967,
      "learning_rate": 0.003,
      "loss": 4.0556,
      "step": 10198
    },
    {
      "epoch": 0.10199,
      "grad_norm": 1.1837831773812761,
      "learning_rate": 0.003,
      "loss": 4.0353,
      "step": 10199
    },
    {
      "epoch": 0.102,
      "grad_norm": 1.076073379069326,
      "learning_rate": 0.003,
      "loss": 4.0261,
      "step": 10200
    },
    {
      "epoch": 0.10201,
      "grad_norm": 1.0825848802681943,
      "learning_rate": 0.003,
      "loss": 4.0329,
      "step": 10201
    },
    {
      "epoch": 0.10202,
      "grad_norm": 1.3241527514135416,
      "learning_rate": 0.003,
      "loss": 4.0058,
      "step": 10202
    },
    {
      "epoch": 0.10203,
      "grad_norm": 1.1724175197204518,
      "learning_rate": 0.003,
      "loss": 4.0214,
      "step": 10203
    },
    {
      "epoch": 0.10204,
      "grad_norm": 1.3185840871244239,
      "learning_rate": 0.003,
      "loss": 4.057,
      "step": 10204
    },
    {
      "epoch": 0.10205,
      "grad_norm": 0.9734256552070326,
      "learning_rate": 0.003,
      "loss": 4.0289,
      "step": 10205
    },
    {
      "epoch": 0.10206,
      "grad_norm": 1.1022386726905078,
      "learning_rate": 0.003,
      "loss": 4.0407,
      "step": 10206
    },
    {
      "epoch": 0.10207,
      "grad_norm": 1.173133415900628,
      "learning_rate": 0.003,
      "loss": 4.0333,
      "step": 10207
    },
    {
      "epoch": 0.10208,
      "grad_norm": 1.1305215932951866,
      "learning_rate": 0.003,
      "loss": 3.9987,
      "step": 10208
    },
    {
      "epoch": 0.10209,
      "grad_norm": 1.190358205023772,
      "learning_rate": 0.003,
      "loss": 4.0481,
      "step": 10209
    },
    {
      "epoch": 0.1021,
      "grad_norm": 1.065896145776302,
      "learning_rate": 0.003,
      "loss": 4.0176,
      "step": 10210
    },
    {
      "epoch": 0.10211,
      "grad_norm": 1.3102703273182168,
      "learning_rate": 0.003,
      "loss": 4.0593,
      "step": 10211
    },
    {
      "epoch": 0.10212,
      "grad_norm": 0.9660207653575211,
      "learning_rate": 0.003,
      "loss": 4.0301,
      "step": 10212
    },
    {
      "epoch": 0.10213,
      "grad_norm": 0.9543652414333238,
      "learning_rate": 0.003,
      "loss": 4.0047,
      "step": 10213
    },
    {
      "epoch": 0.10214,
      "grad_norm": 1.036805427205904,
      "learning_rate": 0.003,
      "loss": 4.0368,
      "step": 10214
    },
    {
      "epoch": 0.10215,
      "grad_norm": 1.057033327613356,
      "learning_rate": 0.003,
      "loss": 4.0487,
      "step": 10215
    },
    {
      "epoch": 0.10216,
      "grad_norm": 1.134350884528531,
      "learning_rate": 0.003,
      "loss": 4.043,
      "step": 10216
    },
    {
      "epoch": 0.10217,
      "grad_norm": 1.1648146450494354,
      "learning_rate": 0.003,
      "loss": 4.0283,
      "step": 10217
    },
    {
      "epoch": 0.10218,
      "grad_norm": 1.4506285684005091,
      "learning_rate": 0.003,
      "loss": 4.0673,
      "step": 10218
    },
    {
      "epoch": 0.10219,
      "grad_norm": 0.9222728969038609,
      "learning_rate": 0.003,
      "loss": 4.0009,
      "step": 10219
    },
    {
      "epoch": 0.1022,
      "grad_norm": 1.0654406081445127,
      "learning_rate": 0.003,
      "loss": 4.0343,
      "step": 10220
    },
    {
      "epoch": 0.10221,
      "grad_norm": 1.252155041126181,
      "learning_rate": 0.003,
      "loss": 4.0536,
      "step": 10221
    },
    {
      "epoch": 0.10222,
      "grad_norm": 1.206778847995836,
      "learning_rate": 0.003,
      "loss": 4.0386,
      "step": 10222
    },
    {
      "epoch": 0.10223,
      "grad_norm": 1.2444900182503111,
      "learning_rate": 0.003,
      "loss": 4.0467,
      "step": 10223
    },
    {
      "epoch": 0.10224,
      "grad_norm": 1.040598030326006,
      "learning_rate": 0.003,
      "loss": 4.0383,
      "step": 10224
    },
    {
      "epoch": 0.10225,
      "grad_norm": 1.4878752264835577,
      "learning_rate": 0.003,
      "loss": 4.04,
      "step": 10225
    },
    {
      "epoch": 0.10226,
      "grad_norm": 0.9402450092696958,
      "learning_rate": 0.003,
      "loss": 4.0818,
      "step": 10226
    },
    {
      "epoch": 0.10227,
      "grad_norm": 1.2884407987069295,
      "learning_rate": 0.003,
      "loss": 4.0451,
      "step": 10227
    },
    {
      "epoch": 0.10228,
      "grad_norm": 1.1522519965955926,
      "learning_rate": 0.003,
      "loss": 4.0548,
      "step": 10228
    },
    {
      "epoch": 0.10229,
      "grad_norm": 1.0468946409066122,
      "learning_rate": 0.003,
      "loss": 4.0149,
      "step": 10229
    },
    {
      "epoch": 0.1023,
      "grad_norm": 1.2964574881601119,
      "learning_rate": 0.003,
      "loss": 4.0458,
      "step": 10230
    },
    {
      "epoch": 0.10231,
      "grad_norm": 1.24484224526044,
      "learning_rate": 0.003,
      "loss": 4.0436,
      "step": 10231
    },
    {
      "epoch": 0.10232,
      "grad_norm": 1.0853521484203787,
      "learning_rate": 0.003,
      "loss": 4.0437,
      "step": 10232
    },
    {
      "epoch": 0.10233,
      "grad_norm": 1.065069643441235,
      "learning_rate": 0.003,
      "loss": 4.0131,
      "step": 10233
    },
    {
      "epoch": 0.10234,
      "grad_norm": 1.1844230860579215,
      "learning_rate": 0.003,
      "loss": 4.0521,
      "step": 10234
    },
    {
      "epoch": 0.10235,
      "grad_norm": 1.1858854955074398,
      "learning_rate": 0.003,
      "loss": 4.0345,
      "step": 10235
    },
    {
      "epoch": 0.10236,
      "grad_norm": 1.155823762579354,
      "learning_rate": 0.003,
      "loss": 4.0689,
      "step": 10236
    },
    {
      "epoch": 0.10237,
      "grad_norm": 1.259243563598782,
      "learning_rate": 0.003,
      "loss": 4.032,
      "step": 10237
    },
    {
      "epoch": 0.10238,
      "grad_norm": 1.021888249686767,
      "learning_rate": 0.003,
      "loss": 4.0249,
      "step": 10238
    },
    {
      "epoch": 0.10239,
      "grad_norm": 1.2726229230148722,
      "learning_rate": 0.003,
      "loss": 4.0704,
      "step": 10239
    },
    {
      "epoch": 0.1024,
      "grad_norm": 0.8686221007981468,
      "learning_rate": 0.003,
      "loss": 3.9857,
      "step": 10240
    },
    {
      "epoch": 0.10241,
      "grad_norm": 1.0195922987283028,
      "learning_rate": 0.003,
      "loss": 4.0172,
      "step": 10241
    },
    {
      "epoch": 0.10242,
      "grad_norm": 1.4977495826980316,
      "learning_rate": 0.003,
      "loss": 4.0352,
      "step": 10242
    },
    {
      "epoch": 0.10243,
      "grad_norm": 0.8331453442599753,
      "learning_rate": 0.003,
      "loss": 4.0384,
      "step": 10243
    },
    {
      "epoch": 0.10244,
      "grad_norm": 0.9904099892605175,
      "learning_rate": 0.003,
      "loss": 4.0277,
      "step": 10244
    },
    {
      "epoch": 0.10245,
      "grad_norm": 1.1062252978239564,
      "learning_rate": 0.003,
      "loss": 4.0142,
      "step": 10245
    },
    {
      "epoch": 0.10246,
      "grad_norm": 1.2991428997983552,
      "learning_rate": 0.003,
      "loss": 4.0641,
      "step": 10246
    },
    {
      "epoch": 0.10247,
      "grad_norm": 1.017899165985093,
      "learning_rate": 0.003,
      "loss": 4.0616,
      "step": 10247
    },
    {
      "epoch": 0.10248,
      "grad_norm": 1.1832127454060877,
      "learning_rate": 0.003,
      "loss": 4.0553,
      "step": 10248
    },
    {
      "epoch": 0.10249,
      "grad_norm": 1.1079456008025137,
      "learning_rate": 0.003,
      "loss": 4.0389,
      "step": 10249
    },
    {
      "epoch": 0.1025,
      "grad_norm": 1.3362106747505438,
      "learning_rate": 0.003,
      "loss": 4.0511,
      "step": 10250
    },
    {
      "epoch": 0.10251,
      "grad_norm": 1.1117749161341806,
      "learning_rate": 0.003,
      "loss": 4.0348,
      "step": 10251
    },
    {
      "epoch": 0.10252,
      "grad_norm": 1.2698367881846497,
      "learning_rate": 0.003,
      "loss": 4.0158,
      "step": 10252
    },
    {
      "epoch": 0.10253,
      "grad_norm": 1.149957054689331,
      "learning_rate": 0.003,
      "loss": 4.0384,
      "step": 10253
    },
    {
      "epoch": 0.10254,
      "grad_norm": 0.9917763597068653,
      "learning_rate": 0.003,
      "loss": 4.0143,
      "step": 10254
    },
    {
      "epoch": 0.10255,
      "grad_norm": 1.1692203345741474,
      "learning_rate": 0.003,
      "loss": 4.029,
      "step": 10255
    },
    {
      "epoch": 0.10256,
      "grad_norm": 1.084143742321343,
      "learning_rate": 0.003,
      "loss": 4.0254,
      "step": 10256
    },
    {
      "epoch": 0.10257,
      "grad_norm": 1.2380367761444084,
      "learning_rate": 0.003,
      "loss": 4.0197,
      "step": 10257
    },
    {
      "epoch": 0.10258,
      "grad_norm": 1.1407215637889365,
      "learning_rate": 0.003,
      "loss": 4.0301,
      "step": 10258
    },
    {
      "epoch": 0.10259,
      "grad_norm": 1.1517658724662878,
      "learning_rate": 0.003,
      "loss": 4.0263,
      "step": 10259
    },
    {
      "epoch": 0.1026,
      "grad_norm": 1.025390717700848,
      "learning_rate": 0.003,
      "loss": 4.0351,
      "step": 10260
    },
    {
      "epoch": 0.10261,
      "grad_norm": 1.2024437713037104,
      "learning_rate": 0.003,
      "loss": 4.0343,
      "step": 10261
    },
    {
      "epoch": 0.10262,
      "grad_norm": 1.0958713174379848,
      "learning_rate": 0.003,
      "loss": 4.038,
      "step": 10262
    },
    {
      "epoch": 0.10263,
      "grad_norm": 0.8765257399245001,
      "learning_rate": 0.003,
      "loss": 4.0133,
      "step": 10263
    },
    {
      "epoch": 0.10264,
      "grad_norm": 0.9633993597005445,
      "learning_rate": 0.003,
      "loss": 4.0181,
      "step": 10264
    },
    {
      "epoch": 0.10265,
      "grad_norm": 1.35821204523184,
      "learning_rate": 0.003,
      "loss": 4.0345,
      "step": 10265
    },
    {
      "epoch": 0.10266,
      "grad_norm": 1.0219625008871738,
      "learning_rate": 0.003,
      "loss": 4.0236,
      "step": 10266
    },
    {
      "epoch": 0.10267,
      "grad_norm": 1.2227874326162127,
      "learning_rate": 0.003,
      "loss": 4.0615,
      "step": 10267
    },
    {
      "epoch": 0.10268,
      "grad_norm": 0.978586011050389,
      "learning_rate": 0.003,
      "loss": 4.0205,
      "step": 10268
    },
    {
      "epoch": 0.10269,
      "grad_norm": 1.3366116601016778,
      "learning_rate": 0.003,
      "loss": 4.0391,
      "step": 10269
    },
    {
      "epoch": 0.1027,
      "grad_norm": 1.203267489591858,
      "learning_rate": 0.003,
      "loss": 4.0403,
      "step": 10270
    },
    {
      "epoch": 0.10271,
      "grad_norm": 1.098001417832544,
      "learning_rate": 0.003,
      "loss": 4.05,
      "step": 10271
    },
    {
      "epoch": 0.10272,
      "grad_norm": 1.0307246792918792,
      "learning_rate": 0.003,
      "loss": 4.0118,
      "step": 10272
    },
    {
      "epoch": 0.10273,
      "grad_norm": 1.3053322880296834,
      "learning_rate": 0.003,
      "loss": 4.0553,
      "step": 10273
    },
    {
      "epoch": 0.10274,
      "grad_norm": 1.1385323131916913,
      "learning_rate": 0.003,
      "loss": 4.0274,
      "step": 10274
    },
    {
      "epoch": 0.10275,
      "grad_norm": 1.1261509616015806,
      "learning_rate": 0.003,
      "loss": 4.0538,
      "step": 10275
    },
    {
      "epoch": 0.10276,
      "grad_norm": 1.2868774897178286,
      "learning_rate": 0.003,
      "loss": 4.0471,
      "step": 10276
    },
    {
      "epoch": 0.10277,
      "grad_norm": 1.0360335060829537,
      "learning_rate": 0.003,
      "loss": 4.0297,
      "step": 10277
    },
    {
      "epoch": 0.10278,
      "grad_norm": 1.051397255526473,
      "learning_rate": 0.003,
      "loss": 4.0293,
      "step": 10278
    },
    {
      "epoch": 0.10279,
      "grad_norm": 1.1689181356787075,
      "learning_rate": 0.003,
      "loss": 4.0356,
      "step": 10279
    },
    {
      "epoch": 0.1028,
      "grad_norm": 1.0026627281522649,
      "learning_rate": 0.003,
      "loss": 4.0372,
      "step": 10280
    },
    {
      "epoch": 0.10281,
      "grad_norm": 1.2879919756618505,
      "learning_rate": 0.003,
      "loss": 4.0193,
      "step": 10281
    },
    {
      "epoch": 0.10282,
      "grad_norm": 1.244730230167987,
      "learning_rate": 0.003,
      "loss": 4.0151,
      "step": 10282
    },
    {
      "epoch": 0.10283,
      "grad_norm": 1.3076934827367117,
      "learning_rate": 0.003,
      "loss": 4.0409,
      "step": 10283
    },
    {
      "epoch": 0.10284,
      "grad_norm": 1.1046390773617638,
      "learning_rate": 0.003,
      "loss": 4.0248,
      "step": 10284
    },
    {
      "epoch": 0.10285,
      "grad_norm": 1.4413050093320747,
      "learning_rate": 0.003,
      "loss": 4.0284,
      "step": 10285
    },
    {
      "epoch": 0.10286,
      "grad_norm": 0.8441280250112908,
      "learning_rate": 0.003,
      "loss": 4.0303,
      "step": 10286
    },
    {
      "epoch": 0.10287,
      "grad_norm": 1.1644552941443256,
      "learning_rate": 0.003,
      "loss": 4.0254,
      "step": 10287
    },
    {
      "epoch": 0.10288,
      "grad_norm": 1.253613939194171,
      "learning_rate": 0.003,
      "loss": 4.071,
      "step": 10288
    },
    {
      "epoch": 0.10289,
      "grad_norm": 1.1992464347629659,
      "learning_rate": 0.003,
      "loss": 4.0557,
      "step": 10289
    },
    {
      "epoch": 0.1029,
      "grad_norm": 1.1614080288077706,
      "learning_rate": 0.003,
      "loss": 4.0522,
      "step": 10290
    },
    {
      "epoch": 0.10291,
      "grad_norm": 1.1200552133267128,
      "learning_rate": 0.003,
      "loss": 4.0186,
      "step": 10291
    },
    {
      "epoch": 0.10292,
      "grad_norm": 1.1209107524052186,
      "learning_rate": 0.003,
      "loss": 4.0559,
      "step": 10292
    },
    {
      "epoch": 0.10293,
      "grad_norm": 1.1739986737744281,
      "learning_rate": 0.003,
      "loss": 4.0357,
      "step": 10293
    },
    {
      "epoch": 0.10294,
      "grad_norm": 0.9094006847879885,
      "learning_rate": 0.003,
      "loss": 4.0532,
      "step": 10294
    },
    {
      "epoch": 0.10295,
      "grad_norm": 1.0626685311138218,
      "learning_rate": 0.003,
      "loss": 4.0426,
      "step": 10295
    },
    {
      "epoch": 0.10296,
      "grad_norm": 1.3323240753459744,
      "learning_rate": 0.003,
      "loss": 4.0089,
      "step": 10296
    },
    {
      "epoch": 0.10297,
      "grad_norm": 0.9873549849936493,
      "learning_rate": 0.003,
      "loss": 4.0433,
      "step": 10297
    },
    {
      "epoch": 0.10298,
      "grad_norm": 1.1100743664279358,
      "learning_rate": 0.003,
      "loss": 4.0647,
      "step": 10298
    },
    {
      "epoch": 0.10299,
      "grad_norm": 1.2118577543293596,
      "learning_rate": 0.003,
      "loss": 4.0335,
      "step": 10299
    },
    {
      "epoch": 0.103,
      "grad_norm": 1.1347265113461813,
      "learning_rate": 0.003,
      "loss": 4.0357,
      "step": 10300
    },
    {
      "epoch": 0.10301,
      "grad_norm": 1.0447143445167029,
      "learning_rate": 0.003,
      "loss": 4.0515,
      "step": 10301
    },
    {
      "epoch": 0.10302,
      "grad_norm": 1.051062325381112,
      "learning_rate": 0.003,
      "loss": 4.0564,
      "step": 10302
    },
    {
      "epoch": 0.10303,
      "grad_norm": 1.0323975069194364,
      "learning_rate": 0.003,
      "loss": 4.0221,
      "step": 10303
    },
    {
      "epoch": 0.10304,
      "grad_norm": 1.2789211780935934,
      "learning_rate": 0.003,
      "loss": 4.0488,
      "step": 10304
    },
    {
      "epoch": 0.10305,
      "grad_norm": 1.005336751800442,
      "learning_rate": 0.003,
      "loss": 4.0242,
      "step": 10305
    },
    {
      "epoch": 0.10306,
      "grad_norm": 1.1730045793816195,
      "learning_rate": 0.003,
      "loss": 4.0592,
      "step": 10306
    },
    {
      "epoch": 0.10307,
      "grad_norm": 1.1183555686080375,
      "learning_rate": 0.003,
      "loss": 4.0125,
      "step": 10307
    },
    {
      "epoch": 0.10308,
      "grad_norm": 1.0617744506344082,
      "learning_rate": 0.003,
      "loss": 4.0268,
      "step": 10308
    },
    {
      "epoch": 0.10309,
      "grad_norm": 1.22078736450434,
      "learning_rate": 0.003,
      "loss": 4.0186,
      "step": 10309
    },
    {
      "epoch": 0.1031,
      "grad_norm": 0.9980699183199506,
      "learning_rate": 0.003,
      "loss": 4.0147,
      "step": 10310
    },
    {
      "epoch": 0.10311,
      "grad_norm": 1.2462656361373519,
      "learning_rate": 0.003,
      "loss": 4.0329,
      "step": 10311
    },
    {
      "epoch": 0.10312,
      "grad_norm": 1.0351182027362364,
      "learning_rate": 0.003,
      "loss": 4.032,
      "step": 10312
    },
    {
      "epoch": 0.10313,
      "grad_norm": 1.287680378354591,
      "learning_rate": 0.003,
      "loss": 4.0116,
      "step": 10313
    },
    {
      "epoch": 0.10314,
      "grad_norm": 0.9076021123086481,
      "learning_rate": 0.003,
      "loss": 4.0396,
      "step": 10314
    },
    {
      "epoch": 0.10315,
      "grad_norm": 1.1481668973991306,
      "learning_rate": 0.003,
      "loss": 4.0318,
      "step": 10315
    },
    {
      "epoch": 0.10316,
      "grad_norm": 1.0780477730461324,
      "learning_rate": 0.003,
      "loss": 4.0693,
      "step": 10316
    },
    {
      "epoch": 0.10317,
      "grad_norm": 1.1892911855004693,
      "learning_rate": 0.003,
      "loss": 4.0263,
      "step": 10317
    },
    {
      "epoch": 0.10318,
      "grad_norm": 1.0047821885404769,
      "learning_rate": 0.003,
      "loss": 4.0154,
      "step": 10318
    },
    {
      "epoch": 0.10319,
      "grad_norm": 1.28222936870825,
      "learning_rate": 0.003,
      "loss": 4.0361,
      "step": 10319
    },
    {
      "epoch": 0.1032,
      "grad_norm": 1.1296157925908548,
      "learning_rate": 0.003,
      "loss": 4.0425,
      "step": 10320
    },
    {
      "epoch": 0.10321,
      "grad_norm": 1.4853389937950883,
      "learning_rate": 0.003,
      "loss": 4.0501,
      "step": 10321
    },
    {
      "epoch": 0.10322,
      "grad_norm": 0.9787569170059056,
      "learning_rate": 0.003,
      "loss": 4.0621,
      "step": 10322
    },
    {
      "epoch": 0.10323,
      "grad_norm": 1.0906735449814466,
      "learning_rate": 0.003,
      "loss": 4.0462,
      "step": 10323
    },
    {
      "epoch": 0.10324,
      "grad_norm": 1.068837791641826,
      "learning_rate": 0.003,
      "loss": 4.0485,
      "step": 10324
    },
    {
      "epoch": 0.10325,
      "grad_norm": 1.2037337762025848,
      "learning_rate": 0.003,
      "loss": 4.0318,
      "step": 10325
    },
    {
      "epoch": 0.10326,
      "grad_norm": 1.1133172543387875,
      "learning_rate": 0.003,
      "loss": 4.0423,
      "step": 10326
    },
    {
      "epoch": 0.10327,
      "grad_norm": 1.3167960269468966,
      "learning_rate": 0.003,
      "loss": 4.0385,
      "step": 10327
    },
    {
      "epoch": 0.10328,
      "grad_norm": 1.0696207926347543,
      "learning_rate": 0.003,
      "loss": 4.0185,
      "step": 10328
    },
    {
      "epoch": 0.10329,
      "grad_norm": 1.245399604421406,
      "learning_rate": 0.003,
      "loss": 4.0544,
      "step": 10329
    },
    {
      "epoch": 0.1033,
      "grad_norm": 1.0059697543492918,
      "learning_rate": 0.003,
      "loss": 4.0414,
      "step": 10330
    },
    {
      "epoch": 0.10331,
      "grad_norm": 1.5656251418654314,
      "learning_rate": 0.003,
      "loss": 4.0265,
      "step": 10331
    },
    {
      "epoch": 0.10332,
      "grad_norm": 0.8574115626605384,
      "learning_rate": 0.003,
      "loss": 4.0255,
      "step": 10332
    },
    {
      "epoch": 0.10333,
      "grad_norm": 0.9859350180122279,
      "learning_rate": 0.003,
      "loss": 4.0291,
      "step": 10333
    },
    {
      "epoch": 0.10334,
      "grad_norm": 1.2820418030806104,
      "learning_rate": 0.003,
      "loss": 4.0694,
      "step": 10334
    },
    {
      "epoch": 0.10335,
      "grad_norm": 0.8731451613756506,
      "learning_rate": 0.003,
      "loss": 4.0254,
      "step": 10335
    },
    {
      "epoch": 0.10336,
      "grad_norm": 1.014757675534356,
      "learning_rate": 0.003,
      "loss": 4.0463,
      "step": 10336
    },
    {
      "epoch": 0.10337,
      "grad_norm": 1.4909413920937094,
      "learning_rate": 0.003,
      "loss": 4.0439,
      "step": 10337
    },
    {
      "epoch": 0.10338,
      "grad_norm": 0.7816794866102409,
      "learning_rate": 0.003,
      "loss": 4.022,
      "step": 10338
    },
    {
      "epoch": 0.10339,
      "grad_norm": 0.9243690877719841,
      "learning_rate": 0.003,
      "loss": 4.0053,
      "step": 10339
    },
    {
      "epoch": 0.1034,
      "grad_norm": 1.14694380987657,
      "learning_rate": 0.003,
      "loss": 4.0141,
      "step": 10340
    },
    {
      "epoch": 0.10341,
      "grad_norm": 1.3834936067183423,
      "learning_rate": 0.003,
      "loss": 4.0293,
      "step": 10341
    },
    {
      "epoch": 0.10342,
      "grad_norm": 1.1154396048927036,
      "learning_rate": 0.003,
      "loss": 4.0105,
      "step": 10342
    },
    {
      "epoch": 0.10343,
      "grad_norm": 1.1273639336085786,
      "learning_rate": 0.003,
      "loss": 4.0334,
      "step": 10343
    },
    {
      "epoch": 0.10344,
      "grad_norm": 1.1608493429486209,
      "learning_rate": 0.003,
      "loss": 4.04,
      "step": 10344
    },
    {
      "epoch": 0.10345,
      "grad_norm": 1.06990959346778,
      "learning_rate": 0.003,
      "loss": 4.0214,
      "step": 10345
    },
    {
      "epoch": 0.10346,
      "grad_norm": 1.2479747749226988,
      "learning_rate": 0.003,
      "loss": 4.0127,
      "step": 10346
    },
    {
      "epoch": 0.10347,
      "grad_norm": 0.9737474653691929,
      "learning_rate": 0.003,
      "loss": 4.0239,
      "step": 10347
    },
    {
      "epoch": 0.10348,
      "grad_norm": 1.1819821090166085,
      "learning_rate": 0.003,
      "loss": 4.0494,
      "step": 10348
    },
    {
      "epoch": 0.10349,
      "grad_norm": 1.167019349942472,
      "learning_rate": 0.003,
      "loss": 4.0379,
      "step": 10349
    },
    {
      "epoch": 0.1035,
      "grad_norm": 1.3519029606445285,
      "learning_rate": 0.003,
      "loss": 4.0382,
      "step": 10350
    },
    {
      "epoch": 0.10351,
      "grad_norm": 0.8593715530349866,
      "learning_rate": 0.003,
      "loss": 4.0384,
      "step": 10351
    },
    {
      "epoch": 0.10352,
      "grad_norm": 0.9127235399409747,
      "learning_rate": 0.003,
      "loss": 4.0287,
      "step": 10352
    },
    {
      "epoch": 0.10353,
      "grad_norm": 1.1579963956603465,
      "learning_rate": 0.003,
      "loss": 4.0682,
      "step": 10353
    },
    {
      "epoch": 0.10354,
      "grad_norm": 1.2797240045316958,
      "learning_rate": 0.003,
      "loss": 4.0387,
      "step": 10354
    },
    {
      "epoch": 0.10355,
      "grad_norm": 0.9612463917392908,
      "learning_rate": 0.003,
      "loss": 4.0261,
      "step": 10355
    },
    {
      "epoch": 0.10356,
      "grad_norm": 1.1043787877283193,
      "learning_rate": 0.003,
      "loss": 4.0468,
      "step": 10356
    },
    {
      "epoch": 0.10357,
      "grad_norm": 1.121926767893712,
      "learning_rate": 0.003,
      "loss": 4.0126,
      "step": 10357
    },
    {
      "epoch": 0.10358,
      "grad_norm": 1.0575829184316827,
      "learning_rate": 0.003,
      "loss": 4.0154,
      "step": 10358
    },
    {
      "epoch": 0.10359,
      "grad_norm": 1.1712412204356015,
      "learning_rate": 0.003,
      "loss": 4.0284,
      "step": 10359
    },
    {
      "epoch": 0.1036,
      "grad_norm": 1.1280743551856558,
      "learning_rate": 0.003,
      "loss": 4.0394,
      "step": 10360
    },
    {
      "epoch": 0.10361,
      "grad_norm": 1.100752064232454,
      "learning_rate": 0.003,
      "loss": 3.9936,
      "step": 10361
    },
    {
      "epoch": 0.10362,
      "grad_norm": 1.3611223839175068,
      "learning_rate": 0.003,
      "loss": 4.0151,
      "step": 10362
    },
    {
      "epoch": 0.10363,
      "grad_norm": 0.9602281325752356,
      "learning_rate": 0.003,
      "loss": 4.0173,
      "step": 10363
    },
    {
      "epoch": 0.10364,
      "grad_norm": 1.2553483424870702,
      "learning_rate": 0.003,
      "loss": 4.034,
      "step": 10364
    },
    {
      "epoch": 0.10365,
      "grad_norm": 1.4024905356997521,
      "learning_rate": 0.003,
      "loss": 4.0628,
      "step": 10365
    },
    {
      "epoch": 0.10366,
      "grad_norm": 0.9242656529205991,
      "learning_rate": 0.003,
      "loss": 4.0335,
      "step": 10366
    },
    {
      "epoch": 0.10367,
      "grad_norm": 1.1604823265405697,
      "learning_rate": 0.003,
      "loss": 4.0505,
      "step": 10367
    },
    {
      "epoch": 0.10368,
      "grad_norm": 1.1969970485888448,
      "learning_rate": 0.003,
      "loss": 4.0071,
      "step": 10368
    },
    {
      "epoch": 0.10369,
      "grad_norm": 1.0200300949712449,
      "learning_rate": 0.003,
      "loss": 4.021,
      "step": 10369
    },
    {
      "epoch": 0.1037,
      "grad_norm": 1.2689639441368583,
      "learning_rate": 0.003,
      "loss": 4.0276,
      "step": 10370
    },
    {
      "epoch": 0.10371,
      "grad_norm": 1.0341059906639019,
      "learning_rate": 0.003,
      "loss": 4.0054,
      "step": 10371
    },
    {
      "epoch": 0.10372,
      "grad_norm": 1.2538857040073335,
      "learning_rate": 0.003,
      "loss": 4.018,
      "step": 10372
    },
    {
      "epoch": 0.10373,
      "grad_norm": 1.1201024616892967,
      "learning_rate": 0.003,
      "loss": 4.0339,
      "step": 10373
    },
    {
      "epoch": 0.10374,
      "grad_norm": 1.2950845025933793,
      "learning_rate": 0.003,
      "loss": 4.027,
      "step": 10374
    },
    {
      "epoch": 0.10375,
      "grad_norm": 0.9699551875266422,
      "learning_rate": 0.003,
      "loss": 4.0121,
      "step": 10375
    },
    {
      "epoch": 0.10376,
      "grad_norm": 1.1580083785809567,
      "learning_rate": 0.003,
      "loss": 4.0171,
      "step": 10376
    },
    {
      "epoch": 0.10377,
      "grad_norm": 1.1093131687693019,
      "learning_rate": 0.003,
      "loss": 4.038,
      "step": 10377
    },
    {
      "epoch": 0.10378,
      "grad_norm": 1.2624596205999834,
      "learning_rate": 0.003,
      "loss": 4.0025,
      "step": 10378
    },
    {
      "epoch": 0.10379,
      "grad_norm": 1.0914527517869759,
      "learning_rate": 0.003,
      "loss": 4.0123,
      "step": 10379
    },
    {
      "epoch": 0.1038,
      "grad_norm": 1.151305634171054,
      "learning_rate": 0.003,
      "loss": 4.0548,
      "step": 10380
    },
    {
      "epoch": 0.10381,
      "grad_norm": 1.18506187805289,
      "learning_rate": 0.003,
      "loss": 4.0356,
      "step": 10381
    },
    {
      "epoch": 0.10382,
      "grad_norm": 0.960814714372655,
      "learning_rate": 0.003,
      "loss": 4.0069,
      "step": 10382
    },
    {
      "epoch": 0.10383,
      "grad_norm": 1.1274336145652182,
      "learning_rate": 0.003,
      "loss": 4.0386,
      "step": 10383
    },
    {
      "epoch": 0.10384,
      "grad_norm": 1.0778718704979358,
      "learning_rate": 0.003,
      "loss": 4.0427,
      "step": 10384
    },
    {
      "epoch": 0.10385,
      "grad_norm": 1.3346207537542287,
      "learning_rate": 0.003,
      "loss": 4.0374,
      "step": 10385
    },
    {
      "epoch": 0.10386,
      "grad_norm": 1.1729682327745514,
      "learning_rate": 0.003,
      "loss": 4.0437,
      "step": 10386
    },
    {
      "epoch": 0.10387,
      "grad_norm": 1.036661733873201,
      "learning_rate": 0.003,
      "loss": 4.0341,
      "step": 10387
    },
    {
      "epoch": 0.10388,
      "grad_norm": 1.1267714195794842,
      "learning_rate": 0.003,
      "loss": 4.0274,
      "step": 10388
    },
    {
      "epoch": 0.10389,
      "grad_norm": 1.070601905354444,
      "learning_rate": 0.003,
      "loss": 4.0419,
      "step": 10389
    },
    {
      "epoch": 0.1039,
      "grad_norm": 1.1108942610835553,
      "learning_rate": 0.003,
      "loss": 4.0057,
      "step": 10390
    },
    {
      "epoch": 0.10391,
      "grad_norm": 1.0766563550369346,
      "learning_rate": 0.003,
      "loss": 4.0289,
      "step": 10391
    },
    {
      "epoch": 0.10392,
      "grad_norm": 1.2926774985748823,
      "learning_rate": 0.003,
      "loss": 4.0345,
      "step": 10392
    },
    {
      "epoch": 0.10393,
      "grad_norm": 1.021832754641526,
      "learning_rate": 0.003,
      "loss": 4.0195,
      "step": 10393
    },
    {
      "epoch": 0.10394,
      "grad_norm": 1.2672584449062123,
      "learning_rate": 0.003,
      "loss": 4.0341,
      "step": 10394
    },
    {
      "epoch": 0.10395,
      "grad_norm": 0.9905815246662731,
      "learning_rate": 0.003,
      "loss": 4.0565,
      "step": 10395
    },
    {
      "epoch": 0.10396,
      "grad_norm": 1.0082239163019409,
      "learning_rate": 0.003,
      "loss": 4.0346,
      "step": 10396
    },
    {
      "epoch": 0.10397,
      "grad_norm": 1.2087702334917223,
      "learning_rate": 0.003,
      "loss": 4.0531,
      "step": 10397
    },
    {
      "epoch": 0.10398,
      "grad_norm": 1.3670646129935469,
      "learning_rate": 0.003,
      "loss": 4.0473,
      "step": 10398
    },
    {
      "epoch": 0.10399,
      "grad_norm": 1.0071185422959221,
      "learning_rate": 0.003,
      "loss": 4.0447,
      "step": 10399
    },
    {
      "epoch": 0.104,
      "grad_norm": 1.201312509023756,
      "learning_rate": 0.003,
      "loss": 4.0427,
      "step": 10400
    },
    {
      "epoch": 0.10401,
      "grad_norm": 0.9420904682898148,
      "learning_rate": 0.003,
      "loss": 4.0254,
      "step": 10401
    },
    {
      "epoch": 0.10402,
      "grad_norm": 1.2050493388965062,
      "learning_rate": 0.003,
      "loss": 4.0259,
      "step": 10402
    },
    {
      "epoch": 0.10403,
      "grad_norm": 1.046563241466676,
      "learning_rate": 0.003,
      "loss": 4.03,
      "step": 10403
    },
    {
      "epoch": 0.10404,
      "grad_norm": 1.1628697809891466,
      "learning_rate": 0.003,
      "loss": 4.0309,
      "step": 10404
    },
    {
      "epoch": 0.10405,
      "grad_norm": 1.3199812512263778,
      "learning_rate": 0.003,
      "loss": 4.0441,
      "step": 10405
    },
    {
      "epoch": 0.10406,
      "grad_norm": 1.0594480118801703,
      "learning_rate": 0.003,
      "loss": 4.0273,
      "step": 10406
    },
    {
      "epoch": 0.10407,
      "grad_norm": 1.1233067919692772,
      "learning_rate": 0.003,
      "loss": 4.0484,
      "step": 10407
    },
    {
      "epoch": 0.10408,
      "grad_norm": 0.9910896481437917,
      "learning_rate": 0.003,
      "loss": 4.0117,
      "step": 10408
    },
    {
      "epoch": 0.10409,
      "grad_norm": 1.138918453922149,
      "learning_rate": 0.003,
      "loss": 4.0366,
      "step": 10409
    },
    {
      "epoch": 0.1041,
      "grad_norm": 1.1821655962343458,
      "learning_rate": 0.003,
      "loss": 4.0616,
      "step": 10410
    },
    {
      "epoch": 0.10411,
      "grad_norm": 1.214060735642106,
      "learning_rate": 0.003,
      "loss": 4.0235,
      "step": 10411
    },
    {
      "epoch": 0.10412,
      "grad_norm": 1.1271952204136202,
      "learning_rate": 0.003,
      "loss": 3.9872,
      "step": 10412
    },
    {
      "epoch": 0.10413,
      "grad_norm": 0.9720082652463884,
      "learning_rate": 0.003,
      "loss": 4.0409,
      "step": 10413
    },
    {
      "epoch": 0.10414,
      "grad_norm": 1.1999444084371422,
      "learning_rate": 0.003,
      "loss": 4.0294,
      "step": 10414
    },
    {
      "epoch": 0.10415,
      "grad_norm": 1.073604246138116,
      "learning_rate": 0.003,
      "loss": 4.0336,
      "step": 10415
    },
    {
      "epoch": 0.10416,
      "grad_norm": 1.108459057982964,
      "learning_rate": 0.003,
      "loss": 4.0489,
      "step": 10416
    },
    {
      "epoch": 0.10417,
      "grad_norm": 1.271581283838925,
      "learning_rate": 0.003,
      "loss": 4.0579,
      "step": 10417
    },
    {
      "epoch": 0.10418,
      "grad_norm": 1.079645522515878,
      "learning_rate": 0.003,
      "loss": 4.0095,
      "step": 10418
    },
    {
      "epoch": 0.10419,
      "grad_norm": 1.1152248774580158,
      "learning_rate": 0.003,
      "loss": 4.0746,
      "step": 10419
    },
    {
      "epoch": 0.1042,
      "grad_norm": 1.3480810386551398,
      "learning_rate": 0.003,
      "loss": 4.0543,
      "step": 10420
    },
    {
      "epoch": 0.10421,
      "grad_norm": 0.9290743153390996,
      "learning_rate": 0.003,
      "loss": 4.0209,
      "step": 10421
    },
    {
      "epoch": 0.10422,
      "grad_norm": 1.0777250714984647,
      "learning_rate": 0.003,
      "loss": 4.0338,
      "step": 10422
    },
    {
      "epoch": 0.10423,
      "grad_norm": 1.2450132081404977,
      "learning_rate": 0.003,
      "loss": 4.0475,
      "step": 10423
    },
    {
      "epoch": 0.10424,
      "grad_norm": 0.9622549883886152,
      "learning_rate": 0.003,
      "loss": 4.0224,
      "step": 10424
    },
    {
      "epoch": 0.10425,
      "grad_norm": 1.1529396436416368,
      "learning_rate": 0.003,
      "loss": 4.0305,
      "step": 10425
    },
    {
      "epoch": 0.10426,
      "grad_norm": 1.0658110282776228,
      "learning_rate": 0.003,
      "loss": 4.0259,
      "step": 10426
    },
    {
      "epoch": 0.10427,
      "grad_norm": 1.0117761795322662,
      "learning_rate": 0.003,
      "loss": 4.0416,
      "step": 10427
    },
    {
      "epoch": 0.10428,
      "grad_norm": 1.1685995916063228,
      "learning_rate": 0.003,
      "loss": 4.018,
      "step": 10428
    },
    {
      "epoch": 0.10429,
      "grad_norm": 1.0923828450042794,
      "learning_rate": 0.003,
      "loss": 4.0131,
      "step": 10429
    },
    {
      "epoch": 0.1043,
      "grad_norm": 1.1655387142481999,
      "learning_rate": 0.003,
      "loss": 4.0174,
      "step": 10430
    },
    {
      "epoch": 0.10431,
      "grad_norm": 1.3531912916034343,
      "learning_rate": 0.003,
      "loss": 4.0362,
      "step": 10431
    },
    {
      "epoch": 0.10432,
      "grad_norm": 0.9691209365956966,
      "learning_rate": 0.003,
      "loss": 4.0306,
      "step": 10432
    },
    {
      "epoch": 0.10433,
      "grad_norm": 1.0876767583765015,
      "learning_rate": 0.003,
      "loss": 4.0289,
      "step": 10433
    },
    {
      "epoch": 0.10434,
      "grad_norm": 1.0893989324457354,
      "learning_rate": 0.003,
      "loss": 4.0189,
      "step": 10434
    },
    {
      "epoch": 0.10435,
      "grad_norm": 1.1011090775263708,
      "learning_rate": 0.003,
      "loss": 4.016,
      "step": 10435
    },
    {
      "epoch": 0.10436,
      "grad_norm": 1.2651035810376114,
      "learning_rate": 0.003,
      "loss": 4.0388,
      "step": 10436
    },
    {
      "epoch": 0.10437,
      "grad_norm": 1.077788583905916,
      "learning_rate": 0.003,
      "loss": 4.0438,
      "step": 10437
    },
    {
      "epoch": 0.10438,
      "grad_norm": 1.089658709482987,
      "learning_rate": 0.003,
      "loss": 4.0291,
      "step": 10438
    },
    {
      "epoch": 0.10439,
      "grad_norm": 1.3526434745275007,
      "learning_rate": 0.003,
      "loss": 4.0494,
      "step": 10439
    },
    {
      "epoch": 0.1044,
      "grad_norm": 1.072260827630628,
      "learning_rate": 0.003,
      "loss": 4.0382,
      "step": 10440
    },
    {
      "epoch": 0.10441,
      "grad_norm": 1.275665138867706,
      "learning_rate": 0.003,
      "loss": 4.0231,
      "step": 10441
    },
    {
      "epoch": 0.10442,
      "grad_norm": 0.8780574008022751,
      "learning_rate": 0.003,
      "loss": 4.0134,
      "step": 10442
    },
    {
      "epoch": 0.10443,
      "grad_norm": 1.0997214258568853,
      "learning_rate": 0.003,
      "loss": 4.0528,
      "step": 10443
    },
    {
      "epoch": 0.10444,
      "grad_norm": 1.3431512939323784,
      "learning_rate": 0.003,
      "loss": 4.0468,
      "step": 10444
    },
    {
      "epoch": 0.10445,
      "grad_norm": 1.2074116232555316,
      "learning_rate": 0.003,
      "loss": 4.0546,
      "step": 10445
    },
    {
      "epoch": 0.10446,
      "grad_norm": 1.087105337997175,
      "learning_rate": 0.003,
      "loss": 4.0135,
      "step": 10446
    },
    {
      "epoch": 0.10447,
      "grad_norm": 1.0707463528471768,
      "learning_rate": 0.003,
      "loss": 4.0231,
      "step": 10447
    },
    {
      "epoch": 0.10448,
      "grad_norm": 1.0692522472019002,
      "learning_rate": 0.003,
      "loss": 4.0339,
      "step": 10448
    },
    {
      "epoch": 0.10449,
      "grad_norm": 1.001943199192615,
      "learning_rate": 0.003,
      "loss": 4.0238,
      "step": 10449
    },
    {
      "epoch": 0.1045,
      "grad_norm": 1.259242230186131,
      "learning_rate": 0.003,
      "loss": 4.0551,
      "step": 10450
    },
    {
      "epoch": 0.10451,
      "grad_norm": 0.8956322006755586,
      "learning_rate": 0.003,
      "loss": 4.0,
      "step": 10451
    },
    {
      "epoch": 0.10452,
      "grad_norm": 1.2342703982705026,
      "learning_rate": 0.003,
      "loss": 4.0332,
      "step": 10452
    },
    {
      "epoch": 0.10453,
      "grad_norm": 1.1659755217419676,
      "learning_rate": 0.003,
      "loss": 4.0123,
      "step": 10453
    },
    {
      "epoch": 0.10454,
      "grad_norm": 1.329730366250319,
      "learning_rate": 0.003,
      "loss": 4.0231,
      "step": 10454
    },
    {
      "epoch": 0.10455,
      "grad_norm": 1.0525902475820774,
      "learning_rate": 0.003,
      "loss": 4.0172,
      "step": 10455
    },
    {
      "epoch": 0.10456,
      "grad_norm": 1.4730676027395804,
      "learning_rate": 0.003,
      "loss": 4.0522,
      "step": 10456
    },
    {
      "epoch": 0.10457,
      "grad_norm": 0.902647065287986,
      "learning_rate": 0.003,
      "loss": 4.039,
      "step": 10457
    },
    {
      "epoch": 0.10458,
      "grad_norm": 0.9366078692778497,
      "learning_rate": 0.003,
      "loss": 4.0455,
      "step": 10458
    },
    {
      "epoch": 0.10459,
      "grad_norm": 1.185490872765169,
      "learning_rate": 0.003,
      "loss": 4.0212,
      "step": 10459
    },
    {
      "epoch": 0.1046,
      "grad_norm": 1.220542856466773,
      "learning_rate": 0.003,
      "loss": 4.0497,
      "step": 10460
    },
    {
      "epoch": 0.10461,
      "grad_norm": 1.0971020768947664,
      "learning_rate": 0.003,
      "loss": 4.008,
      "step": 10461
    },
    {
      "epoch": 0.10462,
      "grad_norm": 1.0948259417406494,
      "learning_rate": 0.003,
      "loss": 4.0431,
      "step": 10462
    },
    {
      "epoch": 0.10463,
      "grad_norm": 1.0964751192354854,
      "learning_rate": 0.003,
      "loss": 4.0131,
      "step": 10463
    },
    {
      "epoch": 0.10464,
      "grad_norm": 1.1441448993227044,
      "learning_rate": 0.003,
      "loss": 4.0446,
      "step": 10464
    },
    {
      "epoch": 0.10465,
      "grad_norm": 1.1473924677302787,
      "learning_rate": 0.003,
      "loss": 4.0343,
      "step": 10465
    },
    {
      "epoch": 0.10466,
      "grad_norm": 1.1495513149677226,
      "learning_rate": 0.003,
      "loss": 4.0294,
      "step": 10466
    },
    {
      "epoch": 0.10467,
      "grad_norm": 1.0544675114536122,
      "learning_rate": 0.003,
      "loss": 4.0301,
      "step": 10467
    },
    {
      "epoch": 0.10468,
      "grad_norm": 1.34298439731449,
      "learning_rate": 0.003,
      "loss": 4.0136,
      "step": 10468
    },
    {
      "epoch": 0.10469,
      "grad_norm": 1.2021399413747627,
      "learning_rate": 0.003,
      "loss": 4.0423,
      "step": 10469
    },
    {
      "epoch": 0.1047,
      "grad_norm": 1.1449386988739325,
      "learning_rate": 0.003,
      "loss": 4.0273,
      "step": 10470
    },
    {
      "epoch": 0.10471,
      "grad_norm": 1.0404658350920764,
      "learning_rate": 0.003,
      "loss": 4.0415,
      "step": 10471
    },
    {
      "epoch": 0.10472,
      "grad_norm": 1.2753272835439624,
      "learning_rate": 0.003,
      "loss": 4.019,
      "step": 10472
    },
    {
      "epoch": 0.10473,
      "grad_norm": 1.1913855993707885,
      "learning_rate": 0.003,
      "loss": 4.0211,
      "step": 10473
    },
    {
      "epoch": 0.10474,
      "grad_norm": 1.1501830644015107,
      "learning_rate": 0.003,
      "loss": 4.0254,
      "step": 10474
    },
    {
      "epoch": 0.10475,
      "grad_norm": 1.243458316706119,
      "learning_rate": 0.003,
      "loss": 4.0268,
      "step": 10475
    },
    {
      "epoch": 0.10476,
      "grad_norm": 1.209956907576902,
      "learning_rate": 0.003,
      "loss": 4.0361,
      "step": 10476
    },
    {
      "epoch": 0.10477,
      "grad_norm": 1.0020350040143609,
      "learning_rate": 0.003,
      "loss": 4.0111,
      "step": 10477
    },
    {
      "epoch": 0.10478,
      "grad_norm": 1.0396708338378342,
      "learning_rate": 0.003,
      "loss": 4.018,
      "step": 10478
    },
    {
      "epoch": 0.10479,
      "grad_norm": 1.1150470684477871,
      "learning_rate": 0.003,
      "loss": 4.0326,
      "step": 10479
    },
    {
      "epoch": 0.1048,
      "grad_norm": 1.122637069690026,
      "learning_rate": 0.003,
      "loss": 4.0142,
      "step": 10480
    },
    {
      "epoch": 0.10481,
      "grad_norm": 1.4318413431939903,
      "learning_rate": 0.003,
      "loss": 4.0382,
      "step": 10481
    },
    {
      "epoch": 0.10482,
      "grad_norm": 1.0791992375976056,
      "learning_rate": 0.003,
      "loss": 4.0208,
      "step": 10482
    },
    {
      "epoch": 0.10483,
      "grad_norm": 1.3065570899996903,
      "learning_rate": 0.003,
      "loss": 4.0121,
      "step": 10483
    },
    {
      "epoch": 0.10484,
      "grad_norm": 1.1874032550959215,
      "learning_rate": 0.003,
      "loss": 4.0232,
      "step": 10484
    },
    {
      "epoch": 0.10485,
      "grad_norm": 1.1893565695289274,
      "learning_rate": 0.003,
      "loss": 4.0216,
      "step": 10485
    },
    {
      "epoch": 0.10486,
      "grad_norm": 1.097831179509903,
      "learning_rate": 0.003,
      "loss": 3.9988,
      "step": 10486
    },
    {
      "epoch": 0.10487,
      "grad_norm": 1.2993291047275668,
      "learning_rate": 0.003,
      "loss": 4.0411,
      "step": 10487
    },
    {
      "epoch": 0.10488,
      "grad_norm": 0.9496980239335198,
      "learning_rate": 0.003,
      "loss": 4.0213,
      "step": 10488
    },
    {
      "epoch": 0.10489,
      "grad_norm": 1.0629884631995594,
      "learning_rate": 0.003,
      "loss": 4.0265,
      "step": 10489
    },
    {
      "epoch": 0.1049,
      "grad_norm": 1.0603268965374313,
      "learning_rate": 0.003,
      "loss": 4.0407,
      "step": 10490
    },
    {
      "epoch": 0.10491,
      "grad_norm": 1.208967187962485,
      "learning_rate": 0.003,
      "loss": 4.0465,
      "step": 10491
    },
    {
      "epoch": 0.10492,
      "grad_norm": 1.2629214617983036,
      "learning_rate": 0.003,
      "loss": 4.0305,
      "step": 10492
    },
    {
      "epoch": 0.10493,
      "grad_norm": 1.2124437867421798,
      "learning_rate": 0.003,
      "loss": 4.0312,
      "step": 10493
    },
    {
      "epoch": 0.10494,
      "grad_norm": 0.9449400933558595,
      "learning_rate": 0.003,
      "loss": 3.9937,
      "step": 10494
    },
    {
      "epoch": 0.10495,
      "grad_norm": 1.1234515875951618,
      "learning_rate": 0.003,
      "loss": 4.0154,
      "step": 10495
    },
    {
      "epoch": 0.10496,
      "grad_norm": 1.0139506348014824,
      "learning_rate": 0.003,
      "loss": 4.0311,
      "step": 10496
    },
    {
      "epoch": 0.10497,
      "grad_norm": 1.3582522269851591,
      "learning_rate": 0.003,
      "loss": 4.0167,
      "step": 10497
    },
    {
      "epoch": 0.10498,
      "grad_norm": 0.8485161156664304,
      "learning_rate": 0.003,
      "loss": 4.0385,
      "step": 10498
    },
    {
      "epoch": 0.10499,
      "grad_norm": 0.9682827176949959,
      "learning_rate": 0.003,
      "loss": 4.0119,
      "step": 10499
    },
    {
      "epoch": 0.105,
      "grad_norm": 1.3767621333474114,
      "learning_rate": 0.003,
      "loss": 4.0639,
      "step": 10500
    },
    {
      "epoch": 0.10501,
      "grad_norm": 0.9911284707155722,
      "learning_rate": 0.003,
      "loss": 4.0179,
      "step": 10501
    },
    {
      "epoch": 0.10502,
      "grad_norm": 1.2098800890232249,
      "learning_rate": 0.003,
      "loss": 4.0239,
      "step": 10502
    },
    {
      "epoch": 0.10503,
      "grad_norm": 1.125482625749235,
      "learning_rate": 0.003,
      "loss": 4.0644,
      "step": 10503
    },
    {
      "epoch": 0.10504,
      "grad_norm": 1.049702069475873,
      "learning_rate": 0.003,
      "loss": 4.02,
      "step": 10504
    },
    {
      "epoch": 0.10505,
      "grad_norm": 1.2213586640000345,
      "learning_rate": 0.003,
      "loss": 4.0348,
      "step": 10505
    },
    {
      "epoch": 0.10506,
      "grad_norm": 1.315035994618202,
      "learning_rate": 0.003,
      "loss": 4.038,
      "step": 10506
    },
    {
      "epoch": 0.10507,
      "grad_norm": 1.340531389565643,
      "learning_rate": 0.003,
      "loss": 4.0427,
      "step": 10507
    },
    {
      "epoch": 0.10508,
      "grad_norm": 0.9990031849773587,
      "learning_rate": 0.003,
      "loss": 4.0073,
      "step": 10508
    },
    {
      "epoch": 0.10509,
      "grad_norm": 1.1660130233668518,
      "learning_rate": 0.003,
      "loss": 4.0219,
      "step": 10509
    },
    {
      "epoch": 0.1051,
      "grad_norm": 1.039379676526276,
      "learning_rate": 0.003,
      "loss": 4.0316,
      "step": 10510
    },
    {
      "epoch": 0.10511,
      "grad_norm": 1.338586383397336,
      "learning_rate": 0.003,
      "loss": 4.0073,
      "step": 10511
    },
    {
      "epoch": 0.10512,
      "grad_norm": 1.0249548531611892,
      "learning_rate": 0.003,
      "loss": 4.0575,
      "step": 10512
    },
    {
      "epoch": 0.10513,
      "grad_norm": 1.2470715536322723,
      "learning_rate": 0.003,
      "loss": 4.003,
      "step": 10513
    },
    {
      "epoch": 0.10514,
      "grad_norm": 0.9573916157628471,
      "learning_rate": 0.003,
      "loss": 3.9872,
      "step": 10514
    },
    {
      "epoch": 0.10515,
      "grad_norm": 1.299107259749984,
      "learning_rate": 0.003,
      "loss": 4.0462,
      "step": 10515
    },
    {
      "epoch": 0.10516,
      "grad_norm": 1.01524445765738,
      "learning_rate": 0.003,
      "loss": 4.0,
      "step": 10516
    },
    {
      "epoch": 0.10517,
      "grad_norm": 1.1149804894227142,
      "learning_rate": 0.003,
      "loss": 4.0559,
      "step": 10517
    },
    {
      "epoch": 0.10518,
      "grad_norm": 1.1890411136167975,
      "learning_rate": 0.003,
      "loss": 4.0546,
      "step": 10518
    },
    {
      "epoch": 0.10519,
      "grad_norm": 0.9364490204330106,
      "learning_rate": 0.003,
      "loss": 4.005,
      "step": 10519
    },
    {
      "epoch": 0.1052,
      "grad_norm": 1.1387642541625675,
      "learning_rate": 0.003,
      "loss": 4.0573,
      "step": 10520
    },
    {
      "epoch": 0.10521,
      "grad_norm": 1.1839482076334311,
      "learning_rate": 0.003,
      "loss": 4.0311,
      "step": 10521
    },
    {
      "epoch": 0.10522,
      "grad_norm": 1.0014230334791343,
      "learning_rate": 0.003,
      "loss": 4.0382,
      "step": 10522
    },
    {
      "epoch": 0.10523,
      "grad_norm": 1.1543281363304134,
      "learning_rate": 0.003,
      "loss": 4.0552,
      "step": 10523
    },
    {
      "epoch": 0.10524,
      "grad_norm": 1.0058642661701724,
      "learning_rate": 0.003,
      "loss": 4.0253,
      "step": 10524
    },
    {
      "epoch": 0.10525,
      "grad_norm": 1.2600433039619774,
      "learning_rate": 0.003,
      "loss": 4.0329,
      "step": 10525
    },
    {
      "epoch": 0.10526,
      "grad_norm": 1.0491904580994538,
      "learning_rate": 0.003,
      "loss": 4.0287,
      "step": 10526
    },
    {
      "epoch": 0.10527,
      "grad_norm": 1.3835636637824966,
      "learning_rate": 0.003,
      "loss": 4.0667,
      "step": 10527
    },
    {
      "epoch": 0.10528,
      "grad_norm": 1.1190543265923238,
      "learning_rate": 0.003,
      "loss": 4.0123,
      "step": 10528
    },
    {
      "epoch": 0.10529,
      "grad_norm": 1.41063797874233,
      "learning_rate": 0.003,
      "loss": 4.0243,
      "step": 10529
    },
    {
      "epoch": 0.1053,
      "grad_norm": 1.0017135846243632,
      "learning_rate": 0.003,
      "loss": 4.0504,
      "step": 10530
    },
    {
      "epoch": 0.10531,
      "grad_norm": 1.1349481015715086,
      "learning_rate": 0.003,
      "loss": 4.028,
      "step": 10531
    },
    {
      "epoch": 0.10532,
      "grad_norm": 1.0974147690879539,
      "learning_rate": 0.003,
      "loss": 4.0413,
      "step": 10532
    },
    {
      "epoch": 0.10533,
      "grad_norm": 1.1318583086940592,
      "learning_rate": 0.003,
      "loss": 4.0388,
      "step": 10533
    },
    {
      "epoch": 0.10534,
      "grad_norm": 0.9728943881000612,
      "learning_rate": 0.003,
      "loss": 4.034,
      "step": 10534
    },
    {
      "epoch": 0.10535,
      "grad_norm": 1.4760229533414277,
      "learning_rate": 0.003,
      "loss": 4.0235,
      "step": 10535
    },
    {
      "epoch": 0.10536,
      "grad_norm": 1.0178312131559886,
      "learning_rate": 0.003,
      "loss": 4.0033,
      "step": 10536
    },
    {
      "epoch": 0.10537,
      "grad_norm": 1.326688326472958,
      "learning_rate": 0.003,
      "loss": 4.0357,
      "step": 10537
    },
    {
      "epoch": 0.10538,
      "grad_norm": 0.9540964220451233,
      "learning_rate": 0.003,
      "loss": 4.0297,
      "step": 10538
    },
    {
      "epoch": 0.10539,
      "grad_norm": 1.1867065524476788,
      "learning_rate": 0.003,
      "loss": 4.0241,
      "step": 10539
    },
    {
      "epoch": 0.1054,
      "grad_norm": 1.306268363150594,
      "learning_rate": 0.003,
      "loss": 4.0031,
      "step": 10540
    },
    {
      "epoch": 0.10541,
      "grad_norm": 1.1821939453890575,
      "learning_rate": 0.003,
      "loss": 4.0422,
      "step": 10541
    },
    {
      "epoch": 0.10542,
      "grad_norm": 1.340191153094188,
      "learning_rate": 0.003,
      "loss": 4.0355,
      "step": 10542
    },
    {
      "epoch": 0.10543,
      "grad_norm": 0.8866083438164943,
      "learning_rate": 0.003,
      "loss": 4.0249,
      "step": 10543
    },
    {
      "epoch": 0.10544,
      "grad_norm": 1.1034227189294226,
      "learning_rate": 0.003,
      "loss": 4.0158,
      "step": 10544
    },
    {
      "epoch": 0.10545,
      "grad_norm": 1.145569873679018,
      "learning_rate": 0.003,
      "loss": 4.0453,
      "step": 10545
    },
    {
      "epoch": 0.10546,
      "grad_norm": 1.2439976954614562,
      "learning_rate": 0.003,
      "loss": 4.067,
      "step": 10546
    },
    {
      "epoch": 0.10547,
      "grad_norm": 0.9219452650458853,
      "learning_rate": 0.003,
      "loss": 4.0075,
      "step": 10547
    },
    {
      "epoch": 0.10548,
      "grad_norm": 0.9976599090984258,
      "learning_rate": 0.003,
      "loss": 4.0193,
      "step": 10548
    },
    {
      "epoch": 0.10549,
      "grad_norm": 1.2774703253681363,
      "learning_rate": 0.003,
      "loss": 4.0251,
      "step": 10549
    },
    {
      "epoch": 0.1055,
      "grad_norm": 1.1539905793697123,
      "learning_rate": 0.003,
      "loss": 4.0263,
      "step": 10550
    },
    {
      "epoch": 0.10551,
      "grad_norm": 1.135911995287088,
      "learning_rate": 0.003,
      "loss": 4.0545,
      "step": 10551
    },
    {
      "epoch": 0.10552,
      "grad_norm": 1.0527742384461596,
      "learning_rate": 0.003,
      "loss": 4.0526,
      "step": 10552
    },
    {
      "epoch": 0.10553,
      "grad_norm": 1.322002490054715,
      "learning_rate": 0.003,
      "loss": 4.0109,
      "step": 10553
    },
    {
      "epoch": 0.10554,
      "grad_norm": 1.0084031327815373,
      "learning_rate": 0.003,
      "loss": 4.0338,
      "step": 10554
    },
    {
      "epoch": 0.10555,
      "grad_norm": 1.0808986156972835,
      "learning_rate": 0.003,
      "loss": 4.0494,
      "step": 10555
    },
    {
      "epoch": 0.10556,
      "grad_norm": 1.1216797816863153,
      "learning_rate": 0.003,
      "loss": 4.037,
      "step": 10556
    },
    {
      "epoch": 0.10557,
      "grad_norm": 1.1475055052985905,
      "learning_rate": 0.003,
      "loss": 4.0297,
      "step": 10557
    },
    {
      "epoch": 0.10558,
      "grad_norm": 1.1119323781963724,
      "learning_rate": 0.003,
      "loss": 4.0102,
      "step": 10558
    },
    {
      "epoch": 0.10559,
      "grad_norm": 1.2460835632404752,
      "learning_rate": 0.003,
      "loss": 4.0189,
      "step": 10559
    },
    {
      "epoch": 0.1056,
      "grad_norm": 1.2138261319567527,
      "learning_rate": 0.003,
      "loss": 4.0181,
      "step": 10560
    },
    {
      "epoch": 0.10561,
      "grad_norm": 1.0483615457288105,
      "learning_rate": 0.003,
      "loss": 4.024,
      "step": 10561
    },
    {
      "epoch": 0.10562,
      "grad_norm": 1.0118320288891192,
      "learning_rate": 0.003,
      "loss": 4.0047,
      "step": 10562
    },
    {
      "epoch": 0.10563,
      "grad_norm": 1.3644503768047236,
      "learning_rate": 0.003,
      "loss": 4.0502,
      "step": 10563
    },
    {
      "epoch": 0.10564,
      "grad_norm": 0.8013831438598815,
      "learning_rate": 0.003,
      "loss": 4.0375,
      "step": 10564
    },
    {
      "epoch": 0.10565,
      "grad_norm": 0.9612754540930174,
      "learning_rate": 0.003,
      "loss": 4.033,
      "step": 10565
    },
    {
      "epoch": 0.10566,
      "grad_norm": 1.2366075679735475,
      "learning_rate": 0.003,
      "loss": 4.0419,
      "step": 10566
    },
    {
      "epoch": 0.10567,
      "grad_norm": 1.155623176742999,
      "learning_rate": 0.003,
      "loss": 4.0615,
      "step": 10567
    },
    {
      "epoch": 0.10568,
      "grad_norm": 1.1220562684114699,
      "learning_rate": 0.003,
      "loss": 4.053,
      "step": 10568
    },
    {
      "epoch": 0.10569,
      "grad_norm": 1.163422842918069,
      "learning_rate": 0.003,
      "loss": 4.0554,
      "step": 10569
    },
    {
      "epoch": 0.1057,
      "grad_norm": 1.1409717638103616,
      "learning_rate": 0.003,
      "loss": 4.0176,
      "step": 10570
    },
    {
      "epoch": 0.10571,
      "grad_norm": 0.9910760818265895,
      "learning_rate": 0.003,
      "loss": 4.0286,
      "step": 10571
    },
    {
      "epoch": 0.10572,
      "grad_norm": 1.1832943348904186,
      "learning_rate": 0.003,
      "loss": 4.0345,
      "step": 10572
    },
    {
      "epoch": 0.10573,
      "grad_norm": 1.0817165438920588,
      "learning_rate": 0.003,
      "loss": 4.0188,
      "step": 10573
    },
    {
      "epoch": 0.10574,
      "grad_norm": 1.2883743060276551,
      "learning_rate": 0.003,
      "loss": 4.0405,
      "step": 10574
    },
    {
      "epoch": 0.10575,
      "grad_norm": 0.8503051859413028,
      "learning_rate": 0.003,
      "loss": 4.0056,
      "step": 10575
    },
    {
      "epoch": 0.10576,
      "grad_norm": 1.0469525545832647,
      "learning_rate": 0.003,
      "loss": 4.0498,
      "step": 10576
    },
    {
      "epoch": 0.10577,
      "grad_norm": 1.509719903394313,
      "learning_rate": 0.003,
      "loss": 4.0326,
      "step": 10577
    },
    {
      "epoch": 0.10578,
      "grad_norm": 0.9933503276360827,
      "learning_rate": 0.003,
      "loss": 4.0346,
      "step": 10578
    },
    {
      "epoch": 0.10579,
      "grad_norm": 1.1546805042641897,
      "learning_rate": 0.003,
      "loss": 4.0015,
      "step": 10579
    },
    {
      "epoch": 0.1058,
      "grad_norm": 1.10717494330016,
      "learning_rate": 0.003,
      "loss": 4.0501,
      "step": 10580
    },
    {
      "epoch": 0.10581,
      "grad_norm": 1.0811061670677058,
      "learning_rate": 0.003,
      "loss": 4.0502,
      "step": 10581
    },
    {
      "epoch": 0.10582,
      "grad_norm": 1.1040005473249752,
      "learning_rate": 0.003,
      "loss": 4.034,
      "step": 10582
    },
    {
      "epoch": 0.10583,
      "grad_norm": 1.2803656515646102,
      "learning_rate": 0.003,
      "loss": 4.0327,
      "step": 10583
    },
    {
      "epoch": 0.10584,
      "grad_norm": 1.3090939277354992,
      "learning_rate": 0.003,
      "loss": 4.0203,
      "step": 10584
    },
    {
      "epoch": 0.10585,
      "grad_norm": 1.0847880823254095,
      "learning_rate": 0.003,
      "loss": 4.0722,
      "step": 10585
    },
    {
      "epoch": 0.10586,
      "grad_norm": 1.2200835861910564,
      "learning_rate": 0.003,
      "loss": 4.0548,
      "step": 10586
    },
    {
      "epoch": 0.10587,
      "grad_norm": 0.8932627861610137,
      "learning_rate": 0.003,
      "loss": 4.0574,
      "step": 10587
    },
    {
      "epoch": 0.10588,
      "grad_norm": 1.139469360617456,
      "learning_rate": 0.003,
      "loss": 4.019,
      "step": 10588
    },
    {
      "epoch": 0.10589,
      "grad_norm": 1.4344930118024855,
      "learning_rate": 0.003,
      "loss": 4.0414,
      "step": 10589
    },
    {
      "epoch": 0.1059,
      "grad_norm": 1.1102643686509654,
      "learning_rate": 0.003,
      "loss": 4.0264,
      "step": 10590
    },
    {
      "epoch": 0.10591,
      "grad_norm": 1.1481517513413337,
      "learning_rate": 0.003,
      "loss": 4.0175,
      "step": 10591
    },
    {
      "epoch": 0.10592,
      "grad_norm": 1.122474591041532,
      "learning_rate": 0.003,
      "loss": 4.0682,
      "step": 10592
    },
    {
      "epoch": 0.10593,
      "grad_norm": 1.2016781924295667,
      "learning_rate": 0.003,
      "loss": 4.005,
      "step": 10593
    },
    {
      "epoch": 0.10594,
      "grad_norm": 1.07376059125612,
      "learning_rate": 0.003,
      "loss": 4.033,
      "step": 10594
    },
    {
      "epoch": 0.10595,
      "grad_norm": 1.2904669765461627,
      "learning_rate": 0.003,
      "loss": 4.0303,
      "step": 10595
    },
    {
      "epoch": 0.10596,
      "grad_norm": 0.8779132609347672,
      "learning_rate": 0.003,
      "loss": 4.0349,
      "step": 10596
    },
    {
      "epoch": 0.10597,
      "grad_norm": 1.003771536112303,
      "learning_rate": 0.003,
      "loss": 4.0271,
      "step": 10597
    },
    {
      "epoch": 0.10598,
      "grad_norm": 1.0707616836208425,
      "learning_rate": 0.003,
      "loss": 4.0421,
      "step": 10598
    },
    {
      "epoch": 0.10599,
      "grad_norm": 0.9955175174902907,
      "learning_rate": 0.003,
      "loss": 4.0206,
      "step": 10599
    },
    {
      "epoch": 0.106,
      "grad_norm": 1.3851273303582732,
      "learning_rate": 0.003,
      "loss": 4.0381,
      "step": 10600
    },
    {
      "epoch": 0.10601,
      "grad_norm": 0.8326152872121492,
      "learning_rate": 0.003,
      "loss": 4.0298,
      "step": 10601
    },
    {
      "epoch": 0.10602,
      "grad_norm": 0.9405825691994378,
      "learning_rate": 0.003,
      "loss": 4.0489,
      "step": 10602
    },
    {
      "epoch": 0.10603,
      "grad_norm": 1.175319134130477,
      "learning_rate": 0.003,
      "loss": 4.0401,
      "step": 10603
    },
    {
      "epoch": 0.10604,
      "grad_norm": 1.2400190358250294,
      "learning_rate": 0.003,
      "loss": 4.0657,
      "step": 10604
    },
    {
      "epoch": 0.10605,
      "grad_norm": 1.3117115346590698,
      "learning_rate": 0.003,
      "loss": 4.0241,
      "step": 10605
    },
    {
      "epoch": 0.10606,
      "grad_norm": 0.8357581858102279,
      "learning_rate": 0.003,
      "loss": 4.0395,
      "step": 10606
    },
    {
      "epoch": 0.10607,
      "grad_norm": 1.2186083536406302,
      "learning_rate": 0.003,
      "loss": 4.0662,
      "step": 10607
    },
    {
      "epoch": 0.10608,
      "grad_norm": 1.3549499721362552,
      "learning_rate": 0.003,
      "loss": 4.038,
      "step": 10608
    },
    {
      "epoch": 0.10609,
      "grad_norm": 1.0359508960606705,
      "learning_rate": 0.003,
      "loss": 4.0462,
      "step": 10609
    },
    {
      "epoch": 0.1061,
      "grad_norm": 1.234427345167838,
      "learning_rate": 0.003,
      "loss": 4.0248,
      "step": 10610
    },
    {
      "epoch": 0.10611,
      "grad_norm": 1.1049507794457696,
      "learning_rate": 0.003,
      "loss": 3.9948,
      "step": 10611
    },
    {
      "epoch": 0.10612,
      "grad_norm": 1.1588897238356,
      "learning_rate": 0.003,
      "loss": 4.0209,
      "step": 10612
    },
    {
      "epoch": 0.10613,
      "grad_norm": 1.0882437441177095,
      "learning_rate": 0.003,
      "loss": 4.0182,
      "step": 10613
    },
    {
      "epoch": 0.10614,
      "grad_norm": 1.1661564975883256,
      "learning_rate": 0.003,
      "loss": 4.0415,
      "step": 10614
    },
    {
      "epoch": 0.10615,
      "grad_norm": 0.9756194205234838,
      "learning_rate": 0.003,
      "loss": 4.0354,
      "step": 10615
    },
    {
      "epoch": 0.10616,
      "grad_norm": 1.2136123225863367,
      "learning_rate": 0.003,
      "loss": 4.0497,
      "step": 10616
    },
    {
      "epoch": 0.10617,
      "grad_norm": 1.004520655549312,
      "learning_rate": 0.003,
      "loss": 4.0446,
      "step": 10617
    },
    {
      "epoch": 0.10618,
      "grad_norm": 1.293437758220031,
      "learning_rate": 0.003,
      "loss": 4.0319,
      "step": 10618
    },
    {
      "epoch": 0.10619,
      "grad_norm": 1.0745903694460457,
      "learning_rate": 0.003,
      "loss": 4.0372,
      "step": 10619
    },
    {
      "epoch": 0.1062,
      "grad_norm": 1.349386028002116,
      "learning_rate": 0.003,
      "loss": 4.0262,
      "step": 10620
    },
    {
      "epoch": 0.10621,
      "grad_norm": 1.0691689780916256,
      "learning_rate": 0.003,
      "loss": 4.0138,
      "step": 10621
    },
    {
      "epoch": 0.10622,
      "grad_norm": 1.224772531790401,
      "learning_rate": 0.003,
      "loss": 4.0508,
      "step": 10622
    },
    {
      "epoch": 0.10623,
      "grad_norm": 1.153790588423269,
      "learning_rate": 0.003,
      "loss": 4.0457,
      "step": 10623
    },
    {
      "epoch": 0.10624,
      "grad_norm": 1.193236273945861,
      "learning_rate": 0.003,
      "loss": 4.0242,
      "step": 10624
    },
    {
      "epoch": 0.10625,
      "grad_norm": 1.0654745501906075,
      "learning_rate": 0.003,
      "loss": 4.0167,
      "step": 10625
    },
    {
      "epoch": 0.10626,
      "grad_norm": 1.3261426114898907,
      "learning_rate": 0.003,
      "loss": 4.0064,
      "step": 10626
    },
    {
      "epoch": 0.10627,
      "grad_norm": 0.8982241457991678,
      "learning_rate": 0.003,
      "loss": 4.0343,
      "step": 10627
    },
    {
      "epoch": 0.10628,
      "grad_norm": 1.0114549713020966,
      "learning_rate": 0.003,
      "loss": 4.0149,
      "step": 10628
    },
    {
      "epoch": 0.10629,
      "grad_norm": 1.1489582478278977,
      "learning_rate": 0.003,
      "loss": 4.0265,
      "step": 10629
    },
    {
      "epoch": 0.1063,
      "grad_norm": 1.1444061673753105,
      "learning_rate": 0.003,
      "loss": 4.0137,
      "step": 10630
    },
    {
      "epoch": 0.10631,
      "grad_norm": 1.1576074418215807,
      "learning_rate": 0.003,
      "loss": 4.046,
      "step": 10631
    },
    {
      "epoch": 0.10632,
      "grad_norm": 1.3627959422433176,
      "learning_rate": 0.003,
      "loss": 4.0037,
      "step": 10632
    },
    {
      "epoch": 0.10633,
      "grad_norm": 1.06565267900167,
      "learning_rate": 0.003,
      "loss": 4.0136,
      "step": 10633
    },
    {
      "epoch": 0.10634,
      "grad_norm": 1.307799627360254,
      "learning_rate": 0.003,
      "loss": 4.0318,
      "step": 10634
    },
    {
      "epoch": 0.10635,
      "grad_norm": 0.865797003148197,
      "learning_rate": 0.003,
      "loss": 4.019,
      "step": 10635
    },
    {
      "epoch": 0.10636,
      "grad_norm": 1.0276563707055373,
      "learning_rate": 0.003,
      "loss": 4.0129,
      "step": 10636
    },
    {
      "epoch": 0.10637,
      "grad_norm": 1.179328642984521,
      "learning_rate": 0.003,
      "loss": 4.0285,
      "step": 10637
    },
    {
      "epoch": 0.10638,
      "grad_norm": 1.1306397758583546,
      "learning_rate": 0.003,
      "loss": 4.0264,
      "step": 10638
    },
    {
      "epoch": 0.10639,
      "grad_norm": 1.1379536938728096,
      "learning_rate": 0.003,
      "loss": 4.0304,
      "step": 10639
    },
    {
      "epoch": 0.1064,
      "grad_norm": 1.0529351002129548,
      "learning_rate": 0.003,
      "loss": 4.0124,
      "step": 10640
    },
    {
      "epoch": 0.10641,
      "grad_norm": 1.4104967072137549,
      "learning_rate": 0.003,
      "loss": 4.0351,
      "step": 10641
    },
    {
      "epoch": 0.10642,
      "grad_norm": 0.9234934199965433,
      "learning_rate": 0.003,
      "loss": 4.0153,
      "step": 10642
    },
    {
      "epoch": 0.10643,
      "grad_norm": 1.060649329346942,
      "learning_rate": 0.003,
      "loss": 4.0074,
      "step": 10643
    },
    {
      "epoch": 0.10644,
      "grad_norm": 1.1653395641699265,
      "learning_rate": 0.003,
      "loss": 4.0448,
      "step": 10644
    },
    {
      "epoch": 0.10645,
      "grad_norm": 1.05447570499781,
      "learning_rate": 0.003,
      "loss": 4.0183,
      "step": 10645
    },
    {
      "epoch": 0.10646,
      "grad_norm": 1.0414075025866627,
      "learning_rate": 0.003,
      "loss": 4.0189,
      "step": 10646
    },
    {
      "epoch": 0.10647,
      "grad_norm": 1.1643043878647805,
      "learning_rate": 0.003,
      "loss": 4.0313,
      "step": 10647
    },
    {
      "epoch": 0.10648,
      "grad_norm": 1.0684722197241294,
      "learning_rate": 0.003,
      "loss": 4.0274,
      "step": 10648
    },
    {
      "epoch": 0.10649,
      "grad_norm": 1.3080634492785814,
      "learning_rate": 0.003,
      "loss": 4.0257,
      "step": 10649
    },
    {
      "epoch": 0.1065,
      "grad_norm": 1.2923711297653349,
      "learning_rate": 0.003,
      "loss": 4.0653,
      "step": 10650
    },
    {
      "epoch": 0.10651,
      "grad_norm": 1.1133144821639869,
      "learning_rate": 0.003,
      "loss": 3.9937,
      "step": 10651
    },
    {
      "epoch": 0.10652,
      "grad_norm": 1.1508639513581163,
      "learning_rate": 0.003,
      "loss": 4.0315,
      "step": 10652
    },
    {
      "epoch": 0.10653,
      "grad_norm": 1.1860732849874172,
      "learning_rate": 0.003,
      "loss": 4.0407,
      "step": 10653
    },
    {
      "epoch": 0.10654,
      "grad_norm": 1.0365296506149477,
      "learning_rate": 0.003,
      "loss": 4.0266,
      "step": 10654
    },
    {
      "epoch": 0.10655,
      "grad_norm": 0.9561188769757217,
      "learning_rate": 0.003,
      "loss": 4.0325,
      "step": 10655
    },
    {
      "epoch": 0.10656,
      "grad_norm": 1.0940765619925885,
      "learning_rate": 0.003,
      "loss": 4.0171,
      "step": 10656
    },
    {
      "epoch": 0.10657,
      "grad_norm": 1.3722387207935904,
      "learning_rate": 0.003,
      "loss": 4.037,
      "step": 10657
    },
    {
      "epoch": 0.10658,
      "grad_norm": 1.1503504406176477,
      "learning_rate": 0.003,
      "loss": 4.0179,
      "step": 10658
    },
    {
      "epoch": 0.10659,
      "grad_norm": 0.966012228781889,
      "learning_rate": 0.003,
      "loss": 4.0342,
      "step": 10659
    },
    {
      "epoch": 0.1066,
      "grad_norm": 1.0144525965138471,
      "learning_rate": 0.003,
      "loss": 4.0125,
      "step": 10660
    },
    {
      "epoch": 0.10661,
      "grad_norm": 1.2051087661883464,
      "learning_rate": 0.003,
      "loss": 4.0368,
      "step": 10661
    },
    {
      "epoch": 0.10662,
      "grad_norm": 1.0764523304400608,
      "learning_rate": 0.003,
      "loss": 4.032,
      "step": 10662
    },
    {
      "epoch": 0.10663,
      "grad_norm": 1.209131260208791,
      "learning_rate": 0.003,
      "loss": 4.0201,
      "step": 10663
    },
    {
      "epoch": 0.10664,
      "grad_norm": 1.0568957147582319,
      "learning_rate": 0.003,
      "loss": 4.0175,
      "step": 10664
    },
    {
      "epoch": 0.10665,
      "grad_norm": 1.3417015377120707,
      "learning_rate": 0.003,
      "loss": 4.0312,
      "step": 10665
    },
    {
      "epoch": 0.10666,
      "grad_norm": 0.9362281562951543,
      "learning_rate": 0.003,
      "loss": 4.0082,
      "step": 10666
    },
    {
      "epoch": 0.10667,
      "grad_norm": 1.1589003721865672,
      "learning_rate": 0.003,
      "loss": 3.997,
      "step": 10667
    },
    {
      "epoch": 0.10668,
      "grad_norm": 1.2601287656447024,
      "learning_rate": 0.003,
      "loss": 4.0201,
      "step": 10668
    },
    {
      "epoch": 0.10669,
      "grad_norm": 1.061780696809955,
      "learning_rate": 0.003,
      "loss": 3.9927,
      "step": 10669
    },
    {
      "epoch": 0.1067,
      "grad_norm": 1.1903387328335544,
      "learning_rate": 0.003,
      "loss": 4.0168,
      "step": 10670
    },
    {
      "epoch": 0.10671,
      "grad_norm": 1.0301208869856568,
      "learning_rate": 0.003,
      "loss": 4.0199,
      "step": 10671
    },
    {
      "epoch": 0.10672,
      "grad_norm": 1.226842299928559,
      "learning_rate": 0.003,
      "loss": 4.0352,
      "step": 10672
    },
    {
      "epoch": 0.10673,
      "grad_norm": 1.1866348712385926,
      "learning_rate": 0.003,
      "loss": 4.0201,
      "step": 10673
    },
    {
      "epoch": 0.10674,
      "grad_norm": 1.0069430418737448,
      "learning_rate": 0.003,
      "loss": 4.0109,
      "step": 10674
    },
    {
      "epoch": 0.10675,
      "grad_norm": 1.2985241581711995,
      "learning_rate": 0.003,
      "loss": 4.0554,
      "step": 10675
    },
    {
      "epoch": 0.10676,
      "grad_norm": 1.0289286257118662,
      "learning_rate": 0.003,
      "loss": 4.0423,
      "step": 10676
    },
    {
      "epoch": 0.10677,
      "grad_norm": 1.1701123709616597,
      "learning_rate": 0.003,
      "loss": 3.9978,
      "step": 10677
    },
    {
      "epoch": 0.10678,
      "grad_norm": 1.1718815214063174,
      "learning_rate": 0.003,
      "loss": 4.0251,
      "step": 10678
    },
    {
      "epoch": 0.10679,
      "grad_norm": 1.2943084912685583,
      "learning_rate": 0.003,
      "loss": 4.0276,
      "step": 10679
    },
    {
      "epoch": 0.1068,
      "grad_norm": 1.1957684694159134,
      "learning_rate": 0.003,
      "loss": 4.0526,
      "step": 10680
    },
    {
      "epoch": 0.10681,
      "grad_norm": 1.0034264649008786,
      "learning_rate": 0.003,
      "loss": 4.0148,
      "step": 10681
    },
    {
      "epoch": 0.10682,
      "grad_norm": 1.2761049621546057,
      "learning_rate": 0.003,
      "loss": 4.0534,
      "step": 10682
    },
    {
      "epoch": 0.10683,
      "grad_norm": 1.0598055696545081,
      "learning_rate": 0.003,
      "loss": 4.0326,
      "step": 10683
    },
    {
      "epoch": 0.10684,
      "grad_norm": 1.218444770158993,
      "learning_rate": 0.003,
      "loss": 4.0367,
      "step": 10684
    },
    {
      "epoch": 0.10685,
      "grad_norm": 0.9258871795824707,
      "learning_rate": 0.003,
      "loss": 4.0452,
      "step": 10685
    },
    {
      "epoch": 0.10686,
      "grad_norm": 1.133386229580556,
      "learning_rate": 0.003,
      "loss": 4.0377,
      "step": 10686
    },
    {
      "epoch": 0.10687,
      "grad_norm": 1.2653062418463612,
      "learning_rate": 0.003,
      "loss": 4.0331,
      "step": 10687
    },
    {
      "epoch": 0.10688,
      "grad_norm": 1.184995544981333,
      "learning_rate": 0.003,
      "loss": 4.0387,
      "step": 10688
    },
    {
      "epoch": 0.10689,
      "grad_norm": 1.0320005457605674,
      "learning_rate": 0.003,
      "loss": 4.0263,
      "step": 10689
    },
    {
      "epoch": 0.1069,
      "grad_norm": 1.2038777888203986,
      "learning_rate": 0.003,
      "loss": 4.0366,
      "step": 10690
    },
    {
      "epoch": 0.10691,
      "grad_norm": 1.129676406607381,
      "learning_rate": 0.003,
      "loss": 4.0622,
      "step": 10691
    },
    {
      "epoch": 0.10692,
      "grad_norm": 1.3575951971898041,
      "learning_rate": 0.003,
      "loss": 4.0376,
      "step": 10692
    },
    {
      "epoch": 0.10693,
      "grad_norm": 1.1196525664794985,
      "learning_rate": 0.003,
      "loss": 4.0102,
      "step": 10693
    },
    {
      "epoch": 0.10694,
      "grad_norm": 1.215242402090735,
      "learning_rate": 0.003,
      "loss": 4.0282,
      "step": 10694
    },
    {
      "epoch": 0.10695,
      "grad_norm": 1.013327742595491,
      "learning_rate": 0.003,
      "loss": 4.0156,
      "step": 10695
    },
    {
      "epoch": 0.10696,
      "grad_norm": 1.095537976839923,
      "learning_rate": 0.003,
      "loss": 4.0409,
      "step": 10696
    },
    {
      "epoch": 0.10697,
      "grad_norm": 1.2129639377799921,
      "learning_rate": 0.003,
      "loss": 4.0414,
      "step": 10697
    },
    {
      "epoch": 0.10698,
      "grad_norm": 1.1276205907442978,
      "learning_rate": 0.003,
      "loss": 4.0463,
      "step": 10698
    },
    {
      "epoch": 0.10699,
      "grad_norm": 1.4057292020168137,
      "learning_rate": 0.003,
      "loss": 4.0678,
      "step": 10699
    },
    {
      "epoch": 0.107,
      "grad_norm": 1.0889453831761258,
      "learning_rate": 0.003,
      "loss": 4.0545,
      "step": 10700
    },
    {
      "epoch": 0.10701,
      "grad_norm": 1.2636981543117896,
      "learning_rate": 0.003,
      "loss": 4.0221,
      "step": 10701
    },
    {
      "epoch": 0.10702,
      "grad_norm": 1.0444226985618574,
      "learning_rate": 0.003,
      "loss": 4.0193,
      "step": 10702
    },
    {
      "epoch": 0.10703,
      "grad_norm": 1.2951500282214232,
      "learning_rate": 0.003,
      "loss": 4.0465,
      "step": 10703
    },
    {
      "epoch": 0.10704,
      "grad_norm": 1.0393505702059902,
      "learning_rate": 0.003,
      "loss": 4.0307,
      "step": 10704
    },
    {
      "epoch": 0.10705,
      "grad_norm": 1.1797188685102178,
      "learning_rate": 0.003,
      "loss": 4.037,
      "step": 10705
    },
    {
      "epoch": 0.10706,
      "grad_norm": 1.1975777074687664,
      "learning_rate": 0.003,
      "loss": 4.0504,
      "step": 10706
    },
    {
      "epoch": 0.10707,
      "grad_norm": 1.194075879308646,
      "learning_rate": 0.003,
      "loss": 4.0179,
      "step": 10707
    },
    {
      "epoch": 0.10708,
      "grad_norm": 1.0102354176185093,
      "learning_rate": 0.003,
      "loss": 4.0426,
      "step": 10708
    },
    {
      "epoch": 0.10709,
      "grad_norm": 1.3131806649777147,
      "learning_rate": 0.003,
      "loss": 4.0477,
      "step": 10709
    },
    {
      "epoch": 0.1071,
      "grad_norm": 1.2373109858890716,
      "learning_rate": 0.003,
      "loss": 4.0307,
      "step": 10710
    },
    {
      "epoch": 0.10711,
      "grad_norm": 1.2003702638346156,
      "learning_rate": 0.003,
      "loss": 4.0403,
      "step": 10711
    },
    {
      "epoch": 0.10712,
      "grad_norm": 0.9983255919354295,
      "learning_rate": 0.003,
      "loss": 4.0523,
      "step": 10712
    },
    {
      "epoch": 0.10713,
      "grad_norm": 0.9877113113359729,
      "learning_rate": 0.003,
      "loss": 4.0272,
      "step": 10713
    },
    {
      "epoch": 0.10714,
      "grad_norm": 1.3460252903807608,
      "learning_rate": 0.003,
      "loss": 4.0573,
      "step": 10714
    },
    {
      "epoch": 0.10715,
      "grad_norm": 0.9965502362486884,
      "learning_rate": 0.003,
      "loss": 4.0206,
      "step": 10715
    },
    {
      "epoch": 0.10716,
      "grad_norm": 1.2725890037766499,
      "learning_rate": 0.003,
      "loss": 4.0469,
      "step": 10716
    },
    {
      "epoch": 0.10717,
      "grad_norm": 1.1113072976703275,
      "learning_rate": 0.003,
      "loss": 4.0389,
      "step": 10717
    },
    {
      "epoch": 0.10718,
      "grad_norm": 1.0100186651689251,
      "learning_rate": 0.003,
      "loss": 3.9967,
      "step": 10718
    },
    {
      "epoch": 0.10719,
      "grad_norm": 1.0640701601768137,
      "learning_rate": 0.003,
      "loss": 4.0508,
      "step": 10719
    },
    {
      "epoch": 0.1072,
      "grad_norm": 1.0593061211004513,
      "learning_rate": 0.003,
      "loss": 4.0667,
      "step": 10720
    },
    {
      "epoch": 0.10721,
      "grad_norm": 1.3099431143363085,
      "learning_rate": 0.003,
      "loss": 4.0442,
      "step": 10721
    },
    {
      "epoch": 0.10722,
      "grad_norm": 0.9809915726075293,
      "learning_rate": 0.003,
      "loss": 4.016,
      "step": 10722
    },
    {
      "epoch": 0.10723,
      "grad_norm": 1.3406390414189182,
      "learning_rate": 0.003,
      "loss": 4.0412,
      "step": 10723
    },
    {
      "epoch": 0.10724,
      "grad_norm": 1.1349839036395613,
      "learning_rate": 0.003,
      "loss": 4.023,
      "step": 10724
    },
    {
      "epoch": 0.10725,
      "grad_norm": 1.1603735826420933,
      "learning_rate": 0.003,
      "loss": 4.0052,
      "step": 10725
    },
    {
      "epoch": 0.10726,
      "grad_norm": 1.1306634640881634,
      "learning_rate": 0.003,
      "loss": 4.0504,
      "step": 10726
    },
    {
      "epoch": 0.10727,
      "grad_norm": 1.0401524644138374,
      "learning_rate": 0.003,
      "loss": 4.0379,
      "step": 10727
    },
    {
      "epoch": 0.10728,
      "grad_norm": 1.2861473387032047,
      "learning_rate": 0.003,
      "loss": 4.0525,
      "step": 10728
    },
    {
      "epoch": 0.10729,
      "grad_norm": 0.8700171921619617,
      "learning_rate": 0.003,
      "loss": 4.0185,
      "step": 10729
    },
    {
      "epoch": 0.1073,
      "grad_norm": 1.1494268855327163,
      "learning_rate": 0.003,
      "loss": 4.0219,
      "step": 10730
    },
    {
      "epoch": 0.10731,
      "grad_norm": 1.0235158899515824,
      "learning_rate": 0.003,
      "loss": 4.0216,
      "step": 10731
    },
    {
      "epoch": 0.10732,
      "grad_norm": 1.3171688814462412,
      "learning_rate": 0.003,
      "loss": 4.0469,
      "step": 10732
    },
    {
      "epoch": 0.10733,
      "grad_norm": 1.1618346197242664,
      "learning_rate": 0.003,
      "loss": 4.0456,
      "step": 10733
    },
    {
      "epoch": 0.10734,
      "grad_norm": 1.299839347131802,
      "learning_rate": 0.003,
      "loss": 4.0321,
      "step": 10734
    },
    {
      "epoch": 0.10735,
      "grad_norm": 0.9596941109018636,
      "learning_rate": 0.003,
      "loss": 4.0353,
      "step": 10735
    },
    {
      "epoch": 0.10736,
      "grad_norm": 1.047297478533331,
      "learning_rate": 0.003,
      "loss": 3.9969,
      "step": 10736
    },
    {
      "epoch": 0.10737,
      "grad_norm": 1.0623586974481836,
      "learning_rate": 0.003,
      "loss": 4.0269,
      "step": 10737
    },
    {
      "epoch": 0.10738,
      "grad_norm": 0.9993600202006488,
      "learning_rate": 0.003,
      "loss": 4.0128,
      "step": 10738
    },
    {
      "epoch": 0.10739,
      "grad_norm": 1.254811328838207,
      "learning_rate": 0.003,
      "loss": 4.0349,
      "step": 10739
    },
    {
      "epoch": 0.1074,
      "grad_norm": 1.02443107388267,
      "learning_rate": 0.003,
      "loss": 4.0162,
      "step": 10740
    },
    {
      "epoch": 0.10741,
      "grad_norm": 1.053670941966742,
      "learning_rate": 0.003,
      "loss": 4.0419,
      "step": 10741
    },
    {
      "epoch": 0.10742,
      "grad_norm": 1.3054887606936714,
      "learning_rate": 0.003,
      "loss": 4.0291,
      "step": 10742
    },
    {
      "epoch": 0.10743,
      "grad_norm": 1.1815464442351478,
      "learning_rate": 0.003,
      "loss": 4.0526,
      "step": 10743
    },
    {
      "epoch": 0.10744,
      "grad_norm": 1.0439503530541596,
      "learning_rate": 0.003,
      "loss": 4.0257,
      "step": 10744
    },
    {
      "epoch": 0.10745,
      "grad_norm": 1.2910029674176995,
      "learning_rate": 0.003,
      "loss": 4.0282,
      "step": 10745
    },
    {
      "epoch": 0.10746,
      "grad_norm": 1.1171478726339876,
      "learning_rate": 0.003,
      "loss": 4.0289,
      "step": 10746
    },
    {
      "epoch": 0.10747,
      "grad_norm": 1.316025255296466,
      "learning_rate": 0.003,
      "loss": 4.0397,
      "step": 10747
    },
    {
      "epoch": 0.10748,
      "grad_norm": 1.0185714857385384,
      "learning_rate": 0.003,
      "loss": 4.0551,
      "step": 10748
    },
    {
      "epoch": 0.10749,
      "grad_norm": 1.2319455923474587,
      "learning_rate": 0.003,
      "loss": 4.0415,
      "step": 10749
    },
    {
      "epoch": 0.1075,
      "grad_norm": 1.0698239902071434,
      "learning_rate": 0.003,
      "loss": 4.0456,
      "step": 10750
    },
    {
      "epoch": 0.10751,
      "grad_norm": 1.1767304687464153,
      "learning_rate": 0.003,
      "loss": 4.0584,
      "step": 10751
    },
    {
      "epoch": 0.10752,
      "grad_norm": 1.200346526551157,
      "learning_rate": 0.003,
      "loss": 4.0248,
      "step": 10752
    },
    {
      "epoch": 0.10753,
      "grad_norm": 0.8004910788946202,
      "learning_rate": 0.003,
      "loss": 4.0136,
      "step": 10753
    },
    {
      "epoch": 0.10754,
      "grad_norm": 0.9299169462294767,
      "learning_rate": 0.003,
      "loss": 4.0251,
      "step": 10754
    },
    {
      "epoch": 0.10755,
      "grad_norm": 1.1496170197907911,
      "learning_rate": 0.003,
      "loss": 4.0447,
      "step": 10755
    },
    {
      "epoch": 0.10756,
      "grad_norm": 1.3269009046033973,
      "learning_rate": 0.003,
      "loss": 4.0262,
      "step": 10756
    },
    {
      "epoch": 0.10757,
      "grad_norm": 1.01836707032746,
      "learning_rate": 0.003,
      "loss": 4.0339,
      "step": 10757
    },
    {
      "epoch": 0.10758,
      "grad_norm": 1.4360512920311785,
      "learning_rate": 0.003,
      "loss": 4.0439,
      "step": 10758
    },
    {
      "epoch": 0.10759,
      "grad_norm": 0.8198014966019104,
      "learning_rate": 0.003,
      "loss": 3.997,
      "step": 10759
    },
    {
      "epoch": 0.1076,
      "grad_norm": 0.9805123439382194,
      "learning_rate": 0.003,
      "loss": 4.033,
      "step": 10760
    },
    {
      "epoch": 0.10761,
      "grad_norm": 1.2404719998449543,
      "learning_rate": 0.003,
      "loss": 4.0294,
      "step": 10761
    },
    {
      "epoch": 0.10762,
      "grad_norm": 1.100158666709582,
      "learning_rate": 0.003,
      "loss": 4.0441,
      "step": 10762
    },
    {
      "epoch": 0.10763,
      "grad_norm": 1.255637890490154,
      "learning_rate": 0.003,
      "loss": 4.0298,
      "step": 10763
    },
    {
      "epoch": 0.10764,
      "grad_norm": 1.0281623834755365,
      "learning_rate": 0.003,
      "loss": 4.0375,
      "step": 10764
    },
    {
      "epoch": 0.10765,
      "grad_norm": 1.4149964948090226,
      "learning_rate": 0.003,
      "loss": 4.038,
      "step": 10765
    },
    {
      "epoch": 0.10766,
      "grad_norm": 0.9836393890872247,
      "learning_rate": 0.003,
      "loss": 4.0217,
      "step": 10766
    },
    {
      "epoch": 0.10767,
      "grad_norm": 1.2741151461584197,
      "learning_rate": 0.003,
      "loss": 4.0314,
      "step": 10767
    },
    {
      "epoch": 0.10768,
      "grad_norm": 1.1238950688453537,
      "learning_rate": 0.003,
      "loss": 4.0253,
      "step": 10768
    },
    {
      "epoch": 0.10769,
      "grad_norm": 1.0948648776611019,
      "learning_rate": 0.003,
      "loss": 4.0292,
      "step": 10769
    },
    {
      "epoch": 0.1077,
      "grad_norm": 1.201055603123503,
      "learning_rate": 0.003,
      "loss": 4.0401,
      "step": 10770
    },
    {
      "epoch": 0.10771,
      "grad_norm": 1.0193171022277756,
      "learning_rate": 0.003,
      "loss": 4.0383,
      "step": 10771
    },
    {
      "epoch": 0.10772,
      "grad_norm": 1.3771536426901734,
      "learning_rate": 0.003,
      "loss": 4.0248,
      "step": 10772
    },
    {
      "epoch": 0.10773,
      "grad_norm": 1.036422327432776,
      "learning_rate": 0.003,
      "loss": 4.0361,
      "step": 10773
    },
    {
      "epoch": 0.10774,
      "grad_norm": 1.3787602954979676,
      "learning_rate": 0.003,
      "loss": 4.0463,
      "step": 10774
    },
    {
      "epoch": 0.10775,
      "grad_norm": 0.9876267992932676,
      "learning_rate": 0.003,
      "loss": 4.0442,
      "step": 10775
    },
    {
      "epoch": 0.10776,
      "grad_norm": 1.2287074239953188,
      "learning_rate": 0.003,
      "loss": 4.0231,
      "step": 10776
    },
    {
      "epoch": 0.10777,
      "grad_norm": 1.1159465296748252,
      "learning_rate": 0.003,
      "loss": 4.0378,
      "step": 10777
    },
    {
      "epoch": 0.10778,
      "grad_norm": 1.2143634624449668,
      "learning_rate": 0.003,
      "loss": 4.0201,
      "step": 10778
    },
    {
      "epoch": 0.10779,
      "grad_norm": 0.8371084824202994,
      "learning_rate": 0.003,
      "loss": 3.9984,
      "step": 10779
    },
    {
      "epoch": 0.1078,
      "grad_norm": 1.0448266717192551,
      "learning_rate": 0.003,
      "loss": 4.0455,
      "step": 10780
    },
    {
      "epoch": 0.10781,
      "grad_norm": 1.339724186849472,
      "learning_rate": 0.003,
      "loss": 4.0087,
      "step": 10781
    },
    {
      "epoch": 0.10782,
      "grad_norm": 0.9542629472880868,
      "learning_rate": 0.003,
      "loss": 4.0245,
      "step": 10782
    },
    {
      "epoch": 0.10783,
      "grad_norm": 1.1863204682968913,
      "learning_rate": 0.003,
      "loss": 4.0301,
      "step": 10783
    },
    {
      "epoch": 0.10784,
      "grad_norm": 1.4462798896316145,
      "learning_rate": 0.003,
      "loss": 4.0547,
      "step": 10784
    },
    {
      "epoch": 0.10785,
      "grad_norm": 1.0951546615964138,
      "learning_rate": 0.003,
      "loss": 4.0329,
      "step": 10785
    },
    {
      "epoch": 0.10786,
      "grad_norm": 1.296430995678855,
      "learning_rate": 0.003,
      "loss": 4.0492,
      "step": 10786
    },
    {
      "epoch": 0.10787,
      "grad_norm": 0.8997746711593995,
      "learning_rate": 0.003,
      "loss": 4.0055,
      "step": 10787
    },
    {
      "epoch": 0.10788,
      "grad_norm": 1.170708369493236,
      "learning_rate": 0.003,
      "loss": 4.0215,
      "step": 10788
    },
    {
      "epoch": 0.10789,
      "grad_norm": 1.1961220945918547,
      "learning_rate": 0.003,
      "loss": 3.981,
      "step": 10789
    },
    {
      "epoch": 0.1079,
      "grad_norm": 1.2476033414148846,
      "learning_rate": 0.003,
      "loss": 4.0461,
      "step": 10790
    },
    {
      "epoch": 0.10791,
      "grad_norm": 1.3938502880572137,
      "learning_rate": 0.003,
      "loss": 4.0531,
      "step": 10791
    },
    {
      "epoch": 0.10792,
      "grad_norm": 0.8287063955222098,
      "learning_rate": 0.003,
      "loss": 4.0082,
      "step": 10792
    },
    {
      "epoch": 0.10793,
      "grad_norm": 0.9968889789622127,
      "learning_rate": 0.003,
      "loss": 4.0185,
      "step": 10793
    },
    {
      "epoch": 0.10794,
      "grad_norm": 1.2636523367148476,
      "learning_rate": 0.003,
      "loss": 4.0512,
      "step": 10794
    },
    {
      "epoch": 0.10795,
      "grad_norm": 0.9320188951631827,
      "learning_rate": 0.003,
      "loss": 3.9924,
      "step": 10795
    },
    {
      "epoch": 0.10796,
      "grad_norm": 1.13871483259714,
      "learning_rate": 0.003,
      "loss": 4.0237,
      "step": 10796
    },
    {
      "epoch": 0.10797,
      "grad_norm": 1.1430569400531418,
      "learning_rate": 0.003,
      "loss": 4.0057,
      "step": 10797
    },
    {
      "epoch": 0.10798,
      "grad_norm": 1.2223707770106316,
      "learning_rate": 0.003,
      "loss": 4.027,
      "step": 10798
    },
    {
      "epoch": 0.10799,
      "grad_norm": 1.0797391466920707,
      "learning_rate": 0.003,
      "loss": 4.0007,
      "step": 10799
    },
    {
      "epoch": 0.108,
      "grad_norm": 1.5331772300169273,
      "learning_rate": 0.003,
      "loss": 4.0505,
      "step": 10800
    },
    {
      "epoch": 0.10801,
      "grad_norm": 0.8462632416981025,
      "learning_rate": 0.003,
      "loss": 4.0265,
      "step": 10801
    },
    {
      "epoch": 0.10802,
      "grad_norm": 1.1323280804246525,
      "learning_rate": 0.003,
      "loss": 4.0288,
      "step": 10802
    },
    {
      "epoch": 0.10803,
      "grad_norm": 1.2472360350753866,
      "learning_rate": 0.003,
      "loss": 4.0445,
      "step": 10803
    },
    {
      "epoch": 0.10804,
      "grad_norm": 1.0221991986540284,
      "learning_rate": 0.003,
      "loss": 4.0338,
      "step": 10804
    },
    {
      "epoch": 0.10805,
      "grad_norm": 1.193633649724736,
      "learning_rate": 0.003,
      "loss": 4.0289,
      "step": 10805
    },
    {
      "epoch": 0.10806,
      "grad_norm": 1.128446301055873,
      "learning_rate": 0.003,
      "loss": 4.0137,
      "step": 10806
    },
    {
      "epoch": 0.10807,
      "grad_norm": 1.3429005701975214,
      "learning_rate": 0.003,
      "loss": 4.0147,
      "step": 10807
    },
    {
      "epoch": 0.10808,
      "grad_norm": 1.1916229558274094,
      "learning_rate": 0.003,
      "loss": 4.1095,
      "step": 10808
    },
    {
      "epoch": 0.10809,
      "grad_norm": 1.1666311030390335,
      "learning_rate": 0.003,
      "loss": 4.0351,
      "step": 10809
    },
    {
      "epoch": 0.1081,
      "grad_norm": 1.0948133461800402,
      "learning_rate": 0.003,
      "loss": 4.0299,
      "step": 10810
    },
    {
      "epoch": 0.10811,
      "grad_norm": 1.070956586984476,
      "learning_rate": 0.003,
      "loss": 4.0541,
      "step": 10811
    },
    {
      "epoch": 0.10812,
      "grad_norm": 1.1405962531093388,
      "learning_rate": 0.003,
      "loss": 4.0003,
      "step": 10812
    },
    {
      "epoch": 0.10813,
      "grad_norm": 1.1923710781482288,
      "learning_rate": 0.003,
      "loss": 4.0189,
      "step": 10813
    },
    {
      "epoch": 0.10814,
      "grad_norm": 0.9637789414068072,
      "learning_rate": 0.003,
      "loss": 4.0237,
      "step": 10814
    },
    {
      "epoch": 0.10815,
      "grad_norm": 1.1303684936509142,
      "learning_rate": 0.003,
      "loss": 4.033,
      "step": 10815
    },
    {
      "epoch": 0.10816,
      "grad_norm": 1.1277334351478485,
      "learning_rate": 0.003,
      "loss": 4.0341,
      "step": 10816
    },
    {
      "epoch": 0.10817,
      "grad_norm": 1.394920688077164,
      "learning_rate": 0.003,
      "loss": 4.036,
      "step": 10817
    },
    {
      "epoch": 0.10818,
      "grad_norm": 0.9247339438137491,
      "learning_rate": 0.003,
      "loss": 4.0244,
      "step": 10818
    },
    {
      "epoch": 0.10819,
      "grad_norm": 1.2827723765119825,
      "learning_rate": 0.003,
      "loss": 4.0319,
      "step": 10819
    },
    {
      "epoch": 0.1082,
      "grad_norm": 1.1027026712478345,
      "learning_rate": 0.003,
      "loss": 4.0274,
      "step": 10820
    },
    {
      "epoch": 0.10821,
      "grad_norm": 1.130402720324394,
      "learning_rate": 0.003,
      "loss": 3.9894,
      "step": 10821
    },
    {
      "epoch": 0.10822,
      "grad_norm": 1.1822569814525696,
      "learning_rate": 0.003,
      "loss": 4.0147,
      "step": 10822
    },
    {
      "epoch": 0.10823,
      "grad_norm": 1.3160765140161788,
      "learning_rate": 0.003,
      "loss": 4.0094,
      "step": 10823
    },
    {
      "epoch": 0.10824,
      "grad_norm": 1.2344419298288998,
      "learning_rate": 0.003,
      "loss": 4.0384,
      "step": 10824
    },
    {
      "epoch": 0.10825,
      "grad_norm": 0.9804550860424565,
      "learning_rate": 0.003,
      "loss": 4.0067,
      "step": 10825
    },
    {
      "epoch": 0.10826,
      "grad_norm": 1.2393166369862887,
      "learning_rate": 0.003,
      "loss": 3.9966,
      "step": 10826
    },
    {
      "epoch": 0.10827,
      "grad_norm": 1.1285558425268936,
      "learning_rate": 0.003,
      "loss": 4.0587,
      "step": 10827
    },
    {
      "epoch": 0.10828,
      "grad_norm": 1.2190487151901024,
      "learning_rate": 0.003,
      "loss": 4.0677,
      "step": 10828
    },
    {
      "epoch": 0.10829,
      "grad_norm": 1.0051177849528747,
      "learning_rate": 0.003,
      "loss": 4.0089,
      "step": 10829
    },
    {
      "epoch": 0.1083,
      "grad_norm": 1.2475552966341636,
      "learning_rate": 0.003,
      "loss": 4.0468,
      "step": 10830
    },
    {
      "epoch": 0.10831,
      "grad_norm": 1.086747469310628,
      "learning_rate": 0.003,
      "loss": 4.0198,
      "step": 10831
    },
    {
      "epoch": 0.10832,
      "grad_norm": 1.2521787369584434,
      "learning_rate": 0.003,
      "loss": 4.0529,
      "step": 10832
    },
    {
      "epoch": 0.10833,
      "grad_norm": 1.1044210264497303,
      "learning_rate": 0.003,
      "loss": 4.0245,
      "step": 10833
    },
    {
      "epoch": 0.10834,
      "grad_norm": 1.121459039289483,
      "learning_rate": 0.003,
      "loss": 3.988,
      "step": 10834
    },
    {
      "epoch": 0.10835,
      "grad_norm": 1.1128708174680606,
      "learning_rate": 0.003,
      "loss": 4.0194,
      "step": 10835
    },
    {
      "epoch": 0.10836,
      "grad_norm": 1.5980637509276823,
      "learning_rate": 0.003,
      "loss": 4.0446,
      "step": 10836
    },
    {
      "epoch": 0.10837,
      "grad_norm": 1.2324712817974364,
      "learning_rate": 0.003,
      "loss": 4.038,
      "step": 10837
    },
    {
      "epoch": 0.10838,
      "grad_norm": 1.0657770844010468,
      "learning_rate": 0.003,
      "loss": 4.0489,
      "step": 10838
    },
    {
      "epoch": 0.10839,
      "grad_norm": 1.0927040171769402,
      "learning_rate": 0.003,
      "loss": 4.0296,
      "step": 10839
    },
    {
      "epoch": 0.1084,
      "grad_norm": 1.1359360509882592,
      "learning_rate": 0.003,
      "loss": 4.0499,
      "step": 10840
    },
    {
      "epoch": 0.10841,
      "grad_norm": 0.903405768475979,
      "learning_rate": 0.003,
      "loss": 4.0259,
      "step": 10841
    },
    {
      "epoch": 0.10842,
      "grad_norm": 1.227696001765903,
      "learning_rate": 0.003,
      "loss": 4.046,
      "step": 10842
    },
    {
      "epoch": 0.10843,
      "grad_norm": 1.2265984387203541,
      "learning_rate": 0.003,
      "loss": 4.031,
      "step": 10843
    },
    {
      "epoch": 0.10844,
      "grad_norm": 1.2973674659059795,
      "learning_rate": 0.003,
      "loss": 4.0612,
      "step": 10844
    },
    {
      "epoch": 0.10845,
      "grad_norm": 1.0539275266266204,
      "learning_rate": 0.003,
      "loss": 4.0486,
      "step": 10845
    },
    {
      "epoch": 0.10846,
      "grad_norm": 1.2771787744436187,
      "learning_rate": 0.003,
      "loss": 4.0292,
      "step": 10846
    },
    {
      "epoch": 0.10847,
      "grad_norm": 1.1019062809673246,
      "learning_rate": 0.003,
      "loss": 4.0469,
      "step": 10847
    },
    {
      "epoch": 0.10848,
      "grad_norm": 1.170952800012063,
      "learning_rate": 0.003,
      "loss": 3.9984,
      "step": 10848
    },
    {
      "epoch": 0.10849,
      "grad_norm": 1.199786332475287,
      "learning_rate": 0.003,
      "loss": 4.0089,
      "step": 10849
    },
    {
      "epoch": 0.1085,
      "grad_norm": 1.0400770912095054,
      "learning_rate": 0.003,
      "loss": 4.0171,
      "step": 10850
    },
    {
      "epoch": 0.10851,
      "grad_norm": 1.169933869139355,
      "learning_rate": 0.003,
      "loss": 4.0357,
      "step": 10851
    },
    {
      "epoch": 0.10852,
      "grad_norm": 1.134282690971657,
      "learning_rate": 0.003,
      "loss": 4.0391,
      "step": 10852
    },
    {
      "epoch": 0.10853,
      "grad_norm": 1.1214683357708541,
      "learning_rate": 0.003,
      "loss": 4.0425,
      "step": 10853
    },
    {
      "epoch": 0.10854,
      "grad_norm": 1.1718280449767655,
      "learning_rate": 0.003,
      "loss": 4.0051,
      "step": 10854
    },
    {
      "epoch": 0.10855,
      "grad_norm": 1.3835381568865763,
      "learning_rate": 0.003,
      "loss": 4.0154,
      "step": 10855
    },
    {
      "epoch": 0.10856,
      "grad_norm": 0.8314414595792815,
      "learning_rate": 0.003,
      "loss": 4.0299,
      "step": 10856
    },
    {
      "epoch": 0.10857,
      "grad_norm": 0.9164952496161265,
      "learning_rate": 0.003,
      "loss": 4.0166,
      "step": 10857
    },
    {
      "epoch": 0.10858,
      "grad_norm": 1.1610451470170327,
      "learning_rate": 0.003,
      "loss": 4.0358,
      "step": 10858
    },
    {
      "epoch": 0.10859,
      "grad_norm": 1.1965855517444213,
      "learning_rate": 0.003,
      "loss": 4.0111,
      "step": 10859
    },
    {
      "epoch": 0.1086,
      "grad_norm": 1.128342345854473,
      "learning_rate": 0.003,
      "loss": 4.0178,
      "step": 10860
    },
    {
      "epoch": 0.10861,
      "grad_norm": 1.2002004859159896,
      "learning_rate": 0.003,
      "loss": 4.0459,
      "step": 10861
    },
    {
      "epoch": 0.10862,
      "grad_norm": 1.2403320918171405,
      "learning_rate": 0.003,
      "loss": 4.0751,
      "step": 10862
    },
    {
      "epoch": 0.10863,
      "grad_norm": 0.9972196835269065,
      "learning_rate": 0.003,
      "loss": 4.0403,
      "step": 10863
    },
    {
      "epoch": 0.10864,
      "grad_norm": 1.387585591578004,
      "learning_rate": 0.003,
      "loss": 4.0297,
      "step": 10864
    },
    {
      "epoch": 0.10865,
      "grad_norm": 0.9421221076213664,
      "learning_rate": 0.003,
      "loss": 4.0349,
      "step": 10865
    },
    {
      "epoch": 0.10866,
      "grad_norm": 1.1948737529367885,
      "learning_rate": 0.003,
      "loss": 4.0402,
      "step": 10866
    },
    {
      "epoch": 0.10867,
      "grad_norm": 1.022212784674362,
      "learning_rate": 0.003,
      "loss": 4.0034,
      "step": 10867
    },
    {
      "epoch": 0.10868,
      "grad_norm": 1.2732584760819687,
      "learning_rate": 0.003,
      "loss": 4.0091,
      "step": 10868
    },
    {
      "epoch": 0.10869,
      "grad_norm": 1.1047154277724311,
      "learning_rate": 0.003,
      "loss": 4.0306,
      "step": 10869
    },
    {
      "epoch": 0.1087,
      "grad_norm": 1.1854424862823498,
      "learning_rate": 0.003,
      "loss": 4.0376,
      "step": 10870
    },
    {
      "epoch": 0.10871,
      "grad_norm": 1.1741520672533723,
      "learning_rate": 0.003,
      "loss": 4.0168,
      "step": 10871
    },
    {
      "epoch": 0.10872,
      "grad_norm": 1.0480602065379063,
      "learning_rate": 0.003,
      "loss": 4.018,
      "step": 10872
    },
    {
      "epoch": 0.10873,
      "grad_norm": 1.2571567165899944,
      "learning_rate": 0.003,
      "loss": 4.0374,
      "step": 10873
    },
    {
      "epoch": 0.10874,
      "grad_norm": 1.337309331111073,
      "learning_rate": 0.003,
      "loss": 4.0266,
      "step": 10874
    },
    {
      "epoch": 0.10875,
      "grad_norm": 1.2565095414668002,
      "learning_rate": 0.003,
      "loss": 4.0504,
      "step": 10875
    },
    {
      "epoch": 0.10876,
      "grad_norm": 0.9744590907381745,
      "learning_rate": 0.003,
      "loss": 3.9856,
      "step": 10876
    },
    {
      "epoch": 0.10877,
      "grad_norm": 1.3835777093829644,
      "learning_rate": 0.003,
      "loss": 4.0207,
      "step": 10877
    },
    {
      "epoch": 0.10878,
      "grad_norm": 0.9441806514467737,
      "learning_rate": 0.003,
      "loss": 4.0164,
      "step": 10878
    },
    {
      "epoch": 0.10879,
      "grad_norm": 1.1451762360336781,
      "learning_rate": 0.003,
      "loss": 4.0325,
      "step": 10879
    },
    {
      "epoch": 0.1088,
      "grad_norm": 1.1541770622613978,
      "learning_rate": 0.003,
      "loss": 4.0177,
      "step": 10880
    },
    {
      "epoch": 0.10881,
      "grad_norm": 1.1458016315587218,
      "learning_rate": 0.003,
      "loss": 4.0009,
      "step": 10881
    },
    {
      "epoch": 0.10882,
      "grad_norm": 1.070149396095645,
      "learning_rate": 0.003,
      "loss": 4.0234,
      "step": 10882
    },
    {
      "epoch": 0.10883,
      "grad_norm": 1.177388796693508,
      "learning_rate": 0.003,
      "loss": 4.0449,
      "step": 10883
    },
    {
      "epoch": 0.10884,
      "grad_norm": 1.097237368443832,
      "learning_rate": 0.003,
      "loss": 4.03,
      "step": 10884
    },
    {
      "epoch": 0.10885,
      "grad_norm": 1.200352068902575,
      "learning_rate": 0.003,
      "loss": 4.0195,
      "step": 10885
    },
    {
      "epoch": 0.10886,
      "grad_norm": 1.104974642016532,
      "learning_rate": 0.003,
      "loss": 4.0059,
      "step": 10886
    },
    {
      "epoch": 0.10887,
      "grad_norm": 1.2279008727498621,
      "learning_rate": 0.003,
      "loss": 4.0501,
      "step": 10887
    },
    {
      "epoch": 0.10888,
      "grad_norm": 1.202420390338381,
      "learning_rate": 0.003,
      "loss": 3.9994,
      "step": 10888
    },
    {
      "epoch": 0.10889,
      "grad_norm": 1.0304295601558235,
      "learning_rate": 0.003,
      "loss": 4.0047,
      "step": 10889
    },
    {
      "epoch": 0.1089,
      "grad_norm": 1.3568156975479342,
      "learning_rate": 0.003,
      "loss": 4.0097,
      "step": 10890
    },
    {
      "epoch": 0.10891,
      "grad_norm": 1.106559013336969,
      "learning_rate": 0.003,
      "loss": 4.0367,
      "step": 10891
    },
    {
      "epoch": 0.10892,
      "grad_norm": 1.175561573120817,
      "learning_rate": 0.003,
      "loss": 4.0302,
      "step": 10892
    },
    {
      "epoch": 0.10893,
      "grad_norm": 1.02974471285148,
      "learning_rate": 0.003,
      "loss": 4.011,
      "step": 10893
    },
    {
      "epoch": 0.10894,
      "grad_norm": 1.186667566372747,
      "learning_rate": 0.003,
      "loss": 4.0159,
      "step": 10894
    },
    {
      "epoch": 0.10895,
      "grad_norm": 0.9317722895107462,
      "learning_rate": 0.003,
      "loss": 4.0457,
      "step": 10895
    },
    {
      "epoch": 0.10896,
      "grad_norm": 1.1702513574970357,
      "learning_rate": 0.003,
      "loss": 4.0294,
      "step": 10896
    },
    {
      "epoch": 0.10897,
      "grad_norm": 1.2355323699421268,
      "learning_rate": 0.003,
      "loss": 4.0123,
      "step": 10897
    },
    {
      "epoch": 0.10898,
      "grad_norm": 1.0319769089185715,
      "learning_rate": 0.003,
      "loss": 4.0293,
      "step": 10898
    },
    {
      "epoch": 0.10899,
      "grad_norm": 1.1782040466486867,
      "learning_rate": 0.003,
      "loss": 4.0194,
      "step": 10899
    },
    {
      "epoch": 0.109,
      "grad_norm": 1.0467952536854728,
      "learning_rate": 0.003,
      "loss": 4.0444,
      "step": 10900
    },
    {
      "epoch": 0.10901,
      "grad_norm": 1.230063746087194,
      "learning_rate": 0.003,
      "loss": 4.023,
      "step": 10901
    },
    {
      "epoch": 0.10902,
      "grad_norm": 1.256377588857215,
      "learning_rate": 0.003,
      "loss": 4.0201,
      "step": 10902
    },
    {
      "epoch": 0.10903,
      "grad_norm": 1.1730572753174304,
      "learning_rate": 0.003,
      "loss": 4.0385,
      "step": 10903
    },
    {
      "epoch": 0.10904,
      "grad_norm": 1.1307723050871854,
      "learning_rate": 0.003,
      "loss": 4.0133,
      "step": 10904
    },
    {
      "epoch": 0.10905,
      "grad_norm": 1.1510216931005048,
      "learning_rate": 0.003,
      "loss": 4.006,
      "step": 10905
    },
    {
      "epoch": 0.10906,
      "grad_norm": 1.0156123289857955,
      "learning_rate": 0.003,
      "loss": 4.0413,
      "step": 10906
    },
    {
      "epoch": 0.10907,
      "grad_norm": 1.298179684529916,
      "learning_rate": 0.003,
      "loss": 4.0262,
      "step": 10907
    },
    {
      "epoch": 0.10908,
      "grad_norm": 1.017892580324516,
      "learning_rate": 0.003,
      "loss": 4.026,
      "step": 10908
    },
    {
      "epoch": 0.10909,
      "grad_norm": 1.4152613686138085,
      "learning_rate": 0.003,
      "loss": 4.0223,
      "step": 10909
    },
    {
      "epoch": 0.1091,
      "grad_norm": 0.9104381971108158,
      "learning_rate": 0.003,
      "loss": 4.0232,
      "step": 10910
    },
    {
      "epoch": 0.10911,
      "grad_norm": 1.1706988285504234,
      "learning_rate": 0.003,
      "loss": 4.0319,
      "step": 10911
    },
    {
      "epoch": 0.10912,
      "grad_norm": 1.1246788359272881,
      "learning_rate": 0.003,
      "loss": 4.0257,
      "step": 10912
    },
    {
      "epoch": 0.10913,
      "grad_norm": 1.2615350196980175,
      "learning_rate": 0.003,
      "loss": 4.0467,
      "step": 10913
    },
    {
      "epoch": 0.10914,
      "grad_norm": 1.0529171886939046,
      "learning_rate": 0.003,
      "loss": 4.0191,
      "step": 10914
    },
    {
      "epoch": 0.10915,
      "grad_norm": 1.181984560865992,
      "learning_rate": 0.003,
      "loss": 4.0212,
      "step": 10915
    },
    {
      "epoch": 0.10916,
      "grad_norm": 0.9398122380586809,
      "learning_rate": 0.003,
      "loss": 4.0149,
      "step": 10916
    },
    {
      "epoch": 0.10917,
      "grad_norm": 1.3986893076006122,
      "learning_rate": 0.003,
      "loss": 4.0419,
      "step": 10917
    },
    {
      "epoch": 0.10918,
      "grad_norm": 1.1386917109617494,
      "learning_rate": 0.003,
      "loss": 4.0365,
      "step": 10918
    },
    {
      "epoch": 0.10919,
      "grad_norm": 1.388526998048085,
      "learning_rate": 0.003,
      "loss": 4.0186,
      "step": 10919
    },
    {
      "epoch": 0.1092,
      "grad_norm": 0.9553367529462157,
      "learning_rate": 0.003,
      "loss": 4.0243,
      "step": 10920
    },
    {
      "epoch": 0.10921,
      "grad_norm": 1.1249097029410848,
      "learning_rate": 0.003,
      "loss": 4.04,
      "step": 10921
    },
    {
      "epoch": 0.10922,
      "grad_norm": 1.1588772915067118,
      "learning_rate": 0.003,
      "loss": 4.036,
      "step": 10922
    },
    {
      "epoch": 0.10923,
      "grad_norm": 1.1594370172230615,
      "learning_rate": 0.003,
      "loss": 3.9942,
      "step": 10923
    },
    {
      "epoch": 0.10924,
      "grad_norm": 1.1587584233463495,
      "learning_rate": 0.003,
      "loss": 4.0055,
      "step": 10924
    },
    {
      "epoch": 0.10925,
      "grad_norm": 1.199402284930254,
      "learning_rate": 0.003,
      "loss": 4.039,
      "step": 10925
    },
    {
      "epoch": 0.10926,
      "grad_norm": 1.3780799925530476,
      "learning_rate": 0.003,
      "loss": 3.998,
      "step": 10926
    },
    {
      "epoch": 0.10927,
      "grad_norm": 0.9547337146677488,
      "learning_rate": 0.003,
      "loss": 4.0326,
      "step": 10927
    },
    {
      "epoch": 0.10928,
      "grad_norm": 1.2312732084033924,
      "learning_rate": 0.003,
      "loss": 4.0209,
      "step": 10928
    },
    {
      "epoch": 0.10929,
      "grad_norm": 1.124638240875568,
      "learning_rate": 0.003,
      "loss": 4.0077,
      "step": 10929
    },
    {
      "epoch": 0.1093,
      "grad_norm": 1.0996981207669076,
      "learning_rate": 0.003,
      "loss": 4.0395,
      "step": 10930
    },
    {
      "epoch": 0.10931,
      "grad_norm": 0.9997241567513053,
      "learning_rate": 0.003,
      "loss": 4.037,
      "step": 10931
    },
    {
      "epoch": 0.10932,
      "grad_norm": 1.2572811468968264,
      "learning_rate": 0.003,
      "loss": 4.0335,
      "step": 10932
    },
    {
      "epoch": 0.10933,
      "grad_norm": 1.235322426229629,
      "learning_rate": 0.003,
      "loss": 4.0441,
      "step": 10933
    },
    {
      "epoch": 0.10934,
      "grad_norm": 1.203499600140372,
      "learning_rate": 0.003,
      "loss": 4.0677,
      "step": 10934
    },
    {
      "epoch": 0.10935,
      "grad_norm": 1.0560801016003458,
      "learning_rate": 0.003,
      "loss": 4.0392,
      "step": 10935
    },
    {
      "epoch": 0.10936,
      "grad_norm": 1.4290113078300473,
      "learning_rate": 0.003,
      "loss": 4.0068,
      "step": 10936
    },
    {
      "epoch": 0.10937,
      "grad_norm": 0.8838300191155496,
      "learning_rate": 0.003,
      "loss": 4.0463,
      "step": 10937
    },
    {
      "epoch": 0.10938,
      "grad_norm": 1.0970734661026584,
      "learning_rate": 0.003,
      "loss": 4.042,
      "step": 10938
    },
    {
      "epoch": 0.10939,
      "grad_norm": 1.1495776401685247,
      "learning_rate": 0.003,
      "loss": 4.0248,
      "step": 10939
    },
    {
      "epoch": 0.1094,
      "grad_norm": 1.2065630154932683,
      "learning_rate": 0.003,
      "loss": 4.0565,
      "step": 10940
    },
    {
      "epoch": 0.10941,
      "grad_norm": 1.2374225552061804,
      "learning_rate": 0.003,
      "loss": 4.0493,
      "step": 10941
    },
    {
      "epoch": 0.10942,
      "grad_norm": 1.2524313252271473,
      "learning_rate": 0.003,
      "loss": 4.0406,
      "step": 10942
    },
    {
      "epoch": 0.10943,
      "grad_norm": 1.0258331148557027,
      "learning_rate": 0.003,
      "loss": 4.0158,
      "step": 10943
    },
    {
      "epoch": 0.10944,
      "grad_norm": 1.1367149424101854,
      "learning_rate": 0.003,
      "loss": 4.0311,
      "step": 10944
    },
    {
      "epoch": 0.10945,
      "grad_norm": 1.264314678398142,
      "learning_rate": 0.003,
      "loss": 4.0326,
      "step": 10945
    },
    {
      "epoch": 0.10946,
      "grad_norm": 1.1470856240128633,
      "learning_rate": 0.003,
      "loss": 4.0397,
      "step": 10946
    },
    {
      "epoch": 0.10947,
      "grad_norm": 1.343562409477934,
      "learning_rate": 0.003,
      "loss": 4.0204,
      "step": 10947
    },
    {
      "epoch": 0.10948,
      "grad_norm": 1.1340548072968755,
      "learning_rate": 0.003,
      "loss": 4.0134,
      "step": 10948
    },
    {
      "epoch": 0.10949,
      "grad_norm": 1.1527721030646023,
      "learning_rate": 0.003,
      "loss": 4.0113,
      "step": 10949
    },
    {
      "epoch": 0.1095,
      "grad_norm": 1.0245060674231437,
      "learning_rate": 0.003,
      "loss": 4.006,
      "step": 10950
    },
    {
      "epoch": 0.10951,
      "grad_norm": 1.094903481253029,
      "learning_rate": 0.003,
      "loss": 4.0063,
      "step": 10951
    },
    {
      "epoch": 0.10952,
      "grad_norm": 1.138549421081459,
      "learning_rate": 0.003,
      "loss": 4.0299,
      "step": 10952
    },
    {
      "epoch": 0.10953,
      "grad_norm": 1.1286975653189033,
      "learning_rate": 0.003,
      "loss": 4.0276,
      "step": 10953
    },
    {
      "epoch": 0.10954,
      "grad_norm": 1.1602190952130804,
      "learning_rate": 0.003,
      "loss": 4.0556,
      "step": 10954
    },
    {
      "epoch": 0.10955,
      "grad_norm": 1.2130192000896474,
      "learning_rate": 0.003,
      "loss": 4.0206,
      "step": 10955
    },
    {
      "epoch": 0.10956,
      "grad_norm": 1.2258014219563267,
      "learning_rate": 0.003,
      "loss": 4.0443,
      "step": 10956
    },
    {
      "epoch": 0.10957,
      "grad_norm": 1.0776761162714221,
      "learning_rate": 0.003,
      "loss": 4.0062,
      "step": 10957
    },
    {
      "epoch": 0.10958,
      "grad_norm": 1.3034903704468932,
      "learning_rate": 0.003,
      "loss": 4.0273,
      "step": 10958
    },
    {
      "epoch": 0.10959,
      "grad_norm": 1.0464894790504649,
      "learning_rate": 0.003,
      "loss": 4.034,
      "step": 10959
    },
    {
      "epoch": 0.1096,
      "grad_norm": 1.14488623119123,
      "learning_rate": 0.003,
      "loss": 4.0022,
      "step": 10960
    },
    {
      "epoch": 0.10961,
      "grad_norm": 1.115487082003804,
      "learning_rate": 0.003,
      "loss": 4.016,
      "step": 10961
    },
    {
      "epoch": 0.10962,
      "grad_norm": 1.2309886332824755,
      "learning_rate": 0.003,
      "loss": 4.0491,
      "step": 10962
    },
    {
      "epoch": 0.10963,
      "grad_norm": 1.1406460952529542,
      "learning_rate": 0.003,
      "loss": 4.0386,
      "step": 10963
    },
    {
      "epoch": 0.10964,
      "grad_norm": 1.2092082481970878,
      "learning_rate": 0.003,
      "loss": 4.0423,
      "step": 10964
    },
    {
      "epoch": 0.10965,
      "grad_norm": 1.1089678629170563,
      "learning_rate": 0.003,
      "loss": 3.9934,
      "step": 10965
    },
    {
      "epoch": 0.10966,
      "grad_norm": 1.453982552663511,
      "learning_rate": 0.003,
      "loss": 4.0301,
      "step": 10966
    },
    {
      "epoch": 0.10967,
      "grad_norm": 1.0132525838939015,
      "learning_rate": 0.003,
      "loss": 4.0417,
      "step": 10967
    },
    {
      "epoch": 0.10968,
      "grad_norm": 1.2921121470526886,
      "learning_rate": 0.003,
      "loss": 4.0568,
      "step": 10968
    },
    {
      "epoch": 0.10969,
      "grad_norm": 1.0273381027526496,
      "learning_rate": 0.003,
      "loss": 4.0512,
      "step": 10969
    },
    {
      "epoch": 0.1097,
      "grad_norm": 1.402973935592333,
      "learning_rate": 0.003,
      "loss": 4.0401,
      "step": 10970
    },
    {
      "epoch": 0.10971,
      "grad_norm": 1.1233713113959605,
      "learning_rate": 0.003,
      "loss": 4.0514,
      "step": 10971
    },
    {
      "epoch": 0.10972,
      "grad_norm": 1.1972507884324581,
      "learning_rate": 0.003,
      "loss": 4.0761,
      "step": 10972
    },
    {
      "epoch": 0.10973,
      "grad_norm": 1.0689380384676346,
      "learning_rate": 0.003,
      "loss": 4.031,
      "step": 10973
    },
    {
      "epoch": 0.10974,
      "grad_norm": 1.2935838341305652,
      "learning_rate": 0.003,
      "loss": 4.0246,
      "step": 10974
    },
    {
      "epoch": 0.10975,
      "grad_norm": 1.2148835363852442,
      "learning_rate": 0.003,
      "loss": 4.0447,
      "step": 10975
    },
    {
      "epoch": 0.10976,
      "grad_norm": 1.1196265735397568,
      "learning_rate": 0.003,
      "loss": 4.0218,
      "step": 10976
    },
    {
      "epoch": 0.10977,
      "grad_norm": 1.0766132416152376,
      "learning_rate": 0.003,
      "loss": 4.0344,
      "step": 10977
    },
    {
      "epoch": 0.10978,
      "grad_norm": 1.262804962527681,
      "learning_rate": 0.003,
      "loss": 4.0487,
      "step": 10978
    },
    {
      "epoch": 0.10979,
      "grad_norm": 0.9073294804824369,
      "learning_rate": 0.003,
      "loss": 4.0246,
      "step": 10979
    },
    {
      "epoch": 0.1098,
      "grad_norm": 1.2605259139634963,
      "learning_rate": 0.003,
      "loss": 4.0252,
      "step": 10980
    },
    {
      "epoch": 0.10981,
      "grad_norm": 0.9680594140582308,
      "learning_rate": 0.003,
      "loss": 4.0389,
      "step": 10981
    },
    {
      "epoch": 0.10982,
      "grad_norm": 1.164726097839411,
      "learning_rate": 0.003,
      "loss": 4.004,
      "step": 10982
    },
    {
      "epoch": 0.10983,
      "grad_norm": 1.078100686604328,
      "learning_rate": 0.003,
      "loss": 4.0092,
      "step": 10983
    },
    {
      "epoch": 0.10984,
      "grad_norm": 1.27951824157926,
      "learning_rate": 0.003,
      "loss": 4.0522,
      "step": 10984
    },
    {
      "epoch": 0.10985,
      "grad_norm": 1.2483088905929303,
      "learning_rate": 0.003,
      "loss": 4.0354,
      "step": 10985
    },
    {
      "epoch": 0.10986,
      "grad_norm": 1.0764913660255253,
      "learning_rate": 0.003,
      "loss": 4.0315,
      "step": 10986
    },
    {
      "epoch": 0.10987,
      "grad_norm": 1.2325305120406893,
      "learning_rate": 0.003,
      "loss": 4.048,
      "step": 10987
    },
    {
      "epoch": 0.10988,
      "grad_norm": 1.1074761108120266,
      "learning_rate": 0.003,
      "loss": 4.04,
      "step": 10988
    },
    {
      "epoch": 0.10989,
      "grad_norm": 1.1438099942455424,
      "learning_rate": 0.003,
      "loss": 4.0268,
      "step": 10989
    },
    {
      "epoch": 0.1099,
      "grad_norm": 0.9531903580271849,
      "learning_rate": 0.003,
      "loss": 4.0264,
      "step": 10990
    },
    {
      "epoch": 0.10991,
      "grad_norm": 1.1261491344660406,
      "learning_rate": 0.003,
      "loss": 4.0342,
      "step": 10991
    },
    {
      "epoch": 0.10992,
      "grad_norm": 1.2458886185212843,
      "learning_rate": 0.003,
      "loss": 4.0282,
      "step": 10992
    },
    {
      "epoch": 0.10993,
      "grad_norm": 0.9909464766748146,
      "learning_rate": 0.003,
      "loss": 4.0156,
      "step": 10993
    },
    {
      "epoch": 0.10994,
      "grad_norm": 1.2594072221547077,
      "learning_rate": 0.003,
      "loss": 4.0682,
      "step": 10994
    },
    {
      "epoch": 0.10995,
      "grad_norm": 1.0286576604311943,
      "learning_rate": 0.003,
      "loss": 4.0381,
      "step": 10995
    },
    {
      "epoch": 0.10996,
      "grad_norm": 1.0879885186981424,
      "learning_rate": 0.003,
      "loss": 4.0286,
      "step": 10996
    },
    {
      "epoch": 0.10997,
      "grad_norm": 1.2251558646601735,
      "learning_rate": 0.003,
      "loss": 4.0616,
      "step": 10997
    },
    {
      "epoch": 0.10998,
      "grad_norm": 1.520992898306493,
      "learning_rate": 0.003,
      "loss": 4.0723,
      "step": 10998
    },
    {
      "epoch": 0.10999,
      "grad_norm": 0.9974930439447344,
      "learning_rate": 0.003,
      "loss": 4.0279,
      "step": 10999
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.3984769150579184,
      "learning_rate": 0.003,
      "loss": 4.074,
      "step": 11000
    },
    {
      "epoch": 0.11001,
      "grad_norm": 0.9811644529227863,
      "learning_rate": 0.003,
      "loss": 4.0576,
      "step": 11001
    },
    {
      "epoch": 0.11002,
      "grad_norm": 1.1450918449781147,
      "learning_rate": 0.003,
      "loss": 4.0454,
      "step": 11002
    },
    {
      "epoch": 0.11003,
      "grad_norm": 1.0480431049370729,
      "learning_rate": 0.003,
      "loss": 4.0341,
      "step": 11003
    },
    {
      "epoch": 0.11004,
      "grad_norm": 1.4695224754258749,
      "learning_rate": 0.003,
      "loss": 4.0444,
      "step": 11004
    },
    {
      "epoch": 0.11005,
      "grad_norm": 1.0012488506178907,
      "learning_rate": 0.003,
      "loss": 4.0124,
      "step": 11005
    },
    {
      "epoch": 0.11006,
      "grad_norm": 1.1097531270381813,
      "learning_rate": 0.003,
      "loss": 4.044,
      "step": 11006
    },
    {
      "epoch": 0.11007,
      "grad_norm": 1.3457776983843623,
      "learning_rate": 0.003,
      "loss": 4.0413,
      "step": 11007
    },
    {
      "epoch": 0.11008,
      "grad_norm": 0.9945908240179561,
      "learning_rate": 0.003,
      "loss": 4.0435,
      "step": 11008
    },
    {
      "epoch": 0.11009,
      "grad_norm": 1.146031306723099,
      "learning_rate": 0.003,
      "loss": 4.0201,
      "step": 11009
    },
    {
      "epoch": 0.1101,
      "grad_norm": 1.3041512694151578,
      "learning_rate": 0.003,
      "loss": 4.0021,
      "step": 11010
    },
    {
      "epoch": 0.11011,
      "grad_norm": 1.0561894869979496,
      "learning_rate": 0.003,
      "loss": 4.0199,
      "step": 11011
    },
    {
      "epoch": 0.11012,
      "grad_norm": 1.2575408616044952,
      "learning_rate": 0.003,
      "loss": 4.0259,
      "step": 11012
    },
    {
      "epoch": 0.11013,
      "grad_norm": 0.9634053404348774,
      "learning_rate": 0.003,
      "loss": 4.0391,
      "step": 11013
    },
    {
      "epoch": 0.11014,
      "grad_norm": 1.1455573340874126,
      "learning_rate": 0.003,
      "loss": 4.0404,
      "step": 11014
    },
    {
      "epoch": 0.11015,
      "grad_norm": 1.0979373194946567,
      "learning_rate": 0.003,
      "loss": 4.0164,
      "step": 11015
    },
    {
      "epoch": 0.11016,
      "grad_norm": 1.0909505512602309,
      "learning_rate": 0.003,
      "loss": 4.0354,
      "step": 11016
    },
    {
      "epoch": 0.11017,
      "grad_norm": 1.070802229909584,
      "learning_rate": 0.003,
      "loss": 4.0146,
      "step": 11017
    },
    {
      "epoch": 0.11018,
      "grad_norm": 1.0655397569106997,
      "learning_rate": 0.003,
      "loss": 4.0492,
      "step": 11018
    },
    {
      "epoch": 0.11019,
      "grad_norm": 1.2632535966366136,
      "learning_rate": 0.003,
      "loss": 4.0332,
      "step": 11019
    },
    {
      "epoch": 0.1102,
      "grad_norm": 0.958588750896633,
      "learning_rate": 0.003,
      "loss": 4.0034,
      "step": 11020
    },
    {
      "epoch": 0.11021,
      "grad_norm": 1.1602559223950129,
      "learning_rate": 0.003,
      "loss": 4.0204,
      "step": 11021
    },
    {
      "epoch": 0.11022,
      "grad_norm": 0.9786123700993407,
      "learning_rate": 0.003,
      "loss": 4.0461,
      "step": 11022
    },
    {
      "epoch": 0.11023,
      "grad_norm": 1.0897353532712468,
      "learning_rate": 0.003,
      "loss": 4.0503,
      "step": 11023
    },
    {
      "epoch": 0.11024,
      "grad_norm": 1.3783770276098104,
      "learning_rate": 0.003,
      "loss": 4.0579,
      "step": 11024
    },
    {
      "epoch": 0.11025,
      "grad_norm": 0.9920237745810896,
      "learning_rate": 0.003,
      "loss": 4.0322,
      "step": 11025
    },
    {
      "epoch": 0.11026,
      "grad_norm": 1.2990419941936762,
      "learning_rate": 0.003,
      "loss": 4.0331,
      "step": 11026
    },
    {
      "epoch": 0.11027,
      "grad_norm": 1.1054453581494064,
      "learning_rate": 0.003,
      "loss": 4.0198,
      "step": 11027
    },
    {
      "epoch": 0.11028,
      "grad_norm": 1.1552917683687645,
      "learning_rate": 0.003,
      "loss": 4.0267,
      "step": 11028
    },
    {
      "epoch": 0.11029,
      "grad_norm": 1.1868727922087963,
      "learning_rate": 0.003,
      "loss": 4.0186,
      "step": 11029
    },
    {
      "epoch": 0.1103,
      "grad_norm": 0.9825451090946785,
      "learning_rate": 0.003,
      "loss": 4.0305,
      "step": 11030
    },
    {
      "epoch": 0.11031,
      "grad_norm": 1.1804760582921996,
      "learning_rate": 0.003,
      "loss": 4.0347,
      "step": 11031
    },
    {
      "epoch": 0.11032,
      "grad_norm": 1.0624183712164799,
      "learning_rate": 0.003,
      "loss": 4.0435,
      "step": 11032
    },
    {
      "epoch": 0.11033,
      "grad_norm": 1.279437929756929,
      "learning_rate": 0.003,
      "loss": 4.0351,
      "step": 11033
    },
    {
      "epoch": 0.11034,
      "grad_norm": 0.9050557877634389,
      "learning_rate": 0.003,
      "loss": 3.9997,
      "step": 11034
    },
    {
      "epoch": 0.11035,
      "grad_norm": 1.0758467424852527,
      "learning_rate": 0.003,
      "loss": 4.0384,
      "step": 11035
    },
    {
      "epoch": 0.11036,
      "grad_norm": 1.4004957620496878,
      "learning_rate": 0.003,
      "loss": 4.0541,
      "step": 11036
    },
    {
      "epoch": 0.11037,
      "grad_norm": 1.148007136705928,
      "learning_rate": 0.003,
      "loss": 4.0007,
      "step": 11037
    },
    {
      "epoch": 0.11038,
      "grad_norm": 1.1327536508610503,
      "learning_rate": 0.003,
      "loss": 4.0224,
      "step": 11038
    },
    {
      "epoch": 0.11039,
      "grad_norm": 1.1848546858736662,
      "learning_rate": 0.003,
      "loss": 4.0284,
      "step": 11039
    },
    {
      "epoch": 0.1104,
      "grad_norm": 1.323480908182648,
      "learning_rate": 0.003,
      "loss": 4.0653,
      "step": 11040
    },
    {
      "epoch": 0.11041,
      "grad_norm": 0.8971359841886503,
      "learning_rate": 0.003,
      "loss": 4.0396,
      "step": 11041
    },
    {
      "epoch": 0.11042,
      "grad_norm": 1.1769094334599115,
      "learning_rate": 0.003,
      "loss": 4.0507,
      "step": 11042
    },
    {
      "epoch": 0.11043,
      "grad_norm": 1.1666462489026705,
      "learning_rate": 0.003,
      "loss": 4.0379,
      "step": 11043
    },
    {
      "epoch": 0.11044,
      "grad_norm": 1.2253457217850752,
      "learning_rate": 0.003,
      "loss": 4.0318,
      "step": 11044
    },
    {
      "epoch": 0.11045,
      "grad_norm": 1.1206251489359325,
      "learning_rate": 0.003,
      "loss": 4.0186,
      "step": 11045
    },
    {
      "epoch": 0.11046,
      "grad_norm": 1.3392782992158903,
      "learning_rate": 0.003,
      "loss": 4.019,
      "step": 11046
    },
    {
      "epoch": 0.11047,
      "grad_norm": 1.0113482407477978,
      "learning_rate": 0.003,
      "loss": 4.0081,
      "step": 11047
    },
    {
      "epoch": 0.11048,
      "grad_norm": 1.2434741216349872,
      "learning_rate": 0.003,
      "loss": 4.0316,
      "step": 11048
    },
    {
      "epoch": 0.11049,
      "grad_norm": 1.0690557967832832,
      "learning_rate": 0.003,
      "loss": 4.0205,
      "step": 11049
    },
    {
      "epoch": 0.1105,
      "grad_norm": 1.2906453831596687,
      "learning_rate": 0.003,
      "loss": 4.0272,
      "step": 11050
    },
    {
      "epoch": 0.11051,
      "grad_norm": 1.049645251796059,
      "learning_rate": 0.003,
      "loss": 4.0399,
      "step": 11051
    },
    {
      "epoch": 0.11052,
      "grad_norm": 1.1022370813626103,
      "learning_rate": 0.003,
      "loss": 4.0388,
      "step": 11052
    },
    {
      "epoch": 0.11053,
      "grad_norm": 1.404602485307846,
      "learning_rate": 0.003,
      "loss": 4.0364,
      "step": 11053
    },
    {
      "epoch": 0.11054,
      "grad_norm": 0.9778708210072532,
      "learning_rate": 0.003,
      "loss": 4.0244,
      "step": 11054
    },
    {
      "epoch": 0.11055,
      "grad_norm": 1.0849146668394676,
      "learning_rate": 0.003,
      "loss": 4.0307,
      "step": 11055
    },
    {
      "epoch": 0.11056,
      "grad_norm": 1.3525665780096308,
      "learning_rate": 0.003,
      "loss": 4.0384,
      "step": 11056
    },
    {
      "epoch": 0.11057,
      "grad_norm": 0.8663546618979914,
      "learning_rate": 0.003,
      "loss": 4.0102,
      "step": 11057
    },
    {
      "epoch": 0.11058,
      "grad_norm": 0.9349712450004345,
      "learning_rate": 0.003,
      "loss": 4.0462,
      "step": 11058
    },
    {
      "epoch": 0.11059,
      "grad_norm": 1.2419152885907019,
      "learning_rate": 0.003,
      "loss": 4.0367,
      "step": 11059
    },
    {
      "epoch": 0.1106,
      "grad_norm": 1.2015131996587158,
      "learning_rate": 0.003,
      "loss": 4.0378,
      "step": 11060
    },
    {
      "epoch": 0.11061,
      "grad_norm": 1.1726168292055932,
      "learning_rate": 0.003,
      "loss": 4.0486,
      "step": 11061
    },
    {
      "epoch": 0.11062,
      "grad_norm": 1.235236596847503,
      "learning_rate": 0.003,
      "loss": 4.0432,
      "step": 11062
    },
    {
      "epoch": 0.11063,
      "grad_norm": 1.1430465957610831,
      "learning_rate": 0.003,
      "loss": 4.0116,
      "step": 11063
    },
    {
      "epoch": 0.11064,
      "grad_norm": 1.1694916513185551,
      "learning_rate": 0.003,
      "loss": 4.027,
      "step": 11064
    },
    {
      "epoch": 0.11065,
      "grad_norm": 1.1843858564427805,
      "learning_rate": 0.003,
      "loss": 4.0144,
      "step": 11065
    },
    {
      "epoch": 0.11066,
      "grad_norm": 1.1731005960048595,
      "learning_rate": 0.003,
      "loss": 4.0447,
      "step": 11066
    },
    {
      "epoch": 0.11067,
      "grad_norm": 1.3610597815554937,
      "learning_rate": 0.003,
      "loss": 4.0507,
      "step": 11067
    },
    {
      "epoch": 0.11068,
      "grad_norm": 0.9388571621226179,
      "learning_rate": 0.003,
      "loss": 4.0154,
      "step": 11068
    },
    {
      "epoch": 0.11069,
      "grad_norm": 1.1845720962926707,
      "learning_rate": 0.003,
      "loss": 4.0048,
      "step": 11069
    },
    {
      "epoch": 0.1107,
      "grad_norm": 1.0184914945024592,
      "learning_rate": 0.003,
      "loss": 4.0304,
      "step": 11070
    },
    {
      "epoch": 0.11071,
      "grad_norm": 1.1695843280280722,
      "learning_rate": 0.003,
      "loss": 4.0337,
      "step": 11071
    },
    {
      "epoch": 0.11072,
      "grad_norm": 1.2124753448260535,
      "learning_rate": 0.003,
      "loss": 4.0134,
      "step": 11072
    },
    {
      "epoch": 0.11073,
      "grad_norm": 1.1600313038561658,
      "learning_rate": 0.003,
      "loss": 4.0583,
      "step": 11073
    },
    {
      "epoch": 0.11074,
      "grad_norm": 1.2553421574935741,
      "learning_rate": 0.003,
      "loss": 4.024,
      "step": 11074
    },
    {
      "epoch": 0.11075,
      "grad_norm": 1.1166830328732873,
      "learning_rate": 0.003,
      "loss": 4.0495,
      "step": 11075
    },
    {
      "epoch": 0.11076,
      "grad_norm": 1.362951644650086,
      "learning_rate": 0.003,
      "loss": 4.0309,
      "step": 11076
    },
    {
      "epoch": 0.11077,
      "grad_norm": 1.0206627313768633,
      "learning_rate": 0.003,
      "loss": 4.0744,
      "step": 11077
    },
    {
      "epoch": 0.11078,
      "grad_norm": 1.137921297110709,
      "learning_rate": 0.003,
      "loss": 4.0399,
      "step": 11078
    },
    {
      "epoch": 0.11079,
      "grad_norm": 1.0636974748852701,
      "learning_rate": 0.003,
      "loss": 4.0251,
      "step": 11079
    },
    {
      "epoch": 0.1108,
      "grad_norm": 1.243205584369264,
      "learning_rate": 0.003,
      "loss": 4.0195,
      "step": 11080
    },
    {
      "epoch": 0.11081,
      "grad_norm": 1.1466850373196094,
      "learning_rate": 0.003,
      "loss": 4.0335,
      "step": 11081
    },
    {
      "epoch": 0.11082,
      "grad_norm": 1.0332780369799524,
      "learning_rate": 0.003,
      "loss": 4.0274,
      "step": 11082
    },
    {
      "epoch": 0.11083,
      "grad_norm": 1.1547474519658834,
      "learning_rate": 0.003,
      "loss": 4.0214,
      "step": 11083
    },
    {
      "epoch": 0.11084,
      "grad_norm": 1.2630821564699577,
      "learning_rate": 0.003,
      "loss": 4.0208,
      "step": 11084
    },
    {
      "epoch": 0.11085,
      "grad_norm": 0.9616550782921469,
      "learning_rate": 0.003,
      "loss": 4.0246,
      "step": 11085
    },
    {
      "epoch": 0.11086,
      "grad_norm": 1.0337612711666608,
      "learning_rate": 0.003,
      "loss": 4.0506,
      "step": 11086
    },
    {
      "epoch": 0.11087,
      "grad_norm": 1.246325517565779,
      "learning_rate": 0.003,
      "loss": 4.0365,
      "step": 11087
    },
    {
      "epoch": 0.11088,
      "grad_norm": 1.1210534817234898,
      "learning_rate": 0.003,
      "loss": 4.0234,
      "step": 11088
    },
    {
      "epoch": 0.11089,
      "grad_norm": 1.1210922280085747,
      "learning_rate": 0.003,
      "loss": 4.025,
      "step": 11089
    },
    {
      "epoch": 0.1109,
      "grad_norm": 1.2076356656569776,
      "learning_rate": 0.003,
      "loss": 4.0113,
      "step": 11090
    },
    {
      "epoch": 0.11091,
      "grad_norm": 1.0505164908267228,
      "learning_rate": 0.003,
      "loss": 4.0161,
      "step": 11091
    },
    {
      "epoch": 0.11092,
      "grad_norm": 1.116407928167723,
      "learning_rate": 0.003,
      "loss": 4.021,
      "step": 11092
    },
    {
      "epoch": 0.11093,
      "grad_norm": 1.3679759693794233,
      "learning_rate": 0.003,
      "loss": 4.0357,
      "step": 11093
    },
    {
      "epoch": 0.11094,
      "grad_norm": 0.9149580813928061,
      "learning_rate": 0.003,
      "loss": 4.0234,
      "step": 11094
    },
    {
      "epoch": 0.11095,
      "grad_norm": 1.2127377272290825,
      "learning_rate": 0.003,
      "loss": 4.0474,
      "step": 11095
    },
    {
      "epoch": 0.11096,
      "grad_norm": 1.2654770177418224,
      "learning_rate": 0.003,
      "loss": 4.0424,
      "step": 11096
    },
    {
      "epoch": 0.11097,
      "grad_norm": 1.005101859952365,
      "learning_rate": 0.003,
      "loss": 4.0414,
      "step": 11097
    },
    {
      "epoch": 0.11098,
      "grad_norm": 1.1559183971161302,
      "learning_rate": 0.003,
      "loss": 4.0228,
      "step": 11098
    },
    {
      "epoch": 0.11099,
      "grad_norm": 1.2125995577106417,
      "learning_rate": 0.003,
      "loss": 4.0545,
      "step": 11099
    },
    {
      "epoch": 0.111,
      "grad_norm": 1.1741835677043042,
      "learning_rate": 0.003,
      "loss": 4.014,
      "step": 11100
    },
    {
      "epoch": 0.11101,
      "grad_norm": 1.0431649381898074,
      "learning_rate": 0.003,
      "loss": 4.0334,
      "step": 11101
    },
    {
      "epoch": 0.11102,
      "grad_norm": 1.1127524956437145,
      "learning_rate": 0.003,
      "loss": 4.019,
      "step": 11102
    },
    {
      "epoch": 0.11103,
      "grad_norm": 1.4390556394886567,
      "learning_rate": 0.003,
      "loss": 4.0296,
      "step": 11103
    },
    {
      "epoch": 0.11104,
      "grad_norm": 0.8695810012178421,
      "learning_rate": 0.003,
      "loss": 4.0163,
      "step": 11104
    },
    {
      "epoch": 0.11105,
      "grad_norm": 1.2898692002393344,
      "learning_rate": 0.003,
      "loss": 4.0276,
      "step": 11105
    },
    {
      "epoch": 0.11106,
      "grad_norm": 0.9339284088375288,
      "learning_rate": 0.003,
      "loss": 4.0107,
      "step": 11106
    },
    {
      "epoch": 0.11107,
      "grad_norm": 1.002203567794351,
      "learning_rate": 0.003,
      "loss": 4.0134,
      "step": 11107
    },
    {
      "epoch": 0.11108,
      "grad_norm": 1.212964255808357,
      "learning_rate": 0.003,
      "loss": 4.0438,
      "step": 11108
    },
    {
      "epoch": 0.11109,
      "grad_norm": 1.1363148006340882,
      "learning_rate": 0.003,
      "loss": 4.0424,
      "step": 11109
    },
    {
      "epoch": 0.1111,
      "grad_norm": 1.0654823793341264,
      "learning_rate": 0.003,
      "loss": 4.0362,
      "step": 11110
    },
    {
      "epoch": 0.11111,
      "grad_norm": 1.0410737532990986,
      "learning_rate": 0.003,
      "loss": 4.0323,
      "step": 11111
    },
    {
      "epoch": 0.11112,
      "grad_norm": 1.2755289571055364,
      "learning_rate": 0.003,
      "loss": 4.057,
      "step": 11112
    },
    {
      "epoch": 0.11113,
      "grad_norm": 1.1301440513445171,
      "learning_rate": 0.003,
      "loss": 4.0508,
      "step": 11113
    },
    {
      "epoch": 0.11114,
      "grad_norm": 1.3210030487586704,
      "learning_rate": 0.003,
      "loss": 4.0686,
      "step": 11114
    },
    {
      "epoch": 0.11115,
      "grad_norm": 1.0405738153003652,
      "learning_rate": 0.003,
      "loss": 4.0427,
      "step": 11115
    },
    {
      "epoch": 0.11116,
      "grad_norm": 1.4674668716202037,
      "learning_rate": 0.003,
      "loss": 4.0262,
      "step": 11116
    },
    {
      "epoch": 0.11117,
      "grad_norm": 1.126768367155695,
      "learning_rate": 0.003,
      "loss": 4.0556,
      "step": 11117
    },
    {
      "epoch": 0.11118,
      "grad_norm": 1.4499818568572693,
      "learning_rate": 0.003,
      "loss": 4.0283,
      "step": 11118
    },
    {
      "epoch": 0.11119,
      "grad_norm": 0.8723773760283889,
      "learning_rate": 0.003,
      "loss": 4.0457,
      "step": 11119
    },
    {
      "epoch": 0.1112,
      "grad_norm": 0.9880766918243952,
      "learning_rate": 0.003,
      "loss": 4.0206,
      "step": 11120
    },
    {
      "epoch": 0.11121,
      "grad_norm": 1.233724451927589,
      "learning_rate": 0.003,
      "loss": 4.0102,
      "step": 11121
    },
    {
      "epoch": 0.11122,
      "grad_norm": 0.9592358462304813,
      "learning_rate": 0.003,
      "loss": 4.0442,
      "step": 11122
    },
    {
      "epoch": 0.11123,
      "grad_norm": 0.9374151178986565,
      "learning_rate": 0.003,
      "loss": 4.0452,
      "step": 11123
    },
    {
      "epoch": 0.11124,
      "grad_norm": 1.0746579089776649,
      "learning_rate": 0.003,
      "loss": 4.0201,
      "step": 11124
    },
    {
      "epoch": 0.11125,
      "grad_norm": 1.0272355456391729,
      "learning_rate": 0.003,
      "loss": 4.0412,
      "step": 11125
    },
    {
      "epoch": 0.11126,
      "grad_norm": 1.4696931867563632,
      "learning_rate": 0.003,
      "loss": 4.0333,
      "step": 11126
    },
    {
      "epoch": 0.11127,
      "grad_norm": 0.9306122996930648,
      "learning_rate": 0.003,
      "loss": 4.0339,
      "step": 11127
    },
    {
      "epoch": 0.11128,
      "grad_norm": 1.259426522382338,
      "learning_rate": 0.003,
      "loss": 4.0128,
      "step": 11128
    },
    {
      "epoch": 0.11129,
      "grad_norm": 0.9952764357013387,
      "learning_rate": 0.003,
      "loss": 4.0189,
      "step": 11129
    },
    {
      "epoch": 0.1113,
      "grad_norm": 1.2922884263779666,
      "learning_rate": 0.003,
      "loss": 4.0043,
      "step": 11130
    },
    {
      "epoch": 0.11131,
      "grad_norm": 1.0933870097789917,
      "learning_rate": 0.003,
      "loss": 4.0313,
      "step": 11131
    },
    {
      "epoch": 0.11132,
      "grad_norm": 1.222132471274821,
      "learning_rate": 0.003,
      "loss": 4.0241,
      "step": 11132
    },
    {
      "epoch": 0.11133,
      "grad_norm": 1.086913343875556,
      "learning_rate": 0.003,
      "loss": 4.0152,
      "step": 11133
    },
    {
      "epoch": 0.11134,
      "grad_norm": 1.2038589551456618,
      "learning_rate": 0.003,
      "loss": 4.0088,
      "step": 11134
    },
    {
      "epoch": 0.11135,
      "grad_norm": 1.1630263270645305,
      "learning_rate": 0.003,
      "loss": 4.0329,
      "step": 11135
    },
    {
      "epoch": 0.11136,
      "grad_norm": 1.2109273926288262,
      "learning_rate": 0.003,
      "loss": 4.0059,
      "step": 11136
    },
    {
      "epoch": 0.11137,
      "grad_norm": 1.133767888763714,
      "learning_rate": 0.003,
      "loss": 4.0247,
      "step": 11137
    },
    {
      "epoch": 0.11138,
      "grad_norm": 1.3304986578383604,
      "learning_rate": 0.003,
      "loss": 4.0157,
      "step": 11138
    },
    {
      "epoch": 0.11139,
      "grad_norm": 1.0703377791647029,
      "learning_rate": 0.003,
      "loss": 4.0476,
      "step": 11139
    },
    {
      "epoch": 0.1114,
      "grad_norm": 1.2710614152570363,
      "learning_rate": 0.003,
      "loss": 4.0126,
      "step": 11140
    },
    {
      "epoch": 0.11141,
      "grad_norm": 1.23803074569592,
      "learning_rate": 0.003,
      "loss": 4.0363,
      "step": 11141
    },
    {
      "epoch": 0.11142,
      "grad_norm": 1.3481665548815753,
      "learning_rate": 0.003,
      "loss": 4.0364,
      "step": 11142
    },
    {
      "epoch": 0.11143,
      "grad_norm": 1.07936134160658,
      "learning_rate": 0.003,
      "loss": 4.0257,
      "step": 11143
    },
    {
      "epoch": 0.11144,
      "grad_norm": 1.2330660101095827,
      "learning_rate": 0.003,
      "loss": 4.0021,
      "step": 11144
    },
    {
      "epoch": 0.11145,
      "grad_norm": 1.1001097752215603,
      "learning_rate": 0.003,
      "loss": 4.0315,
      "step": 11145
    },
    {
      "epoch": 0.11146,
      "grad_norm": 1.2439381573445438,
      "learning_rate": 0.003,
      "loss": 4.0299,
      "step": 11146
    },
    {
      "epoch": 0.11147,
      "grad_norm": 0.99219418207008,
      "learning_rate": 0.003,
      "loss": 4.0463,
      "step": 11147
    },
    {
      "epoch": 0.11148,
      "grad_norm": 1.2162762932312685,
      "learning_rate": 0.003,
      "loss": 4.0332,
      "step": 11148
    },
    {
      "epoch": 0.11149,
      "grad_norm": 1.1639544600650762,
      "learning_rate": 0.003,
      "loss": 4.0111,
      "step": 11149
    },
    {
      "epoch": 0.1115,
      "grad_norm": 1.1006116288127763,
      "learning_rate": 0.003,
      "loss": 4.0202,
      "step": 11150
    },
    {
      "epoch": 0.11151,
      "grad_norm": 1.2672886873563878,
      "learning_rate": 0.003,
      "loss": 4.0486,
      "step": 11151
    },
    {
      "epoch": 0.11152,
      "grad_norm": 0.910987023246572,
      "learning_rate": 0.003,
      "loss": 4.0111,
      "step": 11152
    },
    {
      "epoch": 0.11153,
      "grad_norm": 1.101946526056744,
      "learning_rate": 0.003,
      "loss": 4.0404,
      "step": 11153
    },
    {
      "epoch": 0.11154,
      "grad_norm": 1.0947773638822873,
      "learning_rate": 0.003,
      "loss": 4.0326,
      "step": 11154
    },
    {
      "epoch": 0.11155,
      "grad_norm": 1.2702655690371203,
      "learning_rate": 0.003,
      "loss": 4.041,
      "step": 11155
    },
    {
      "epoch": 0.11156,
      "grad_norm": 1.007909951916138,
      "learning_rate": 0.003,
      "loss": 4.0313,
      "step": 11156
    },
    {
      "epoch": 0.11157,
      "grad_norm": 1.3319173530862718,
      "learning_rate": 0.003,
      "loss": 4.0435,
      "step": 11157
    },
    {
      "epoch": 0.11158,
      "grad_norm": 0.9207828461156834,
      "learning_rate": 0.003,
      "loss": 4.0364,
      "step": 11158
    },
    {
      "epoch": 0.11159,
      "grad_norm": 1.197285438055082,
      "learning_rate": 0.003,
      "loss": 4.0325,
      "step": 11159
    },
    {
      "epoch": 0.1116,
      "grad_norm": 1.0433813495595061,
      "learning_rate": 0.003,
      "loss": 4.0415,
      "step": 11160
    },
    {
      "epoch": 0.11161,
      "grad_norm": 1.081328185673201,
      "learning_rate": 0.003,
      "loss": 4.0123,
      "step": 11161
    },
    {
      "epoch": 0.11162,
      "grad_norm": 1.2926210086878027,
      "learning_rate": 0.003,
      "loss": 4.0342,
      "step": 11162
    },
    {
      "epoch": 0.11163,
      "grad_norm": 0.9963164319297045,
      "learning_rate": 0.003,
      "loss": 4.0229,
      "step": 11163
    },
    {
      "epoch": 0.11164,
      "grad_norm": 1.3281076066936632,
      "learning_rate": 0.003,
      "loss": 4.0386,
      "step": 11164
    },
    {
      "epoch": 0.11165,
      "grad_norm": 0.9999767623058433,
      "learning_rate": 0.003,
      "loss": 4.0253,
      "step": 11165
    },
    {
      "epoch": 0.11166,
      "grad_norm": 1.3510910761006925,
      "learning_rate": 0.003,
      "loss": 4.0402,
      "step": 11166
    },
    {
      "epoch": 0.11167,
      "grad_norm": 0.9733408617008773,
      "learning_rate": 0.003,
      "loss": 4.0351,
      "step": 11167
    },
    {
      "epoch": 0.11168,
      "grad_norm": 1.247793495062047,
      "learning_rate": 0.003,
      "loss": 4.026,
      "step": 11168
    },
    {
      "epoch": 0.11169,
      "grad_norm": 0.8953094218520867,
      "learning_rate": 0.003,
      "loss": 4.0213,
      "step": 11169
    },
    {
      "epoch": 0.1117,
      "grad_norm": 1.1780557955764552,
      "learning_rate": 0.003,
      "loss": 4.0355,
      "step": 11170
    },
    {
      "epoch": 0.11171,
      "grad_norm": 1.1811278342129226,
      "learning_rate": 0.003,
      "loss": 4.0192,
      "step": 11171
    },
    {
      "epoch": 0.11172,
      "grad_norm": 1.2689331311338854,
      "learning_rate": 0.003,
      "loss": 4.0199,
      "step": 11172
    },
    {
      "epoch": 0.11173,
      "grad_norm": 1.1683719180902132,
      "learning_rate": 0.003,
      "loss": 4.0394,
      "step": 11173
    },
    {
      "epoch": 0.11174,
      "grad_norm": 1.0094485713003973,
      "learning_rate": 0.003,
      "loss": 4.0383,
      "step": 11174
    },
    {
      "epoch": 0.11175,
      "grad_norm": 1.3250881012136548,
      "learning_rate": 0.003,
      "loss": 4.0446,
      "step": 11175
    },
    {
      "epoch": 0.11176,
      "grad_norm": 1.0043723462137841,
      "learning_rate": 0.003,
      "loss": 4.0077,
      "step": 11176
    },
    {
      "epoch": 0.11177,
      "grad_norm": 1.1386272400480386,
      "learning_rate": 0.003,
      "loss": 4.0564,
      "step": 11177
    },
    {
      "epoch": 0.11178,
      "grad_norm": 1.2126593447984106,
      "learning_rate": 0.003,
      "loss": 4.049,
      "step": 11178
    },
    {
      "epoch": 0.11179,
      "grad_norm": 1.2307687560202298,
      "learning_rate": 0.003,
      "loss": 4.0288,
      "step": 11179
    },
    {
      "epoch": 0.1118,
      "grad_norm": 1.1602217088708568,
      "learning_rate": 0.003,
      "loss": 4.0364,
      "step": 11180
    },
    {
      "epoch": 0.11181,
      "grad_norm": 1.057733354940848,
      "learning_rate": 0.003,
      "loss": 4.0397,
      "step": 11181
    },
    {
      "epoch": 0.11182,
      "grad_norm": 1.1720682053385487,
      "learning_rate": 0.003,
      "loss": 4.0297,
      "step": 11182
    },
    {
      "epoch": 0.11183,
      "grad_norm": 1.1436907781386656,
      "learning_rate": 0.003,
      "loss": 4.0365,
      "step": 11183
    },
    {
      "epoch": 0.11184,
      "grad_norm": 1.3870042277723742,
      "learning_rate": 0.003,
      "loss": 4.0392,
      "step": 11184
    },
    {
      "epoch": 0.11185,
      "grad_norm": 1.0936717631849686,
      "learning_rate": 0.003,
      "loss": 4.0334,
      "step": 11185
    },
    {
      "epoch": 0.11186,
      "grad_norm": 1.3772654953768122,
      "learning_rate": 0.003,
      "loss": 4.0183,
      "step": 11186
    },
    {
      "epoch": 0.11187,
      "grad_norm": 1.1790597204228255,
      "learning_rate": 0.003,
      "loss": 4.0046,
      "step": 11187
    },
    {
      "epoch": 0.11188,
      "grad_norm": 1.1073857604366244,
      "learning_rate": 0.003,
      "loss": 4.0206,
      "step": 11188
    },
    {
      "epoch": 0.11189,
      "grad_norm": 1.130277787761296,
      "learning_rate": 0.003,
      "loss": 4.0596,
      "step": 11189
    },
    {
      "epoch": 0.1119,
      "grad_norm": 1.2313249191247733,
      "learning_rate": 0.003,
      "loss": 4.0616,
      "step": 11190
    },
    {
      "epoch": 0.11191,
      "grad_norm": 1.0068096645265725,
      "learning_rate": 0.003,
      "loss": 4.0172,
      "step": 11191
    },
    {
      "epoch": 0.11192,
      "grad_norm": 1.4926464646825244,
      "learning_rate": 0.003,
      "loss": 4.0641,
      "step": 11192
    },
    {
      "epoch": 0.11193,
      "grad_norm": 1.075022433453773,
      "learning_rate": 0.003,
      "loss": 4.0321,
      "step": 11193
    },
    {
      "epoch": 0.11194,
      "grad_norm": 1.097191178968125,
      "learning_rate": 0.003,
      "loss": 4.0594,
      "step": 11194
    },
    {
      "epoch": 0.11195,
      "grad_norm": 1.1810849460786332,
      "learning_rate": 0.003,
      "loss": 4.0183,
      "step": 11195
    },
    {
      "epoch": 0.11196,
      "grad_norm": 1.1503825490398842,
      "learning_rate": 0.003,
      "loss": 4.0284,
      "step": 11196
    },
    {
      "epoch": 0.11197,
      "grad_norm": 1.0421171730999512,
      "learning_rate": 0.003,
      "loss": 4.026,
      "step": 11197
    },
    {
      "epoch": 0.11198,
      "grad_norm": 1.0783422809281649,
      "learning_rate": 0.003,
      "loss": 4.0245,
      "step": 11198
    },
    {
      "epoch": 0.11199,
      "grad_norm": 1.133756805734178,
      "learning_rate": 0.003,
      "loss": 4.0286,
      "step": 11199
    },
    {
      "epoch": 0.112,
      "grad_norm": 1.132914344900601,
      "learning_rate": 0.003,
      "loss": 3.9998,
      "step": 11200
    },
    {
      "epoch": 0.11201,
      "grad_norm": 1.3917889105852121,
      "learning_rate": 0.003,
      "loss": 4.0287,
      "step": 11201
    },
    {
      "epoch": 0.11202,
      "grad_norm": 0.9826420636753783,
      "learning_rate": 0.003,
      "loss": 4.0329,
      "step": 11202
    },
    {
      "epoch": 0.11203,
      "grad_norm": 1.35378770158763,
      "learning_rate": 0.003,
      "loss": 4.0273,
      "step": 11203
    },
    {
      "epoch": 0.11204,
      "grad_norm": 1.1631858332700105,
      "learning_rate": 0.003,
      "loss": 4.0101,
      "step": 11204
    },
    {
      "epoch": 0.11205,
      "grad_norm": 1.2921914277100586,
      "learning_rate": 0.003,
      "loss": 4.0291,
      "step": 11205
    },
    {
      "epoch": 0.11206,
      "grad_norm": 1.0898547835621788,
      "learning_rate": 0.003,
      "loss": 4.0434,
      "step": 11206
    },
    {
      "epoch": 0.11207,
      "grad_norm": 1.2652008774282912,
      "learning_rate": 0.003,
      "loss": 4.0165,
      "step": 11207
    },
    {
      "epoch": 0.11208,
      "grad_norm": 0.9934464327612474,
      "learning_rate": 0.003,
      "loss": 4.0342,
      "step": 11208
    },
    {
      "epoch": 0.11209,
      "grad_norm": 1.2684033075929582,
      "learning_rate": 0.003,
      "loss": 4.0207,
      "step": 11209
    },
    {
      "epoch": 0.1121,
      "grad_norm": 1.0481317867453344,
      "learning_rate": 0.003,
      "loss": 4.0077,
      "step": 11210
    },
    {
      "epoch": 0.11211,
      "grad_norm": 1.2804818902359865,
      "learning_rate": 0.003,
      "loss": 4.0471,
      "step": 11211
    },
    {
      "epoch": 0.11212,
      "grad_norm": 1.1905349539801156,
      "learning_rate": 0.003,
      "loss": 4.0325,
      "step": 11212
    },
    {
      "epoch": 0.11213,
      "grad_norm": 1.1425360307860808,
      "learning_rate": 0.003,
      "loss": 4.0305,
      "step": 11213
    },
    {
      "epoch": 0.11214,
      "grad_norm": 1.0912668598159907,
      "learning_rate": 0.003,
      "loss": 4.0548,
      "step": 11214
    },
    {
      "epoch": 0.11215,
      "grad_norm": 1.2077706236952057,
      "learning_rate": 0.003,
      "loss": 4.0587,
      "step": 11215
    },
    {
      "epoch": 0.11216,
      "grad_norm": 1.139473012377748,
      "learning_rate": 0.003,
      "loss": 4.0144,
      "step": 11216
    },
    {
      "epoch": 0.11217,
      "grad_norm": 1.2737949872278453,
      "learning_rate": 0.003,
      "loss": 4.0295,
      "step": 11217
    },
    {
      "epoch": 0.11218,
      "grad_norm": 1.1462612962051975,
      "learning_rate": 0.003,
      "loss": 4.033,
      "step": 11218
    },
    {
      "epoch": 0.11219,
      "grad_norm": 1.0951419504062592,
      "learning_rate": 0.003,
      "loss": 4.0258,
      "step": 11219
    },
    {
      "epoch": 0.1122,
      "grad_norm": 1.4045206799945849,
      "learning_rate": 0.003,
      "loss": 3.9973,
      "step": 11220
    },
    {
      "epoch": 0.11221,
      "grad_norm": 0.950211704268758,
      "learning_rate": 0.003,
      "loss": 4.025,
      "step": 11221
    },
    {
      "epoch": 0.11222,
      "grad_norm": 1.1123561273367195,
      "learning_rate": 0.003,
      "loss": 4.045,
      "step": 11222
    },
    {
      "epoch": 0.11223,
      "grad_norm": 1.1138231860334924,
      "learning_rate": 0.003,
      "loss": 4.0164,
      "step": 11223
    },
    {
      "epoch": 0.11224,
      "grad_norm": 1.33255470843806,
      "learning_rate": 0.003,
      "loss": 4.0344,
      "step": 11224
    },
    {
      "epoch": 0.11225,
      "grad_norm": 1.0201189422663082,
      "learning_rate": 0.003,
      "loss": 4.0472,
      "step": 11225
    },
    {
      "epoch": 0.11226,
      "grad_norm": 1.2296700017941067,
      "learning_rate": 0.003,
      "loss": 4.0359,
      "step": 11226
    },
    {
      "epoch": 0.11227,
      "grad_norm": 1.2361560337024013,
      "learning_rate": 0.003,
      "loss": 4.015,
      "step": 11227
    },
    {
      "epoch": 0.11228,
      "grad_norm": 1.359208100581836,
      "learning_rate": 0.003,
      "loss": 4.0302,
      "step": 11228
    },
    {
      "epoch": 0.11229,
      "grad_norm": 0.9721364087537138,
      "learning_rate": 0.003,
      "loss": 4.0273,
      "step": 11229
    },
    {
      "epoch": 0.1123,
      "grad_norm": 1.056450608774184,
      "learning_rate": 0.003,
      "loss": 4.0317,
      "step": 11230
    },
    {
      "epoch": 0.11231,
      "grad_norm": 1.1151470529423464,
      "learning_rate": 0.003,
      "loss": 4.0273,
      "step": 11231
    },
    {
      "epoch": 0.11232,
      "grad_norm": 1.349357130637738,
      "learning_rate": 0.003,
      "loss": 4.0221,
      "step": 11232
    },
    {
      "epoch": 0.11233,
      "grad_norm": 1.2692980366831477,
      "learning_rate": 0.003,
      "loss": 4.0427,
      "step": 11233
    },
    {
      "epoch": 0.11234,
      "grad_norm": 1.0600042315155727,
      "learning_rate": 0.003,
      "loss": 4.0246,
      "step": 11234
    },
    {
      "epoch": 0.11235,
      "grad_norm": 0.9730948794785325,
      "learning_rate": 0.003,
      "loss": 4.0312,
      "step": 11235
    },
    {
      "epoch": 0.11236,
      "grad_norm": 1.2005508877119877,
      "learning_rate": 0.003,
      "loss": 4.0459,
      "step": 11236
    },
    {
      "epoch": 0.11237,
      "grad_norm": 1.2147190669139891,
      "learning_rate": 0.003,
      "loss": 4.0341,
      "step": 11237
    },
    {
      "epoch": 0.11238,
      "grad_norm": 1.1338750404710822,
      "learning_rate": 0.003,
      "loss": 4.0114,
      "step": 11238
    },
    {
      "epoch": 0.11239,
      "grad_norm": 1.138170848080884,
      "learning_rate": 0.003,
      "loss": 4.0365,
      "step": 11239
    },
    {
      "epoch": 0.1124,
      "grad_norm": 1.031187833440196,
      "learning_rate": 0.003,
      "loss": 4.0274,
      "step": 11240
    },
    {
      "epoch": 0.11241,
      "grad_norm": 1.2789914501541244,
      "learning_rate": 0.003,
      "loss": 4.0358,
      "step": 11241
    },
    {
      "epoch": 0.11242,
      "grad_norm": 1.2236110455515656,
      "learning_rate": 0.003,
      "loss": 4.0347,
      "step": 11242
    },
    {
      "epoch": 0.11243,
      "grad_norm": 1.1093808126477966,
      "learning_rate": 0.003,
      "loss": 4.0296,
      "step": 11243
    },
    {
      "epoch": 0.11244,
      "grad_norm": 1.288173452117589,
      "learning_rate": 0.003,
      "loss": 4.0213,
      "step": 11244
    },
    {
      "epoch": 0.11245,
      "grad_norm": 1.0828260500666966,
      "learning_rate": 0.003,
      "loss": 4.0408,
      "step": 11245
    },
    {
      "epoch": 0.11246,
      "grad_norm": 1.1567543117559596,
      "learning_rate": 0.003,
      "loss": 4.0146,
      "step": 11246
    },
    {
      "epoch": 0.11247,
      "grad_norm": 1.3948568681025382,
      "learning_rate": 0.003,
      "loss": 4.0069,
      "step": 11247
    },
    {
      "epoch": 0.11248,
      "grad_norm": 1.0116804641863193,
      "learning_rate": 0.003,
      "loss": 4.0463,
      "step": 11248
    },
    {
      "epoch": 0.11249,
      "grad_norm": 1.3046509496910825,
      "learning_rate": 0.003,
      "loss": 4.0379,
      "step": 11249
    },
    {
      "epoch": 0.1125,
      "grad_norm": 0.9991437863954199,
      "learning_rate": 0.003,
      "loss": 4.0072,
      "step": 11250
    },
    {
      "epoch": 0.11251,
      "grad_norm": 1.6013468281261287,
      "learning_rate": 0.003,
      "loss": 4.0288,
      "step": 11251
    },
    {
      "epoch": 0.11252,
      "grad_norm": 0.8687536172748702,
      "learning_rate": 0.003,
      "loss": 4.0434,
      "step": 11252
    },
    {
      "epoch": 0.11253,
      "grad_norm": 1.0983958258583018,
      "learning_rate": 0.003,
      "loss": 4.0275,
      "step": 11253
    },
    {
      "epoch": 0.11254,
      "grad_norm": 1.226302806954953,
      "learning_rate": 0.003,
      "loss": 4.0303,
      "step": 11254
    },
    {
      "epoch": 0.11255,
      "grad_norm": 1.0662642116642365,
      "learning_rate": 0.003,
      "loss": 4.0359,
      "step": 11255
    },
    {
      "epoch": 0.11256,
      "grad_norm": 1.243463378805967,
      "learning_rate": 0.003,
      "loss": 4.0374,
      "step": 11256
    },
    {
      "epoch": 0.11257,
      "grad_norm": 1.0726761813569627,
      "learning_rate": 0.003,
      "loss": 4.0282,
      "step": 11257
    },
    {
      "epoch": 0.11258,
      "grad_norm": 1.101242624393574,
      "learning_rate": 0.003,
      "loss": 4.025,
      "step": 11258
    },
    {
      "epoch": 0.11259,
      "grad_norm": 1.2600622231655116,
      "learning_rate": 0.003,
      "loss": 4.0615,
      "step": 11259
    },
    {
      "epoch": 0.1126,
      "grad_norm": 0.9980365205693852,
      "learning_rate": 0.003,
      "loss": 4.0085,
      "step": 11260
    },
    {
      "epoch": 0.11261,
      "grad_norm": 1.2636683024025575,
      "learning_rate": 0.003,
      "loss": 4.0231,
      "step": 11261
    },
    {
      "epoch": 0.11262,
      "grad_norm": 0.91784299165787,
      "learning_rate": 0.003,
      "loss": 4.0297,
      "step": 11262
    },
    {
      "epoch": 0.11263,
      "grad_norm": 1.2597297519227169,
      "learning_rate": 0.003,
      "loss": 4.02,
      "step": 11263
    },
    {
      "epoch": 0.11264,
      "grad_norm": 1.1932659497244533,
      "learning_rate": 0.003,
      "loss": 4.0284,
      "step": 11264
    },
    {
      "epoch": 0.11265,
      "grad_norm": 0.9816454818813927,
      "learning_rate": 0.003,
      "loss": 4.0043,
      "step": 11265
    },
    {
      "epoch": 0.11266,
      "grad_norm": 1.2284969289133585,
      "learning_rate": 0.003,
      "loss": 4.0467,
      "step": 11266
    },
    {
      "epoch": 0.11267,
      "grad_norm": 1.1261765260002052,
      "learning_rate": 0.003,
      "loss": 4.0563,
      "step": 11267
    },
    {
      "epoch": 0.11268,
      "grad_norm": 1.08799226517971,
      "learning_rate": 0.003,
      "loss": 4.0243,
      "step": 11268
    },
    {
      "epoch": 0.11269,
      "grad_norm": 1.1922244816252106,
      "learning_rate": 0.003,
      "loss": 4.0068,
      "step": 11269
    },
    {
      "epoch": 0.1127,
      "grad_norm": 0.9472450427826526,
      "learning_rate": 0.003,
      "loss": 4.0294,
      "step": 11270
    },
    {
      "epoch": 0.11271,
      "grad_norm": 1.1932608996712035,
      "learning_rate": 0.003,
      "loss": 4.0367,
      "step": 11271
    },
    {
      "epoch": 0.11272,
      "grad_norm": 1.1892191152480909,
      "learning_rate": 0.003,
      "loss": 4.0223,
      "step": 11272
    },
    {
      "epoch": 0.11273,
      "grad_norm": 1.2513453159910775,
      "learning_rate": 0.003,
      "loss": 4.0462,
      "step": 11273
    },
    {
      "epoch": 0.11274,
      "grad_norm": 1.2756382922479819,
      "learning_rate": 0.003,
      "loss": 4.0294,
      "step": 11274
    },
    {
      "epoch": 0.11275,
      "grad_norm": 1.0956380848789105,
      "learning_rate": 0.003,
      "loss": 3.9996,
      "step": 11275
    },
    {
      "epoch": 0.11276,
      "grad_norm": 1.166088340374118,
      "learning_rate": 0.003,
      "loss": 4.0419,
      "step": 11276
    },
    {
      "epoch": 0.11277,
      "grad_norm": 0.9706800035472809,
      "learning_rate": 0.003,
      "loss": 4.0208,
      "step": 11277
    },
    {
      "epoch": 0.11278,
      "grad_norm": 1.277416659504229,
      "learning_rate": 0.003,
      "loss": 4.053,
      "step": 11278
    },
    {
      "epoch": 0.11279,
      "grad_norm": 1.1485716440991571,
      "learning_rate": 0.003,
      "loss": 4.0178,
      "step": 11279
    },
    {
      "epoch": 0.1128,
      "grad_norm": 1.2519360307904746,
      "learning_rate": 0.003,
      "loss": 4.0367,
      "step": 11280
    },
    {
      "epoch": 0.11281,
      "grad_norm": 1.1269013514569461,
      "learning_rate": 0.003,
      "loss": 4.0323,
      "step": 11281
    },
    {
      "epoch": 0.11282,
      "grad_norm": 1.2633224979713635,
      "learning_rate": 0.003,
      "loss": 4.0286,
      "step": 11282
    },
    {
      "epoch": 0.11283,
      "grad_norm": 0.9853227805426616,
      "learning_rate": 0.003,
      "loss": 4.03,
      "step": 11283
    },
    {
      "epoch": 0.11284,
      "grad_norm": 1.3468725308834935,
      "learning_rate": 0.003,
      "loss": 4.021,
      "step": 11284
    },
    {
      "epoch": 0.11285,
      "grad_norm": 0.9626469410998565,
      "learning_rate": 0.003,
      "loss": 4.024,
      "step": 11285
    },
    {
      "epoch": 0.11286,
      "grad_norm": 1.2746488595767842,
      "learning_rate": 0.003,
      "loss": 4.0389,
      "step": 11286
    },
    {
      "epoch": 0.11287,
      "grad_norm": 1.023668641008112,
      "learning_rate": 0.003,
      "loss": 4.0118,
      "step": 11287
    },
    {
      "epoch": 0.11288,
      "grad_norm": 1.2629926336246062,
      "learning_rate": 0.003,
      "loss": 4.0072,
      "step": 11288
    },
    {
      "epoch": 0.11289,
      "grad_norm": 1.1125863518225823,
      "learning_rate": 0.003,
      "loss": 4.0488,
      "step": 11289
    },
    {
      "epoch": 0.1129,
      "grad_norm": 1.2085679726727696,
      "learning_rate": 0.003,
      "loss": 4.0176,
      "step": 11290
    },
    {
      "epoch": 0.11291,
      "grad_norm": 1.020769473245544,
      "learning_rate": 0.003,
      "loss": 4.047,
      "step": 11291
    },
    {
      "epoch": 0.11292,
      "grad_norm": 1.5500356961334352,
      "learning_rate": 0.003,
      "loss": 4.0358,
      "step": 11292
    },
    {
      "epoch": 0.11293,
      "grad_norm": 1.1685657842109745,
      "learning_rate": 0.003,
      "loss": 4.0586,
      "step": 11293
    },
    {
      "epoch": 0.11294,
      "grad_norm": 1.3386643011461636,
      "learning_rate": 0.003,
      "loss": 4.0124,
      "step": 11294
    },
    {
      "epoch": 0.11295,
      "grad_norm": 1.1580125532963097,
      "learning_rate": 0.003,
      "loss": 4.0428,
      "step": 11295
    },
    {
      "epoch": 0.11296,
      "grad_norm": 1.0489120902824944,
      "learning_rate": 0.003,
      "loss": 4.0122,
      "step": 11296
    },
    {
      "epoch": 0.11297,
      "grad_norm": 1.103839461492198,
      "learning_rate": 0.003,
      "loss": 4.0562,
      "step": 11297
    },
    {
      "epoch": 0.11298,
      "grad_norm": 1.2294443402904203,
      "learning_rate": 0.003,
      "loss": 4.0432,
      "step": 11298
    },
    {
      "epoch": 0.11299,
      "grad_norm": 1.2428348430774223,
      "learning_rate": 0.003,
      "loss": 4.0332,
      "step": 11299
    },
    {
      "epoch": 0.113,
      "grad_norm": 1.188214219956524,
      "learning_rate": 0.003,
      "loss": 4.0489,
      "step": 11300
    },
    {
      "epoch": 0.11301,
      "grad_norm": 1.091582106196238,
      "learning_rate": 0.003,
      "loss": 4.0156,
      "step": 11301
    },
    {
      "epoch": 0.11302,
      "grad_norm": 0.9288460311574318,
      "learning_rate": 0.003,
      "loss": 4.0196,
      "step": 11302
    },
    {
      "epoch": 0.11303,
      "grad_norm": 1.4007101429225426,
      "learning_rate": 0.003,
      "loss": 4.0346,
      "step": 11303
    },
    {
      "epoch": 0.11304,
      "grad_norm": 1.0982722121566038,
      "learning_rate": 0.003,
      "loss": 4.0361,
      "step": 11304
    },
    {
      "epoch": 0.11305,
      "grad_norm": 1.2279359120748694,
      "learning_rate": 0.003,
      "loss": 4.012,
      "step": 11305
    },
    {
      "epoch": 0.11306,
      "grad_norm": 1.0604973608241537,
      "learning_rate": 0.003,
      "loss": 4.0086,
      "step": 11306
    },
    {
      "epoch": 0.11307,
      "grad_norm": 1.1270170083873274,
      "learning_rate": 0.003,
      "loss": 4.0314,
      "step": 11307
    },
    {
      "epoch": 0.11308,
      "grad_norm": 1.0777135494150742,
      "learning_rate": 0.003,
      "loss": 4.0248,
      "step": 11308
    },
    {
      "epoch": 0.11309,
      "grad_norm": 1.3544518294662256,
      "learning_rate": 0.003,
      "loss": 4.0423,
      "step": 11309
    },
    {
      "epoch": 0.1131,
      "grad_norm": 0.9727494707141395,
      "learning_rate": 0.003,
      "loss": 4.0062,
      "step": 11310
    },
    {
      "epoch": 0.11311,
      "grad_norm": 1.3272901030217268,
      "learning_rate": 0.003,
      "loss": 4.0414,
      "step": 11311
    },
    {
      "epoch": 0.11312,
      "grad_norm": 1.2076378589872812,
      "learning_rate": 0.003,
      "loss": 4.036,
      "step": 11312
    },
    {
      "epoch": 0.11313,
      "grad_norm": 1.1833510307690753,
      "learning_rate": 0.003,
      "loss": 4.016,
      "step": 11313
    },
    {
      "epoch": 0.11314,
      "grad_norm": 1.2380294898068176,
      "learning_rate": 0.003,
      "loss": 4.0482,
      "step": 11314
    },
    {
      "epoch": 0.11315,
      "grad_norm": 0.9562005645157645,
      "learning_rate": 0.003,
      "loss": 4.0314,
      "step": 11315
    },
    {
      "epoch": 0.11316,
      "grad_norm": 1.2025637701941305,
      "learning_rate": 0.003,
      "loss": 4.018,
      "step": 11316
    },
    {
      "epoch": 0.11317,
      "grad_norm": 1.1779191244129885,
      "learning_rate": 0.003,
      "loss": 4.0402,
      "step": 11317
    },
    {
      "epoch": 0.11318,
      "grad_norm": 1.2236972961058097,
      "learning_rate": 0.003,
      "loss": 4.046,
      "step": 11318
    },
    {
      "epoch": 0.11319,
      "grad_norm": 1.0549576319959617,
      "learning_rate": 0.003,
      "loss": 4.0416,
      "step": 11319
    },
    {
      "epoch": 0.1132,
      "grad_norm": 1.2105786067689994,
      "learning_rate": 0.003,
      "loss": 4.0374,
      "step": 11320
    },
    {
      "epoch": 0.11321,
      "grad_norm": 1.3397052436576664,
      "learning_rate": 0.003,
      "loss": 4.004,
      "step": 11321
    },
    {
      "epoch": 0.11322,
      "grad_norm": 1.2893426572692654,
      "learning_rate": 0.003,
      "loss": 4.0424,
      "step": 11322
    },
    {
      "epoch": 0.11323,
      "grad_norm": 0.9407951953734294,
      "learning_rate": 0.003,
      "loss": 3.9886,
      "step": 11323
    },
    {
      "epoch": 0.11324,
      "grad_norm": 1.177515193019718,
      "learning_rate": 0.003,
      "loss": 4.0279,
      "step": 11324
    },
    {
      "epoch": 0.11325,
      "grad_norm": 1.2549973557620873,
      "learning_rate": 0.003,
      "loss": 4.0491,
      "step": 11325
    },
    {
      "epoch": 0.11326,
      "grad_norm": 1.0497216278305863,
      "learning_rate": 0.003,
      "loss": 4.0329,
      "step": 11326
    },
    {
      "epoch": 0.11327,
      "grad_norm": 1.2211928944892607,
      "learning_rate": 0.003,
      "loss": 4.0406,
      "step": 11327
    },
    {
      "epoch": 0.11328,
      "grad_norm": 1.1575302001609644,
      "learning_rate": 0.003,
      "loss": 4.0286,
      "step": 11328
    },
    {
      "epoch": 0.11329,
      "grad_norm": 1.353259362168543,
      "learning_rate": 0.003,
      "loss": 4.0307,
      "step": 11329
    },
    {
      "epoch": 0.1133,
      "grad_norm": 1.1673317612996932,
      "learning_rate": 0.003,
      "loss": 4.0287,
      "step": 11330
    },
    {
      "epoch": 0.11331,
      "grad_norm": 1.110344451425524,
      "learning_rate": 0.003,
      "loss": 4.0145,
      "step": 11331
    },
    {
      "epoch": 0.11332,
      "grad_norm": 0.9832494841416515,
      "learning_rate": 0.003,
      "loss": 4.0435,
      "step": 11332
    },
    {
      "epoch": 0.11333,
      "grad_norm": 1.1341803958872791,
      "learning_rate": 0.003,
      "loss": 3.9836,
      "step": 11333
    },
    {
      "epoch": 0.11334,
      "grad_norm": 1.0713985717412855,
      "learning_rate": 0.003,
      "loss": 4.0134,
      "step": 11334
    },
    {
      "epoch": 0.11335,
      "grad_norm": 1.4947599748518885,
      "learning_rate": 0.003,
      "loss": 4.0156,
      "step": 11335
    },
    {
      "epoch": 0.11336,
      "grad_norm": 1.0176108077782644,
      "learning_rate": 0.003,
      "loss": 4.0106,
      "step": 11336
    },
    {
      "epoch": 0.11337,
      "grad_norm": 1.0630112841599184,
      "learning_rate": 0.003,
      "loss": 4.0207,
      "step": 11337
    },
    {
      "epoch": 0.11338,
      "grad_norm": 1.0818296952259,
      "learning_rate": 0.003,
      "loss": 4.0359,
      "step": 11338
    },
    {
      "epoch": 0.11339,
      "grad_norm": 1.2490039926297583,
      "learning_rate": 0.003,
      "loss": 4.0659,
      "step": 11339
    },
    {
      "epoch": 0.1134,
      "grad_norm": 1.094214947697734,
      "learning_rate": 0.003,
      "loss": 4.0317,
      "step": 11340
    },
    {
      "epoch": 0.11341,
      "grad_norm": 1.1634860925483297,
      "learning_rate": 0.003,
      "loss": 4.0161,
      "step": 11341
    },
    {
      "epoch": 0.11342,
      "grad_norm": 1.1087200576928156,
      "learning_rate": 0.003,
      "loss": 4.0168,
      "step": 11342
    },
    {
      "epoch": 0.11343,
      "grad_norm": 1.0264504151598612,
      "learning_rate": 0.003,
      "loss": 4.0323,
      "step": 11343
    },
    {
      "epoch": 0.11344,
      "grad_norm": 1.1105495732679151,
      "learning_rate": 0.003,
      "loss": 4.0458,
      "step": 11344
    },
    {
      "epoch": 0.11345,
      "grad_norm": 0.94706032963158,
      "learning_rate": 0.003,
      "loss": 4.0476,
      "step": 11345
    },
    {
      "epoch": 0.11346,
      "grad_norm": 1.2356018822595631,
      "learning_rate": 0.003,
      "loss": 4.0408,
      "step": 11346
    },
    {
      "epoch": 0.11347,
      "grad_norm": 1.1999112899120898,
      "learning_rate": 0.003,
      "loss": 4.0208,
      "step": 11347
    },
    {
      "epoch": 0.11348,
      "grad_norm": 1.123422414749545,
      "learning_rate": 0.003,
      "loss": 4.0392,
      "step": 11348
    },
    {
      "epoch": 0.11349,
      "grad_norm": 1.1936480899464423,
      "learning_rate": 0.003,
      "loss": 4.0413,
      "step": 11349
    },
    {
      "epoch": 0.1135,
      "grad_norm": 1.3146543934382486,
      "learning_rate": 0.003,
      "loss": 4.0608,
      "step": 11350
    },
    {
      "epoch": 0.11351,
      "grad_norm": 1.2860271723469208,
      "learning_rate": 0.003,
      "loss": 4.0356,
      "step": 11351
    },
    {
      "epoch": 0.11352,
      "grad_norm": 1.0410866520299848,
      "learning_rate": 0.003,
      "loss": 3.9984,
      "step": 11352
    },
    {
      "epoch": 0.11353,
      "grad_norm": 1.09507802259332,
      "learning_rate": 0.003,
      "loss": 4.0128,
      "step": 11353
    },
    {
      "epoch": 0.11354,
      "grad_norm": 1.2049447545318868,
      "learning_rate": 0.003,
      "loss": 4.0235,
      "step": 11354
    },
    {
      "epoch": 0.11355,
      "grad_norm": 1.1115701247127443,
      "learning_rate": 0.003,
      "loss": 4.0624,
      "step": 11355
    },
    {
      "epoch": 0.11356,
      "grad_norm": 1.235219941310057,
      "learning_rate": 0.003,
      "loss": 4.028,
      "step": 11356
    },
    {
      "epoch": 0.11357,
      "grad_norm": 1.165360102724017,
      "learning_rate": 0.003,
      "loss": 4.0016,
      "step": 11357
    },
    {
      "epoch": 0.11358,
      "grad_norm": 1.3339213697650871,
      "learning_rate": 0.003,
      "loss": 4.0345,
      "step": 11358
    },
    {
      "epoch": 0.11359,
      "grad_norm": 0.972211541313835,
      "learning_rate": 0.003,
      "loss": 4.0489,
      "step": 11359
    },
    {
      "epoch": 0.1136,
      "grad_norm": 1.0879757952074331,
      "learning_rate": 0.003,
      "loss": 4.0305,
      "step": 11360
    },
    {
      "epoch": 0.11361,
      "grad_norm": 1.065115109105123,
      "learning_rate": 0.003,
      "loss": 4.0001,
      "step": 11361
    },
    {
      "epoch": 0.11362,
      "grad_norm": 1.24872681243813,
      "learning_rate": 0.003,
      "loss": 4.0308,
      "step": 11362
    },
    {
      "epoch": 0.11363,
      "grad_norm": 1.072079099300088,
      "learning_rate": 0.003,
      "loss": 4.0133,
      "step": 11363
    },
    {
      "epoch": 0.11364,
      "grad_norm": 1.055551473984547,
      "learning_rate": 0.003,
      "loss": 4.0412,
      "step": 11364
    },
    {
      "epoch": 0.11365,
      "grad_norm": 1.2328441149042377,
      "learning_rate": 0.003,
      "loss": 4.0284,
      "step": 11365
    },
    {
      "epoch": 0.11366,
      "grad_norm": 1.0839732791788177,
      "learning_rate": 0.003,
      "loss": 4.0253,
      "step": 11366
    },
    {
      "epoch": 0.11367,
      "grad_norm": 1.1584220106389211,
      "learning_rate": 0.003,
      "loss": 4.0,
      "step": 11367
    },
    {
      "epoch": 0.11368,
      "grad_norm": 1.0721506976287616,
      "learning_rate": 0.003,
      "loss": 4.0432,
      "step": 11368
    },
    {
      "epoch": 0.11369,
      "grad_norm": 1.3646659075049767,
      "learning_rate": 0.003,
      "loss": 4.043,
      "step": 11369
    },
    {
      "epoch": 0.1137,
      "grad_norm": 1.1322442704014541,
      "learning_rate": 0.003,
      "loss": 4.05,
      "step": 11370
    },
    {
      "epoch": 0.11371,
      "grad_norm": 1.0558050074955512,
      "learning_rate": 0.003,
      "loss": 4.0272,
      "step": 11371
    },
    {
      "epoch": 0.11372,
      "grad_norm": 1.1301617951796612,
      "learning_rate": 0.003,
      "loss": 4.0178,
      "step": 11372
    },
    {
      "epoch": 0.11373,
      "grad_norm": 0.9812153815857593,
      "learning_rate": 0.003,
      "loss": 4.0307,
      "step": 11373
    },
    {
      "epoch": 0.11374,
      "grad_norm": 1.359681063168288,
      "learning_rate": 0.003,
      "loss": 4.0342,
      "step": 11374
    },
    {
      "epoch": 0.11375,
      "grad_norm": 1.1989683827617195,
      "learning_rate": 0.003,
      "loss": 4.041,
      "step": 11375
    },
    {
      "epoch": 0.11376,
      "grad_norm": 0.9981750577047487,
      "learning_rate": 0.003,
      "loss": 4.061,
      "step": 11376
    },
    {
      "epoch": 0.11377,
      "grad_norm": 1.2247933958516897,
      "learning_rate": 0.003,
      "loss": 4.0321,
      "step": 11377
    },
    {
      "epoch": 0.11378,
      "grad_norm": 1.219591085402563,
      "learning_rate": 0.003,
      "loss": 4.0192,
      "step": 11378
    },
    {
      "epoch": 0.11379,
      "grad_norm": 1.1739864083872635,
      "learning_rate": 0.003,
      "loss": 4.0099,
      "step": 11379
    },
    {
      "epoch": 0.1138,
      "grad_norm": 1.1150848974614544,
      "learning_rate": 0.003,
      "loss": 4.0033,
      "step": 11380
    },
    {
      "epoch": 0.11381,
      "grad_norm": 1.32267660459148,
      "learning_rate": 0.003,
      "loss": 3.9899,
      "step": 11381
    },
    {
      "epoch": 0.11382,
      "grad_norm": 1.19364357962154,
      "learning_rate": 0.003,
      "loss": 4.0418,
      "step": 11382
    },
    {
      "epoch": 0.11383,
      "grad_norm": 1.1210298550024658,
      "learning_rate": 0.003,
      "loss": 4.0096,
      "step": 11383
    },
    {
      "epoch": 0.11384,
      "grad_norm": 1.2945719303722119,
      "learning_rate": 0.003,
      "loss": 4.0286,
      "step": 11384
    },
    {
      "epoch": 0.11385,
      "grad_norm": 0.8328895137306691,
      "learning_rate": 0.003,
      "loss": 4.0099,
      "step": 11385
    },
    {
      "epoch": 0.11386,
      "grad_norm": 0.9686545135829541,
      "learning_rate": 0.003,
      "loss": 4.0375,
      "step": 11386
    },
    {
      "epoch": 0.11387,
      "grad_norm": 1.3533951425487776,
      "learning_rate": 0.003,
      "loss": 4.0245,
      "step": 11387
    },
    {
      "epoch": 0.11388,
      "grad_norm": 0.9410384710036733,
      "learning_rate": 0.003,
      "loss": 4.0159,
      "step": 11388
    },
    {
      "epoch": 0.11389,
      "grad_norm": 1.2925355506565999,
      "learning_rate": 0.003,
      "loss": 4.0327,
      "step": 11389
    },
    {
      "epoch": 0.1139,
      "grad_norm": 1.187327748759508,
      "learning_rate": 0.003,
      "loss": 4.0163,
      "step": 11390
    },
    {
      "epoch": 0.11391,
      "grad_norm": 1.2938651872263744,
      "learning_rate": 0.003,
      "loss": 4.0132,
      "step": 11391
    },
    {
      "epoch": 0.11392,
      "grad_norm": 1.1411249301402717,
      "learning_rate": 0.003,
      "loss": 4.0401,
      "step": 11392
    },
    {
      "epoch": 0.11393,
      "grad_norm": 1.1337183382980245,
      "learning_rate": 0.003,
      "loss": 3.9984,
      "step": 11393
    },
    {
      "epoch": 0.11394,
      "grad_norm": 1.0889052885198094,
      "learning_rate": 0.003,
      "loss": 3.9953,
      "step": 11394
    },
    {
      "epoch": 0.11395,
      "grad_norm": 1.1071420401026428,
      "learning_rate": 0.003,
      "loss": 4.0298,
      "step": 11395
    },
    {
      "epoch": 0.11396,
      "grad_norm": 0.9858679783214278,
      "learning_rate": 0.003,
      "loss": 4.0291,
      "step": 11396
    },
    {
      "epoch": 0.11397,
      "grad_norm": 1.2081766368555051,
      "learning_rate": 0.003,
      "loss": 4.017,
      "step": 11397
    },
    {
      "epoch": 0.11398,
      "grad_norm": 1.1904796842320102,
      "learning_rate": 0.003,
      "loss": 4.003,
      "step": 11398
    },
    {
      "epoch": 0.11399,
      "grad_norm": 1.1652838732036046,
      "learning_rate": 0.003,
      "loss": 4.0378,
      "step": 11399
    },
    {
      "epoch": 0.114,
      "grad_norm": 1.1994199478656578,
      "learning_rate": 0.003,
      "loss": 4.0538,
      "step": 11400
    },
    {
      "epoch": 0.11401,
      "grad_norm": 1.0582784467349977,
      "learning_rate": 0.003,
      "loss": 4.0094,
      "step": 11401
    },
    {
      "epoch": 0.11402,
      "grad_norm": 1.1358520028873116,
      "learning_rate": 0.003,
      "loss": 4.0085,
      "step": 11402
    },
    {
      "epoch": 0.11403,
      "grad_norm": 1.0954439597350976,
      "learning_rate": 0.003,
      "loss": 3.9905,
      "step": 11403
    },
    {
      "epoch": 0.11404,
      "grad_norm": 1.2934994659161199,
      "learning_rate": 0.003,
      "loss": 4.0369,
      "step": 11404
    },
    {
      "epoch": 0.11405,
      "grad_norm": 1.1821009379113836,
      "learning_rate": 0.003,
      "loss": 4.0322,
      "step": 11405
    },
    {
      "epoch": 0.11406,
      "grad_norm": 0.9323614807006916,
      "learning_rate": 0.003,
      "loss": 4.0448,
      "step": 11406
    },
    {
      "epoch": 0.11407,
      "grad_norm": 1.044288690455784,
      "learning_rate": 0.003,
      "loss": 4.0146,
      "step": 11407
    },
    {
      "epoch": 0.11408,
      "grad_norm": 1.288371712660551,
      "learning_rate": 0.003,
      "loss": 4.0401,
      "step": 11408
    },
    {
      "epoch": 0.11409,
      "grad_norm": 1.1013793581481566,
      "learning_rate": 0.003,
      "loss": 4.038,
      "step": 11409
    },
    {
      "epoch": 0.1141,
      "grad_norm": 1.2399270262890425,
      "learning_rate": 0.003,
      "loss": 4.0334,
      "step": 11410
    },
    {
      "epoch": 0.11411,
      "grad_norm": 1.198910800893651,
      "learning_rate": 0.003,
      "loss": 4.0452,
      "step": 11411
    },
    {
      "epoch": 0.11412,
      "grad_norm": 1.2722748826966932,
      "learning_rate": 0.003,
      "loss": 4.0424,
      "step": 11412
    },
    {
      "epoch": 0.11413,
      "grad_norm": 1.2237602676428068,
      "learning_rate": 0.003,
      "loss": 4.022,
      "step": 11413
    },
    {
      "epoch": 0.11414,
      "grad_norm": 1.054751554499624,
      "learning_rate": 0.003,
      "loss": 4.0324,
      "step": 11414
    },
    {
      "epoch": 0.11415,
      "grad_norm": 1.3859608251730753,
      "learning_rate": 0.003,
      "loss": 4.0349,
      "step": 11415
    },
    {
      "epoch": 0.11416,
      "grad_norm": 0.9023833580320342,
      "learning_rate": 0.003,
      "loss": 4.0191,
      "step": 11416
    },
    {
      "epoch": 0.11417,
      "grad_norm": 1.163717032717626,
      "learning_rate": 0.003,
      "loss": 4.0126,
      "step": 11417
    },
    {
      "epoch": 0.11418,
      "grad_norm": 1.1635066782698764,
      "learning_rate": 0.003,
      "loss": 4.0248,
      "step": 11418
    },
    {
      "epoch": 0.11419,
      "grad_norm": 1.2785442673812115,
      "learning_rate": 0.003,
      "loss": 4.0065,
      "step": 11419
    },
    {
      "epoch": 0.1142,
      "grad_norm": 1.1457574905694237,
      "learning_rate": 0.003,
      "loss": 4.0046,
      "step": 11420
    },
    {
      "epoch": 0.11421,
      "grad_norm": 1.2404496422928504,
      "learning_rate": 0.003,
      "loss": 3.9984,
      "step": 11421
    },
    {
      "epoch": 0.11422,
      "grad_norm": 0.9198614788403456,
      "learning_rate": 0.003,
      "loss": 4.0289,
      "step": 11422
    },
    {
      "epoch": 0.11423,
      "grad_norm": 0.9771207760414389,
      "learning_rate": 0.003,
      "loss": 4.0561,
      "step": 11423
    },
    {
      "epoch": 0.11424,
      "grad_norm": 1.0105210455470817,
      "learning_rate": 0.003,
      "loss": 4.05,
      "step": 11424
    },
    {
      "epoch": 0.11425,
      "grad_norm": 1.209588882831824,
      "learning_rate": 0.003,
      "loss": 4.0332,
      "step": 11425
    },
    {
      "epoch": 0.11426,
      "grad_norm": 1.0050650721043324,
      "learning_rate": 0.003,
      "loss": 4.0482,
      "step": 11426
    },
    {
      "epoch": 0.11427,
      "grad_norm": 1.2129381435966735,
      "learning_rate": 0.003,
      "loss": 4.0247,
      "step": 11427
    },
    {
      "epoch": 0.11428,
      "grad_norm": 1.1725659285639696,
      "learning_rate": 0.003,
      "loss": 4.0349,
      "step": 11428
    },
    {
      "epoch": 0.11429,
      "grad_norm": 1.3772603501943919,
      "learning_rate": 0.003,
      "loss": 4.0633,
      "step": 11429
    },
    {
      "epoch": 0.1143,
      "grad_norm": 1.1189640923482154,
      "learning_rate": 0.003,
      "loss": 4.0461,
      "step": 11430
    },
    {
      "epoch": 0.11431,
      "grad_norm": 1.0788431637790905,
      "learning_rate": 0.003,
      "loss": 4.0451,
      "step": 11431
    },
    {
      "epoch": 0.11432,
      "grad_norm": 1.3054445233916165,
      "learning_rate": 0.003,
      "loss": 4.0127,
      "step": 11432
    },
    {
      "epoch": 0.11433,
      "grad_norm": 1.0880911650168383,
      "learning_rate": 0.003,
      "loss": 4.0626,
      "step": 11433
    },
    {
      "epoch": 0.11434,
      "grad_norm": 1.1337028141735888,
      "learning_rate": 0.003,
      "loss": 4.0559,
      "step": 11434
    },
    {
      "epoch": 0.11435,
      "grad_norm": 1.1674027454792053,
      "learning_rate": 0.003,
      "loss": 4.0196,
      "step": 11435
    },
    {
      "epoch": 0.11436,
      "grad_norm": 1.0782480965612236,
      "learning_rate": 0.003,
      "loss": 4.0353,
      "step": 11436
    },
    {
      "epoch": 0.11437,
      "grad_norm": 1.0494128681386032,
      "learning_rate": 0.003,
      "loss": 3.9871,
      "step": 11437
    },
    {
      "epoch": 0.11438,
      "grad_norm": 1.2824338283149883,
      "learning_rate": 0.003,
      "loss": 4.034,
      "step": 11438
    },
    {
      "epoch": 0.11439,
      "grad_norm": 1.1874792640867795,
      "learning_rate": 0.003,
      "loss": 4.0084,
      "step": 11439
    },
    {
      "epoch": 0.1144,
      "grad_norm": 1.1122036447087196,
      "learning_rate": 0.003,
      "loss": 4.0361,
      "step": 11440
    },
    {
      "epoch": 0.11441,
      "grad_norm": 1.190227594977014,
      "learning_rate": 0.003,
      "loss": 4.0196,
      "step": 11441
    },
    {
      "epoch": 0.11442,
      "grad_norm": 1.270616577468654,
      "learning_rate": 0.003,
      "loss": 4.0267,
      "step": 11442
    },
    {
      "epoch": 0.11443,
      "grad_norm": 1.3070052964269097,
      "learning_rate": 0.003,
      "loss": 4.0307,
      "step": 11443
    },
    {
      "epoch": 0.11444,
      "grad_norm": 1.03052198866508,
      "learning_rate": 0.003,
      "loss": 4.0126,
      "step": 11444
    },
    {
      "epoch": 0.11445,
      "grad_norm": 1.1390169230493974,
      "learning_rate": 0.003,
      "loss": 4.0388,
      "step": 11445
    },
    {
      "epoch": 0.11446,
      "grad_norm": 1.3887008590353196,
      "learning_rate": 0.003,
      "loss": 4.0308,
      "step": 11446
    },
    {
      "epoch": 0.11447,
      "grad_norm": 1.3046931809126903,
      "learning_rate": 0.003,
      "loss": 4.0671,
      "step": 11447
    },
    {
      "epoch": 0.11448,
      "grad_norm": 1.1339224847291296,
      "learning_rate": 0.003,
      "loss": 4.0548,
      "step": 11448
    },
    {
      "epoch": 0.11449,
      "grad_norm": 1.2471236226671056,
      "learning_rate": 0.003,
      "loss": 4.0239,
      "step": 11449
    },
    {
      "epoch": 0.1145,
      "grad_norm": 1.1731443345300483,
      "learning_rate": 0.003,
      "loss": 4.0407,
      "step": 11450
    },
    {
      "epoch": 0.11451,
      "grad_norm": 1.051150483846822,
      "learning_rate": 0.003,
      "loss": 4.032,
      "step": 11451
    },
    {
      "epoch": 0.11452,
      "grad_norm": 1.2092227172081997,
      "learning_rate": 0.003,
      "loss": 4.0061,
      "step": 11452
    },
    {
      "epoch": 0.11453,
      "grad_norm": 1.167343479702236,
      "learning_rate": 0.003,
      "loss": 4.02,
      "step": 11453
    },
    {
      "epoch": 0.11454,
      "grad_norm": 1.3660105566993612,
      "learning_rate": 0.003,
      "loss": 4.0283,
      "step": 11454
    },
    {
      "epoch": 0.11455,
      "grad_norm": 1.1629557310949954,
      "learning_rate": 0.003,
      "loss": 3.9962,
      "step": 11455
    },
    {
      "epoch": 0.11456,
      "grad_norm": 1.1739663234451223,
      "learning_rate": 0.003,
      "loss": 4.0209,
      "step": 11456
    },
    {
      "epoch": 0.11457,
      "grad_norm": 1.1156521901007033,
      "learning_rate": 0.003,
      "loss": 4.0112,
      "step": 11457
    },
    {
      "epoch": 0.11458,
      "grad_norm": 1.2592133833338512,
      "learning_rate": 0.003,
      "loss": 4.0413,
      "step": 11458
    },
    {
      "epoch": 0.11459,
      "grad_norm": 0.9615728049014559,
      "learning_rate": 0.003,
      "loss": 4.0184,
      "step": 11459
    },
    {
      "epoch": 0.1146,
      "grad_norm": 1.2024834313430042,
      "learning_rate": 0.003,
      "loss": 4.0154,
      "step": 11460
    },
    {
      "epoch": 0.11461,
      "grad_norm": 1.2257514801570328,
      "learning_rate": 0.003,
      "loss": 4.0395,
      "step": 11461
    },
    {
      "epoch": 0.11462,
      "grad_norm": 1.0895605253494356,
      "learning_rate": 0.003,
      "loss": 4.029,
      "step": 11462
    },
    {
      "epoch": 0.11463,
      "grad_norm": 1.1193074344631706,
      "learning_rate": 0.003,
      "loss": 4.0155,
      "step": 11463
    },
    {
      "epoch": 0.11464,
      "grad_norm": 1.294479238346295,
      "learning_rate": 0.003,
      "loss": 4.0275,
      "step": 11464
    },
    {
      "epoch": 0.11465,
      "grad_norm": 1.2104511450000375,
      "learning_rate": 0.003,
      "loss": 4.0406,
      "step": 11465
    },
    {
      "epoch": 0.11466,
      "grad_norm": 1.3503066663860102,
      "learning_rate": 0.003,
      "loss": 4.0243,
      "step": 11466
    },
    {
      "epoch": 0.11467,
      "grad_norm": 1.0443750276791484,
      "learning_rate": 0.003,
      "loss": 4.0411,
      "step": 11467
    },
    {
      "epoch": 0.11468,
      "grad_norm": 1.0505892359693176,
      "learning_rate": 0.003,
      "loss": 4.0203,
      "step": 11468
    },
    {
      "epoch": 0.11469,
      "grad_norm": 1.2825643225762613,
      "learning_rate": 0.003,
      "loss": 4.0331,
      "step": 11469
    },
    {
      "epoch": 0.1147,
      "grad_norm": 1.0516531724729168,
      "learning_rate": 0.003,
      "loss": 4.0102,
      "step": 11470
    },
    {
      "epoch": 0.11471,
      "grad_norm": 1.3346428910912866,
      "learning_rate": 0.003,
      "loss": 4.0287,
      "step": 11471
    },
    {
      "epoch": 0.11472,
      "grad_norm": 1.095097921047335,
      "learning_rate": 0.003,
      "loss": 4.02,
      "step": 11472
    },
    {
      "epoch": 0.11473,
      "grad_norm": 1.0698418013171735,
      "learning_rate": 0.003,
      "loss": 4.0252,
      "step": 11473
    },
    {
      "epoch": 0.11474,
      "grad_norm": 1.248084189275581,
      "learning_rate": 0.003,
      "loss": 4.032,
      "step": 11474
    },
    {
      "epoch": 0.11475,
      "grad_norm": 1.021325495043658,
      "learning_rate": 0.003,
      "loss": 4.004,
      "step": 11475
    },
    {
      "epoch": 0.11476,
      "grad_norm": 1.1909200855488131,
      "learning_rate": 0.003,
      "loss": 4.0238,
      "step": 11476
    },
    {
      "epoch": 0.11477,
      "grad_norm": 0.8421493513707902,
      "learning_rate": 0.003,
      "loss": 4.0524,
      "step": 11477
    },
    {
      "epoch": 0.11478,
      "grad_norm": 1.0695750946042661,
      "learning_rate": 0.003,
      "loss": 4.0002,
      "step": 11478
    },
    {
      "epoch": 0.11479,
      "grad_norm": 1.2622542869695088,
      "learning_rate": 0.003,
      "loss": 4.0114,
      "step": 11479
    },
    {
      "epoch": 0.1148,
      "grad_norm": 1.092292616556263,
      "learning_rate": 0.003,
      "loss": 4.0519,
      "step": 11480
    },
    {
      "epoch": 0.11481,
      "grad_norm": 1.145024396755148,
      "learning_rate": 0.003,
      "loss": 4.0091,
      "step": 11481
    },
    {
      "epoch": 0.11482,
      "grad_norm": 1.2473154146510699,
      "learning_rate": 0.003,
      "loss": 4.0604,
      "step": 11482
    },
    {
      "epoch": 0.11483,
      "grad_norm": 1.2566705438815857,
      "learning_rate": 0.003,
      "loss": 4.0076,
      "step": 11483
    },
    {
      "epoch": 0.11484,
      "grad_norm": 1.0349608438987377,
      "learning_rate": 0.003,
      "loss": 4.0265,
      "step": 11484
    },
    {
      "epoch": 0.11485,
      "grad_norm": 1.4355317012427342,
      "learning_rate": 0.003,
      "loss": 4.0287,
      "step": 11485
    },
    {
      "epoch": 0.11486,
      "grad_norm": 0.9326495474339838,
      "learning_rate": 0.003,
      "loss": 4.0361,
      "step": 11486
    },
    {
      "epoch": 0.11487,
      "grad_norm": 1.192660410494905,
      "learning_rate": 0.003,
      "loss": 4.0334,
      "step": 11487
    },
    {
      "epoch": 0.11488,
      "grad_norm": 1.2529854252829626,
      "learning_rate": 0.003,
      "loss": 4.0438,
      "step": 11488
    },
    {
      "epoch": 0.11489,
      "grad_norm": 1.089982573108441,
      "learning_rate": 0.003,
      "loss": 4.0335,
      "step": 11489
    },
    {
      "epoch": 0.1149,
      "grad_norm": 1.0719423276675382,
      "learning_rate": 0.003,
      "loss": 4.0331,
      "step": 11490
    },
    {
      "epoch": 0.11491,
      "grad_norm": 1.2350327715936045,
      "learning_rate": 0.003,
      "loss": 4.0025,
      "step": 11491
    },
    {
      "epoch": 0.11492,
      "grad_norm": 1.031561706435445,
      "learning_rate": 0.003,
      "loss": 4.0349,
      "step": 11492
    },
    {
      "epoch": 0.11493,
      "grad_norm": 1.2174218874516956,
      "learning_rate": 0.003,
      "loss": 4.0324,
      "step": 11493
    },
    {
      "epoch": 0.11494,
      "grad_norm": 1.1968611290413345,
      "learning_rate": 0.003,
      "loss": 4.0143,
      "step": 11494
    },
    {
      "epoch": 0.11495,
      "grad_norm": 1.0535886969774535,
      "learning_rate": 0.003,
      "loss": 4.0321,
      "step": 11495
    },
    {
      "epoch": 0.11496,
      "grad_norm": 1.3975302549169397,
      "learning_rate": 0.003,
      "loss": 4.0197,
      "step": 11496
    },
    {
      "epoch": 0.11497,
      "grad_norm": 1.2029400300955522,
      "learning_rate": 0.003,
      "loss": 4.0183,
      "step": 11497
    },
    {
      "epoch": 0.11498,
      "grad_norm": 1.234457060439203,
      "learning_rate": 0.003,
      "loss": 4.0282,
      "step": 11498
    },
    {
      "epoch": 0.11499,
      "grad_norm": 1.1060078725145015,
      "learning_rate": 0.003,
      "loss": 4.0309,
      "step": 11499
    },
    {
      "epoch": 0.115,
      "grad_norm": 1.2756853936300074,
      "learning_rate": 0.003,
      "loss": 4.0531,
      "step": 11500
    },
    {
      "epoch": 0.11501,
      "grad_norm": 1.2133184029419521,
      "learning_rate": 0.003,
      "loss": 4.0527,
      "step": 11501
    },
    {
      "epoch": 0.11502,
      "grad_norm": 1.0818298149149148,
      "learning_rate": 0.003,
      "loss": 4.0405,
      "step": 11502
    },
    {
      "epoch": 0.11503,
      "grad_norm": 1.2728423553765829,
      "learning_rate": 0.003,
      "loss": 4.0392,
      "step": 11503
    },
    {
      "epoch": 0.11504,
      "grad_norm": 1.059498569959539,
      "learning_rate": 0.003,
      "loss": 4.0246,
      "step": 11504
    },
    {
      "epoch": 0.11505,
      "grad_norm": 1.35683834617117,
      "learning_rate": 0.003,
      "loss": 4.0292,
      "step": 11505
    },
    {
      "epoch": 0.11506,
      "grad_norm": 0.9413672569378154,
      "learning_rate": 0.003,
      "loss": 4.052,
      "step": 11506
    },
    {
      "epoch": 0.11507,
      "grad_norm": 0.9314304015475336,
      "learning_rate": 0.003,
      "loss": 4.0413,
      "step": 11507
    },
    {
      "epoch": 0.11508,
      "grad_norm": 1.2504519343724296,
      "learning_rate": 0.003,
      "loss": 4.018,
      "step": 11508
    },
    {
      "epoch": 0.11509,
      "grad_norm": 1.151992191983872,
      "learning_rate": 0.003,
      "loss": 4.0143,
      "step": 11509
    },
    {
      "epoch": 0.1151,
      "grad_norm": 1.3329441485185418,
      "learning_rate": 0.003,
      "loss": 4.0173,
      "step": 11510
    },
    {
      "epoch": 0.11511,
      "grad_norm": 1.0288130767327575,
      "learning_rate": 0.003,
      "loss": 4.0028,
      "step": 11511
    },
    {
      "epoch": 0.11512,
      "grad_norm": 1.2555285931550986,
      "learning_rate": 0.003,
      "loss": 4.0186,
      "step": 11512
    },
    {
      "epoch": 0.11513,
      "grad_norm": 1.0795969059027257,
      "learning_rate": 0.003,
      "loss": 4.0392,
      "step": 11513
    },
    {
      "epoch": 0.11514,
      "grad_norm": 1.2133450305538067,
      "learning_rate": 0.003,
      "loss": 4.0094,
      "step": 11514
    },
    {
      "epoch": 0.11515,
      "grad_norm": 1.0929272209567709,
      "learning_rate": 0.003,
      "loss": 4.0622,
      "step": 11515
    },
    {
      "epoch": 0.11516,
      "grad_norm": 1.193595963865107,
      "learning_rate": 0.003,
      "loss": 4.0477,
      "step": 11516
    },
    {
      "epoch": 0.11517,
      "grad_norm": 1.1423550823219821,
      "learning_rate": 0.003,
      "loss": 4.0151,
      "step": 11517
    },
    {
      "epoch": 0.11518,
      "grad_norm": 1.2145234983955724,
      "learning_rate": 0.003,
      "loss": 4.0574,
      "step": 11518
    },
    {
      "epoch": 0.11519,
      "grad_norm": 1.1485471374131204,
      "learning_rate": 0.003,
      "loss": 4.0179,
      "step": 11519
    },
    {
      "epoch": 0.1152,
      "grad_norm": 1.1180316109358304,
      "learning_rate": 0.003,
      "loss": 3.9936,
      "step": 11520
    },
    {
      "epoch": 0.11521,
      "grad_norm": 1.2276448661153612,
      "learning_rate": 0.003,
      "loss": 4.0315,
      "step": 11521
    },
    {
      "epoch": 0.11522,
      "grad_norm": 1.175171062022422,
      "learning_rate": 0.003,
      "loss": 4.0329,
      "step": 11522
    },
    {
      "epoch": 0.11523,
      "grad_norm": 1.3095154968379423,
      "learning_rate": 0.003,
      "loss": 4.039,
      "step": 11523
    },
    {
      "epoch": 0.11524,
      "grad_norm": 1.1840175023993502,
      "learning_rate": 0.003,
      "loss": 4.0376,
      "step": 11524
    },
    {
      "epoch": 0.11525,
      "grad_norm": 1.2260241036155453,
      "learning_rate": 0.003,
      "loss": 4.0451,
      "step": 11525
    },
    {
      "epoch": 0.11526,
      "grad_norm": 1.2519788784999393,
      "learning_rate": 0.003,
      "loss": 4.0045,
      "step": 11526
    },
    {
      "epoch": 0.11527,
      "grad_norm": 1.2082041366767022,
      "learning_rate": 0.003,
      "loss": 4.0397,
      "step": 11527
    },
    {
      "epoch": 0.11528,
      "grad_norm": 1.070100717213944,
      "learning_rate": 0.003,
      "loss": 4.0125,
      "step": 11528
    },
    {
      "epoch": 0.11529,
      "grad_norm": 1.0301876336933986,
      "learning_rate": 0.003,
      "loss": 4.0407,
      "step": 11529
    },
    {
      "epoch": 0.1153,
      "grad_norm": 1.1693678818554514,
      "learning_rate": 0.003,
      "loss": 3.9957,
      "step": 11530
    },
    {
      "epoch": 0.11531,
      "grad_norm": 1.0247944374771782,
      "learning_rate": 0.003,
      "loss": 4.0374,
      "step": 11531
    },
    {
      "epoch": 0.11532,
      "grad_norm": 1.274822947052742,
      "learning_rate": 0.003,
      "loss": 4.0254,
      "step": 11532
    },
    {
      "epoch": 0.11533,
      "grad_norm": 0.9507147171376193,
      "learning_rate": 0.003,
      "loss": 4.0119,
      "step": 11533
    },
    {
      "epoch": 0.11534,
      "grad_norm": 1.3131155527752056,
      "learning_rate": 0.003,
      "loss": 4.0192,
      "step": 11534
    },
    {
      "epoch": 0.11535,
      "grad_norm": 1.1716121524993672,
      "learning_rate": 0.003,
      "loss": 4.0038,
      "step": 11535
    },
    {
      "epoch": 0.11536,
      "grad_norm": 1.3964603557745452,
      "learning_rate": 0.003,
      "loss": 4.0275,
      "step": 11536
    },
    {
      "epoch": 0.11537,
      "grad_norm": 1.0643667779288306,
      "learning_rate": 0.003,
      "loss": 4.0353,
      "step": 11537
    },
    {
      "epoch": 0.11538,
      "grad_norm": 1.0516657590339504,
      "learning_rate": 0.003,
      "loss": 4.0472,
      "step": 11538
    },
    {
      "epoch": 0.11539,
      "grad_norm": 1.348470067591403,
      "learning_rate": 0.003,
      "loss": 4.0168,
      "step": 11539
    },
    {
      "epoch": 0.1154,
      "grad_norm": 0.9565756930424267,
      "learning_rate": 0.003,
      "loss": 4.0477,
      "step": 11540
    },
    {
      "epoch": 0.11541,
      "grad_norm": 1.2425855920676405,
      "learning_rate": 0.003,
      "loss": 4.0459,
      "step": 11541
    },
    {
      "epoch": 0.11542,
      "grad_norm": 0.9444027602069982,
      "learning_rate": 0.003,
      "loss": 4.0443,
      "step": 11542
    },
    {
      "epoch": 0.11543,
      "grad_norm": 1.121214808300316,
      "learning_rate": 0.003,
      "loss": 4.0212,
      "step": 11543
    },
    {
      "epoch": 0.11544,
      "grad_norm": 1.1647380685948656,
      "learning_rate": 0.003,
      "loss": 4.0188,
      "step": 11544
    },
    {
      "epoch": 0.11545,
      "grad_norm": 1.0922646545602452,
      "learning_rate": 0.003,
      "loss": 4.0057,
      "step": 11545
    },
    {
      "epoch": 0.11546,
      "grad_norm": 1.1959237319583436,
      "learning_rate": 0.003,
      "loss": 4.0396,
      "step": 11546
    },
    {
      "epoch": 0.11547,
      "grad_norm": 1.2168385051277104,
      "learning_rate": 0.003,
      "loss": 4.0135,
      "step": 11547
    },
    {
      "epoch": 0.11548,
      "grad_norm": 0.9906434040772247,
      "learning_rate": 0.003,
      "loss": 4.0103,
      "step": 11548
    },
    {
      "epoch": 0.11549,
      "grad_norm": 1.399388889519621,
      "learning_rate": 0.003,
      "loss": 4.0399,
      "step": 11549
    },
    {
      "epoch": 0.1155,
      "grad_norm": 1.1691281692028388,
      "learning_rate": 0.003,
      "loss": 4.0178,
      "step": 11550
    },
    {
      "epoch": 0.11551,
      "grad_norm": 1.2610503080435684,
      "learning_rate": 0.003,
      "loss": 4.0342,
      "step": 11551
    },
    {
      "epoch": 0.11552,
      "grad_norm": 1.0462781610867442,
      "learning_rate": 0.003,
      "loss": 4.0324,
      "step": 11552
    },
    {
      "epoch": 0.11553,
      "grad_norm": 1.3027344485563248,
      "learning_rate": 0.003,
      "loss": 4.0117,
      "step": 11553
    },
    {
      "epoch": 0.11554,
      "grad_norm": 0.9956780270740784,
      "learning_rate": 0.003,
      "loss": 4.0191,
      "step": 11554
    },
    {
      "epoch": 0.11555,
      "grad_norm": 1.4322465943251208,
      "learning_rate": 0.003,
      "loss": 4.0331,
      "step": 11555
    },
    {
      "epoch": 0.11556,
      "grad_norm": 1.0957293213169021,
      "learning_rate": 0.003,
      "loss": 4.0046,
      "step": 11556
    },
    {
      "epoch": 0.11557,
      "grad_norm": 1.3308831178294134,
      "learning_rate": 0.003,
      "loss": 4.0286,
      "step": 11557
    },
    {
      "epoch": 0.11558,
      "grad_norm": 1.1715374522210913,
      "learning_rate": 0.003,
      "loss": 4.0366,
      "step": 11558
    },
    {
      "epoch": 0.11559,
      "grad_norm": 1.0505976322706607,
      "learning_rate": 0.003,
      "loss": 4.0252,
      "step": 11559
    },
    {
      "epoch": 0.1156,
      "grad_norm": 1.1380918415657428,
      "learning_rate": 0.003,
      "loss": 4.0533,
      "step": 11560
    },
    {
      "epoch": 0.11561,
      "grad_norm": 1.1609609825263985,
      "learning_rate": 0.003,
      "loss": 4.0309,
      "step": 11561
    },
    {
      "epoch": 0.11562,
      "grad_norm": 1.3050344178576219,
      "learning_rate": 0.003,
      "loss": 4.0072,
      "step": 11562
    },
    {
      "epoch": 0.11563,
      "grad_norm": 0.9380550906728504,
      "learning_rate": 0.003,
      "loss": 4.0242,
      "step": 11563
    },
    {
      "epoch": 0.11564,
      "grad_norm": 1.1946440621831265,
      "learning_rate": 0.003,
      "loss": 4.0302,
      "step": 11564
    },
    {
      "epoch": 0.11565,
      "grad_norm": 1.3080572257976502,
      "learning_rate": 0.003,
      "loss": 4.0436,
      "step": 11565
    },
    {
      "epoch": 0.11566,
      "grad_norm": 1.250707143374676,
      "learning_rate": 0.003,
      "loss": 4.0024,
      "step": 11566
    },
    {
      "epoch": 0.11567,
      "grad_norm": 1.2311864131520067,
      "learning_rate": 0.003,
      "loss": 4.0205,
      "step": 11567
    },
    {
      "epoch": 0.11568,
      "grad_norm": 1.311111463929809,
      "learning_rate": 0.003,
      "loss": 4.0334,
      "step": 11568
    },
    {
      "epoch": 0.11569,
      "grad_norm": 0.8868310857413003,
      "learning_rate": 0.003,
      "loss": 4.0257,
      "step": 11569
    },
    {
      "epoch": 0.1157,
      "grad_norm": 1.1027450881292422,
      "learning_rate": 0.003,
      "loss": 3.9892,
      "step": 11570
    },
    {
      "epoch": 0.11571,
      "grad_norm": 1.1161081563397963,
      "learning_rate": 0.003,
      "loss": 4.0275,
      "step": 11571
    },
    {
      "epoch": 0.11572,
      "grad_norm": 0.9089775562151213,
      "learning_rate": 0.003,
      "loss": 3.976,
      "step": 11572
    },
    {
      "epoch": 0.11573,
      "grad_norm": 1.003694953680766,
      "learning_rate": 0.003,
      "loss": 4.0295,
      "step": 11573
    },
    {
      "epoch": 0.11574,
      "grad_norm": 1.3694504994237875,
      "learning_rate": 0.003,
      "loss": 3.9969,
      "step": 11574
    },
    {
      "epoch": 0.11575,
      "grad_norm": 1.1741369881532855,
      "learning_rate": 0.003,
      "loss": 4.0288,
      "step": 11575
    },
    {
      "epoch": 0.11576,
      "grad_norm": 1.436922790332464,
      "learning_rate": 0.003,
      "loss": 3.9982,
      "step": 11576
    },
    {
      "epoch": 0.11577,
      "grad_norm": 0.953413832089103,
      "learning_rate": 0.003,
      "loss": 4.0038,
      "step": 11577
    },
    {
      "epoch": 0.11578,
      "grad_norm": 1.0815315504459275,
      "learning_rate": 0.003,
      "loss": 4.0415,
      "step": 11578
    },
    {
      "epoch": 0.11579,
      "grad_norm": 1.331353293169261,
      "learning_rate": 0.003,
      "loss": 4.0145,
      "step": 11579
    },
    {
      "epoch": 0.1158,
      "grad_norm": 1.2014803447115574,
      "learning_rate": 0.003,
      "loss": 4.0189,
      "step": 11580
    },
    {
      "epoch": 0.11581,
      "grad_norm": 1.307264485319865,
      "learning_rate": 0.003,
      "loss": 3.9928,
      "step": 11581
    },
    {
      "epoch": 0.11582,
      "grad_norm": 1.0474446025518378,
      "learning_rate": 0.003,
      "loss": 4.0142,
      "step": 11582
    },
    {
      "epoch": 0.11583,
      "grad_norm": 1.1339925874103132,
      "learning_rate": 0.003,
      "loss": 4.0043,
      "step": 11583
    },
    {
      "epoch": 0.11584,
      "grad_norm": 1.0857258271985775,
      "learning_rate": 0.003,
      "loss": 4.0373,
      "step": 11584
    },
    {
      "epoch": 0.11585,
      "grad_norm": 1.2405218315074111,
      "learning_rate": 0.003,
      "loss": 4.0572,
      "step": 11585
    },
    {
      "epoch": 0.11586,
      "grad_norm": 1.14159513185874,
      "learning_rate": 0.003,
      "loss": 4.0155,
      "step": 11586
    },
    {
      "epoch": 0.11587,
      "grad_norm": 1.171361791538229,
      "learning_rate": 0.003,
      "loss": 3.9825,
      "step": 11587
    },
    {
      "epoch": 0.11588,
      "grad_norm": 0.8984976334031136,
      "learning_rate": 0.003,
      "loss": 4.026,
      "step": 11588
    },
    {
      "epoch": 0.11589,
      "grad_norm": 1.0483798909522963,
      "learning_rate": 0.003,
      "loss": 4.0035,
      "step": 11589
    },
    {
      "epoch": 0.1159,
      "grad_norm": 1.3752191283722144,
      "learning_rate": 0.003,
      "loss": 4.0107,
      "step": 11590
    },
    {
      "epoch": 0.11591,
      "grad_norm": 0.9271251406957484,
      "learning_rate": 0.003,
      "loss": 4.0183,
      "step": 11591
    },
    {
      "epoch": 0.11592,
      "grad_norm": 1.1839711004608278,
      "learning_rate": 0.003,
      "loss": 4.0336,
      "step": 11592
    },
    {
      "epoch": 0.11593,
      "grad_norm": 1.327326801192664,
      "learning_rate": 0.003,
      "loss": 4.0257,
      "step": 11593
    },
    {
      "epoch": 0.11594,
      "grad_norm": 1.1228264344894299,
      "learning_rate": 0.003,
      "loss": 4.0006,
      "step": 11594
    },
    {
      "epoch": 0.11595,
      "grad_norm": 1.3993924632845647,
      "learning_rate": 0.003,
      "loss": 4.0493,
      "step": 11595
    },
    {
      "epoch": 0.11596,
      "grad_norm": 0.9955718638323818,
      "learning_rate": 0.003,
      "loss": 4.0053,
      "step": 11596
    },
    {
      "epoch": 0.11597,
      "grad_norm": 1.2034336458201005,
      "learning_rate": 0.003,
      "loss": 4.0459,
      "step": 11597
    },
    {
      "epoch": 0.11598,
      "grad_norm": 1.0008043287213872,
      "learning_rate": 0.003,
      "loss": 4.0204,
      "step": 11598
    },
    {
      "epoch": 0.11599,
      "grad_norm": 1.386252254726889,
      "learning_rate": 0.003,
      "loss": 4.0227,
      "step": 11599
    },
    {
      "epoch": 0.116,
      "grad_norm": 0.8982486028150353,
      "learning_rate": 0.003,
      "loss": 4.0168,
      "step": 11600
    },
    {
      "epoch": 0.11601,
      "grad_norm": 1.184347234907959,
      "learning_rate": 0.003,
      "loss": 4.0488,
      "step": 11601
    },
    {
      "epoch": 0.11602,
      "grad_norm": 1.1247974246340537,
      "learning_rate": 0.003,
      "loss": 3.9973,
      "step": 11602
    },
    {
      "epoch": 0.11603,
      "grad_norm": 1.1459727868895968,
      "learning_rate": 0.003,
      "loss": 4.0109,
      "step": 11603
    },
    {
      "epoch": 0.11604,
      "grad_norm": 1.200299318110692,
      "learning_rate": 0.003,
      "loss": 4.0209,
      "step": 11604
    },
    {
      "epoch": 0.11605,
      "grad_norm": 1.1221538219177465,
      "learning_rate": 0.003,
      "loss": 4.0252,
      "step": 11605
    },
    {
      "epoch": 0.11606,
      "grad_norm": 1.3896592029006039,
      "learning_rate": 0.003,
      "loss": 4.0158,
      "step": 11606
    },
    {
      "epoch": 0.11607,
      "grad_norm": 1.167856625001811,
      "learning_rate": 0.003,
      "loss": 4.0258,
      "step": 11607
    },
    {
      "epoch": 0.11608,
      "grad_norm": 1.2487032951275419,
      "learning_rate": 0.003,
      "loss": 4.0424,
      "step": 11608
    },
    {
      "epoch": 0.11609,
      "grad_norm": 0.9357902119911607,
      "learning_rate": 0.003,
      "loss": 4.0577,
      "step": 11609
    },
    {
      "epoch": 0.1161,
      "grad_norm": 1.3783721164193854,
      "learning_rate": 0.003,
      "loss": 4.0523,
      "step": 11610
    },
    {
      "epoch": 0.11611,
      "grad_norm": 1.0580641425258053,
      "learning_rate": 0.003,
      "loss": 4.0383,
      "step": 11611
    },
    {
      "epoch": 0.11612,
      "grad_norm": 1.0737145006413418,
      "learning_rate": 0.003,
      "loss": 4.0513,
      "step": 11612
    },
    {
      "epoch": 0.11613,
      "grad_norm": 1.2046751425123334,
      "learning_rate": 0.003,
      "loss": 4.0184,
      "step": 11613
    },
    {
      "epoch": 0.11614,
      "grad_norm": 1.1911854189282736,
      "learning_rate": 0.003,
      "loss": 4.0322,
      "step": 11614
    },
    {
      "epoch": 0.11615,
      "grad_norm": 1.2731578822836134,
      "learning_rate": 0.003,
      "loss": 4.0298,
      "step": 11615
    },
    {
      "epoch": 0.11616,
      "grad_norm": 1.486744540343558,
      "learning_rate": 0.003,
      "loss": 4.0101,
      "step": 11616
    },
    {
      "epoch": 0.11617,
      "grad_norm": 0.8968819287679518,
      "learning_rate": 0.003,
      "loss": 4.0396,
      "step": 11617
    },
    {
      "epoch": 0.11618,
      "grad_norm": 1.0047111452204807,
      "learning_rate": 0.003,
      "loss": 4.0206,
      "step": 11618
    },
    {
      "epoch": 0.11619,
      "grad_norm": 1.176206234770719,
      "learning_rate": 0.003,
      "loss": 4.0226,
      "step": 11619
    },
    {
      "epoch": 0.1162,
      "grad_norm": 1.2253326151523196,
      "learning_rate": 0.003,
      "loss": 4.0203,
      "step": 11620
    },
    {
      "epoch": 0.11621,
      "grad_norm": 1.1956657043524104,
      "learning_rate": 0.003,
      "loss": 4.0481,
      "step": 11621
    },
    {
      "epoch": 0.11622,
      "grad_norm": 0.9435279690248589,
      "learning_rate": 0.003,
      "loss": 4.035,
      "step": 11622
    },
    {
      "epoch": 0.11623,
      "grad_norm": 1.056882142632435,
      "learning_rate": 0.003,
      "loss": 4.0323,
      "step": 11623
    },
    {
      "epoch": 0.11624,
      "grad_norm": 1.1106539056889895,
      "learning_rate": 0.003,
      "loss": 4.0206,
      "step": 11624
    },
    {
      "epoch": 0.11625,
      "grad_norm": 1.0205099002386708,
      "learning_rate": 0.003,
      "loss": 4.011,
      "step": 11625
    },
    {
      "epoch": 0.11626,
      "grad_norm": 1.258461909905812,
      "learning_rate": 0.003,
      "loss": 4.0183,
      "step": 11626
    },
    {
      "epoch": 0.11627,
      "grad_norm": 1.2874361312056184,
      "learning_rate": 0.003,
      "loss": 4.0164,
      "step": 11627
    },
    {
      "epoch": 0.11628,
      "grad_norm": 1.1743309619531834,
      "learning_rate": 0.003,
      "loss": 4.0634,
      "step": 11628
    },
    {
      "epoch": 0.11629,
      "grad_norm": 1.2243600857600132,
      "learning_rate": 0.003,
      "loss": 4.0242,
      "step": 11629
    },
    {
      "epoch": 0.1163,
      "grad_norm": 1.0499602492021,
      "learning_rate": 0.003,
      "loss": 4.0095,
      "step": 11630
    },
    {
      "epoch": 0.11631,
      "grad_norm": 1.2573505771904938,
      "learning_rate": 0.003,
      "loss": 4.0155,
      "step": 11631
    },
    {
      "epoch": 0.11632,
      "grad_norm": 1.2413025869166792,
      "learning_rate": 0.003,
      "loss": 4.0202,
      "step": 11632
    },
    {
      "epoch": 0.11633,
      "grad_norm": 1.4540436444418998,
      "learning_rate": 0.003,
      "loss": 4.0136,
      "step": 11633
    },
    {
      "epoch": 0.11634,
      "grad_norm": 0.9548533573974899,
      "learning_rate": 0.003,
      "loss": 4.0349,
      "step": 11634
    },
    {
      "epoch": 0.11635,
      "grad_norm": 1.3071307922264086,
      "learning_rate": 0.003,
      "loss": 4.0264,
      "step": 11635
    },
    {
      "epoch": 0.11636,
      "grad_norm": 1.03525412132836,
      "learning_rate": 0.003,
      "loss": 4.0148,
      "step": 11636
    },
    {
      "epoch": 0.11637,
      "grad_norm": 1.2135127098907612,
      "learning_rate": 0.003,
      "loss": 4.0256,
      "step": 11637
    },
    {
      "epoch": 0.11638,
      "grad_norm": 1.3223760615299698,
      "learning_rate": 0.003,
      "loss": 4.018,
      "step": 11638
    },
    {
      "epoch": 0.11639,
      "grad_norm": 0.9699089271963789,
      "learning_rate": 0.003,
      "loss": 4.0002,
      "step": 11639
    },
    {
      "epoch": 0.1164,
      "grad_norm": 1.2811922369560198,
      "learning_rate": 0.003,
      "loss": 4.0542,
      "step": 11640
    },
    {
      "epoch": 0.11641,
      "grad_norm": 1.036699916635101,
      "learning_rate": 0.003,
      "loss": 4.0328,
      "step": 11641
    },
    {
      "epoch": 0.11642,
      "grad_norm": 1.116072859573732,
      "learning_rate": 0.003,
      "loss": 4.0203,
      "step": 11642
    },
    {
      "epoch": 0.11643,
      "grad_norm": 1.1737632356245493,
      "learning_rate": 0.003,
      "loss": 4.0426,
      "step": 11643
    },
    {
      "epoch": 0.11644,
      "grad_norm": 1.105651145301224,
      "learning_rate": 0.003,
      "loss": 4.0289,
      "step": 11644
    },
    {
      "epoch": 0.11645,
      "grad_norm": 1.1206281967173415,
      "learning_rate": 0.003,
      "loss": 4.0388,
      "step": 11645
    },
    {
      "epoch": 0.11646,
      "grad_norm": 1.2792254145674709,
      "learning_rate": 0.003,
      "loss": 4.0481,
      "step": 11646
    },
    {
      "epoch": 0.11647,
      "grad_norm": 1.1472479706618874,
      "learning_rate": 0.003,
      "loss": 4.0105,
      "step": 11647
    },
    {
      "epoch": 0.11648,
      "grad_norm": 1.3828212140161604,
      "learning_rate": 0.003,
      "loss": 4.0419,
      "step": 11648
    },
    {
      "epoch": 0.11649,
      "grad_norm": 1.1798512117694304,
      "learning_rate": 0.003,
      "loss": 4.0209,
      "step": 11649
    },
    {
      "epoch": 0.1165,
      "grad_norm": 1.1784795673577373,
      "learning_rate": 0.003,
      "loss": 4.0146,
      "step": 11650
    },
    {
      "epoch": 0.11651,
      "grad_norm": 1.10280548005316,
      "learning_rate": 0.003,
      "loss": 4.0367,
      "step": 11651
    },
    {
      "epoch": 0.11652,
      "grad_norm": 1.2084602073133586,
      "learning_rate": 0.003,
      "loss": 3.9953,
      "step": 11652
    },
    {
      "epoch": 0.11653,
      "grad_norm": 1.1644747868763254,
      "learning_rate": 0.003,
      "loss": 4.0298,
      "step": 11653
    },
    {
      "epoch": 0.11654,
      "grad_norm": 1.1934077001382124,
      "learning_rate": 0.003,
      "loss": 4.004,
      "step": 11654
    },
    {
      "epoch": 0.11655,
      "grad_norm": 0.9579468700893862,
      "learning_rate": 0.003,
      "loss": 4.0175,
      "step": 11655
    },
    {
      "epoch": 0.11656,
      "grad_norm": 1.2244849985241453,
      "learning_rate": 0.003,
      "loss": 4.0189,
      "step": 11656
    },
    {
      "epoch": 0.11657,
      "grad_norm": 1.2201292215580293,
      "learning_rate": 0.003,
      "loss": 4.0231,
      "step": 11657
    },
    {
      "epoch": 0.11658,
      "grad_norm": 1.008379460556807,
      "learning_rate": 0.003,
      "loss": 4.0433,
      "step": 11658
    },
    {
      "epoch": 0.11659,
      "grad_norm": 1.177586242231204,
      "learning_rate": 0.003,
      "loss": 4.0307,
      "step": 11659
    },
    {
      "epoch": 0.1166,
      "grad_norm": 1.0891812559060503,
      "learning_rate": 0.003,
      "loss": 4.0157,
      "step": 11660
    },
    {
      "epoch": 0.11661,
      "grad_norm": 1.3079283675775348,
      "learning_rate": 0.003,
      "loss": 4.0391,
      "step": 11661
    },
    {
      "epoch": 0.11662,
      "grad_norm": 1.0842584854688044,
      "learning_rate": 0.003,
      "loss": 4.0232,
      "step": 11662
    },
    {
      "epoch": 0.11663,
      "grad_norm": 1.0420405487472146,
      "learning_rate": 0.003,
      "loss": 4.004,
      "step": 11663
    },
    {
      "epoch": 0.11664,
      "grad_norm": 1.3155796441920706,
      "learning_rate": 0.003,
      "loss": 4.0517,
      "step": 11664
    },
    {
      "epoch": 0.11665,
      "grad_norm": 1.152392525026187,
      "learning_rate": 0.003,
      "loss": 3.9992,
      "step": 11665
    },
    {
      "epoch": 0.11666,
      "grad_norm": 1.217393356467726,
      "learning_rate": 0.003,
      "loss": 4.0249,
      "step": 11666
    },
    {
      "epoch": 0.11667,
      "grad_norm": 1.3174299861589436,
      "learning_rate": 0.003,
      "loss": 4.0601,
      "step": 11667
    },
    {
      "epoch": 0.11668,
      "grad_norm": 1.2379665248144123,
      "learning_rate": 0.003,
      "loss": 4.021,
      "step": 11668
    },
    {
      "epoch": 0.11669,
      "grad_norm": 1.2462022619969693,
      "learning_rate": 0.003,
      "loss": 4.031,
      "step": 11669
    },
    {
      "epoch": 0.1167,
      "grad_norm": 1.0813016807485878,
      "learning_rate": 0.003,
      "loss": 4.0308,
      "step": 11670
    },
    {
      "epoch": 0.11671,
      "grad_norm": 1.581906417209871,
      "learning_rate": 0.003,
      "loss": 4.0176,
      "step": 11671
    },
    {
      "epoch": 0.11672,
      "grad_norm": 0.8933295146366401,
      "learning_rate": 0.003,
      "loss": 4.0513,
      "step": 11672
    },
    {
      "epoch": 0.11673,
      "grad_norm": 1.3454666952243168,
      "learning_rate": 0.003,
      "loss": 4.0205,
      "step": 11673
    },
    {
      "epoch": 0.11674,
      "grad_norm": 1.254430804934053,
      "learning_rate": 0.003,
      "loss": 4.026,
      "step": 11674
    },
    {
      "epoch": 0.11675,
      "grad_norm": 1.1407305796856528,
      "learning_rate": 0.003,
      "loss": 4.0267,
      "step": 11675
    },
    {
      "epoch": 0.11676,
      "grad_norm": 1.1240412766616164,
      "learning_rate": 0.003,
      "loss": 4.0316,
      "step": 11676
    },
    {
      "epoch": 0.11677,
      "grad_norm": 1.1039857268337923,
      "learning_rate": 0.003,
      "loss": 4.0403,
      "step": 11677
    },
    {
      "epoch": 0.11678,
      "grad_norm": 1.1382453808865138,
      "learning_rate": 0.003,
      "loss": 4.0361,
      "step": 11678
    },
    {
      "epoch": 0.11679,
      "grad_norm": 1.1297436535746768,
      "learning_rate": 0.003,
      "loss": 4.0214,
      "step": 11679
    },
    {
      "epoch": 0.1168,
      "grad_norm": 1.3532016024947384,
      "learning_rate": 0.003,
      "loss": 4.0531,
      "step": 11680
    },
    {
      "epoch": 0.11681,
      "grad_norm": 0.924056872724392,
      "learning_rate": 0.003,
      "loss": 4.0091,
      "step": 11681
    },
    {
      "epoch": 0.11682,
      "grad_norm": 1.3574144651574285,
      "learning_rate": 0.003,
      "loss": 4.0262,
      "step": 11682
    },
    {
      "epoch": 0.11683,
      "grad_norm": 0.9768276300170212,
      "learning_rate": 0.003,
      "loss": 4.0489,
      "step": 11683
    },
    {
      "epoch": 0.11684,
      "grad_norm": 1.2699611558933386,
      "learning_rate": 0.003,
      "loss": 4.0297,
      "step": 11684
    },
    {
      "epoch": 0.11685,
      "grad_norm": 1.2501909792103931,
      "learning_rate": 0.003,
      "loss": 4.0212,
      "step": 11685
    },
    {
      "epoch": 0.11686,
      "grad_norm": 1.1022171786236412,
      "learning_rate": 0.003,
      "loss": 4.0335,
      "step": 11686
    },
    {
      "epoch": 0.11687,
      "grad_norm": 1.3814616040574366,
      "learning_rate": 0.003,
      "loss": 4.0456,
      "step": 11687
    },
    {
      "epoch": 0.11688,
      "grad_norm": 0.9287871350220055,
      "learning_rate": 0.003,
      "loss": 4.0381,
      "step": 11688
    },
    {
      "epoch": 0.11689,
      "grad_norm": 1.2281964388187652,
      "learning_rate": 0.003,
      "loss": 4.0391,
      "step": 11689
    },
    {
      "epoch": 0.1169,
      "grad_norm": 1.1127225127033677,
      "learning_rate": 0.003,
      "loss": 4.0314,
      "step": 11690
    },
    {
      "epoch": 0.11691,
      "grad_norm": 1.2909281509347093,
      "learning_rate": 0.003,
      "loss": 4.0123,
      "step": 11691
    },
    {
      "epoch": 0.11692,
      "grad_norm": 1.3619954406747572,
      "learning_rate": 0.003,
      "loss": 4.0221,
      "step": 11692
    },
    {
      "epoch": 0.11693,
      "grad_norm": 1.1057049514943214,
      "learning_rate": 0.003,
      "loss": 4.0317,
      "step": 11693
    },
    {
      "epoch": 0.11694,
      "grad_norm": 1.1428819960705745,
      "learning_rate": 0.003,
      "loss": 4.0237,
      "step": 11694
    },
    {
      "epoch": 0.11695,
      "grad_norm": 1.1667755384935443,
      "learning_rate": 0.003,
      "loss": 4.0292,
      "step": 11695
    },
    {
      "epoch": 0.11696,
      "grad_norm": 1.4469227506294298,
      "learning_rate": 0.003,
      "loss": 4.037,
      "step": 11696
    },
    {
      "epoch": 0.11697,
      "grad_norm": 1.005450115587486,
      "learning_rate": 0.003,
      "loss": 4.0015,
      "step": 11697
    },
    {
      "epoch": 0.11698,
      "grad_norm": 1.2238213012885069,
      "learning_rate": 0.003,
      "loss": 4.0202,
      "step": 11698
    },
    {
      "epoch": 0.11699,
      "grad_norm": 1.054385789261448,
      "learning_rate": 0.003,
      "loss": 4.0185,
      "step": 11699
    },
    {
      "epoch": 0.117,
      "grad_norm": 1.122631863282793,
      "learning_rate": 0.003,
      "loss": 4.0263,
      "step": 11700
    },
    {
      "epoch": 0.11701,
      "grad_norm": 1.0125507091879444,
      "learning_rate": 0.003,
      "loss": 4.0256,
      "step": 11701
    },
    {
      "epoch": 0.11702,
      "grad_norm": 1.4368068506374105,
      "learning_rate": 0.003,
      "loss": 4.029,
      "step": 11702
    },
    {
      "epoch": 0.11703,
      "grad_norm": 1.1065201692933635,
      "learning_rate": 0.003,
      "loss": 4.0066,
      "step": 11703
    },
    {
      "epoch": 0.11704,
      "grad_norm": 1.1390282672127734,
      "learning_rate": 0.003,
      "loss": 4.0479,
      "step": 11704
    },
    {
      "epoch": 0.11705,
      "grad_norm": 1.2237420373219998,
      "learning_rate": 0.003,
      "loss": 4.0149,
      "step": 11705
    },
    {
      "epoch": 0.11706,
      "grad_norm": 1.2701077700750183,
      "learning_rate": 0.003,
      "loss": 4.0453,
      "step": 11706
    },
    {
      "epoch": 0.11707,
      "grad_norm": 1.0572688186092254,
      "learning_rate": 0.003,
      "loss": 4.0263,
      "step": 11707
    },
    {
      "epoch": 0.11708,
      "grad_norm": 1.1488670670780203,
      "learning_rate": 0.003,
      "loss": 4.0182,
      "step": 11708
    },
    {
      "epoch": 0.11709,
      "grad_norm": 1.157519213062066,
      "learning_rate": 0.003,
      "loss": 4.0248,
      "step": 11709
    },
    {
      "epoch": 0.1171,
      "grad_norm": 1.3123712860081451,
      "learning_rate": 0.003,
      "loss": 4.0353,
      "step": 11710
    },
    {
      "epoch": 0.11711,
      "grad_norm": 0.9842670475281874,
      "learning_rate": 0.003,
      "loss": 4.0001,
      "step": 11711
    },
    {
      "epoch": 0.11712,
      "grad_norm": 1.1200872070851704,
      "learning_rate": 0.003,
      "loss": 4.0632,
      "step": 11712
    },
    {
      "epoch": 0.11713,
      "grad_norm": 1.1207768888268173,
      "learning_rate": 0.003,
      "loss": 4.0302,
      "step": 11713
    },
    {
      "epoch": 0.11714,
      "grad_norm": 1.2468830968691615,
      "learning_rate": 0.003,
      "loss": 4.0232,
      "step": 11714
    },
    {
      "epoch": 0.11715,
      "grad_norm": 1.1733299447516492,
      "learning_rate": 0.003,
      "loss": 4.0326,
      "step": 11715
    },
    {
      "epoch": 0.11716,
      "grad_norm": 1.0093206271659385,
      "learning_rate": 0.003,
      "loss": 3.9932,
      "step": 11716
    },
    {
      "epoch": 0.11717,
      "grad_norm": 1.1239353189243093,
      "learning_rate": 0.003,
      "loss": 4.0115,
      "step": 11717
    },
    {
      "epoch": 0.11718,
      "grad_norm": 1.233944823509884,
      "learning_rate": 0.003,
      "loss": 4.0313,
      "step": 11718
    },
    {
      "epoch": 0.11719,
      "grad_norm": 1.2985477914439674,
      "learning_rate": 0.003,
      "loss": 4.0086,
      "step": 11719
    },
    {
      "epoch": 0.1172,
      "grad_norm": 1.1827958279910649,
      "learning_rate": 0.003,
      "loss": 4.012,
      "step": 11720
    },
    {
      "epoch": 0.11721,
      "grad_norm": 1.1446307963150169,
      "learning_rate": 0.003,
      "loss": 4.0202,
      "step": 11721
    },
    {
      "epoch": 0.11722,
      "grad_norm": 1.2455474825934287,
      "learning_rate": 0.003,
      "loss": 4.0418,
      "step": 11722
    },
    {
      "epoch": 0.11723,
      "grad_norm": 1.11475125208383,
      "learning_rate": 0.003,
      "loss": 4.0166,
      "step": 11723
    },
    {
      "epoch": 0.11724,
      "grad_norm": 1.1489869640108812,
      "learning_rate": 0.003,
      "loss": 4.0095,
      "step": 11724
    },
    {
      "epoch": 0.11725,
      "grad_norm": 1.148474380544057,
      "learning_rate": 0.003,
      "loss": 4.0201,
      "step": 11725
    },
    {
      "epoch": 0.11726,
      "grad_norm": 1.1836759594720816,
      "learning_rate": 0.003,
      "loss": 4.0258,
      "step": 11726
    },
    {
      "epoch": 0.11727,
      "grad_norm": 1.1489901497136943,
      "learning_rate": 0.003,
      "loss": 4.0457,
      "step": 11727
    },
    {
      "epoch": 0.11728,
      "grad_norm": 1.2213324589045766,
      "learning_rate": 0.003,
      "loss": 4.0116,
      "step": 11728
    },
    {
      "epoch": 0.11729,
      "grad_norm": 1.2375612775771767,
      "learning_rate": 0.003,
      "loss": 4.0382,
      "step": 11729
    },
    {
      "epoch": 0.1173,
      "grad_norm": 1.0576264778912117,
      "learning_rate": 0.003,
      "loss": 4.0342,
      "step": 11730
    },
    {
      "epoch": 0.11731,
      "grad_norm": 1.1994319511524283,
      "learning_rate": 0.003,
      "loss": 4.0295,
      "step": 11731
    },
    {
      "epoch": 0.11732,
      "grad_norm": 1.1470271295502747,
      "learning_rate": 0.003,
      "loss": 4.0186,
      "step": 11732
    },
    {
      "epoch": 0.11733,
      "grad_norm": 1.0321703975881884,
      "learning_rate": 0.003,
      "loss": 4.0364,
      "step": 11733
    },
    {
      "epoch": 0.11734,
      "grad_norm": 1.1708692520929904,
      "learning_rate": 0.003,
      "loss": 4.0328,
      "step": 11734
    },
    {
      "epoch": 0.11735,
      "grad_norm": 1.1217915693347118,
      "learning_rate": 0.003,
      "loss": 4.0152,
      "step": 11735
    },
    {
      "epoch": 0.11736,
      "grad_norm": 1.1484409584760604,
      "learning_rate": 0.003,
      "loss": 4.0319,
      "step": 11736
    },
    {
      "epoch": 0.11737,
      "grad_norm": 1.1665894926579745,
      "learning_rate": 0.003,
      "loss": 4.0143,
      "step": 11737
    },
    {
      "epoch": 0.11738,
      "grad_norm": 1.2813020571044718,
      "learning_rate": 0.003,
      "loss": 4.0429,
      "step": 11738
    },
    {
      "epoch": 0.11739,
      "grad_norm": 1.3181678380644914,
      "learning_rate": 0.003,
      "loss": 4.0391,
      "step": 11739
    },
    {
      "epoch": 0.1174,
      "grad_norm": 1.120161267354906,
      "learning_rate": 0.003,
      "loss": 4.0094,
      "step": 11740
    },
    {
      "epoch": 0.11741,
      "grad_norm": 1.4288394398505233,
      "learning_rate": 0.003,
      "loss": 4.0276,
      "step": 11741
    },
    {
      "epoch": 0.11742,
      "grad_norm": 1.1845167399331413,
      "learning_rate": 0.003,
      "loss": 4.0457,
      "step": 11742
    },
    {
      "epoch": 0.11743,
      "grad_norm": 1.2099165641421654,
      "learning_rate": 0.003,
      "loss": 4.0208,
      "step": 11743
    },
    {
      "epoch": 0.11744,
      "grad_norm": 1.1904859574947253,
      "learning_rate": 0.003,
      "loss": 4.0242,
      "step": 11744
    },
    {
      "epoch": 0.11745,
      "grad_norm": 1.2576951023850673,
      "learning_rate": 0.003,
      "loss": 4.0208,
      "step": 11745
    },
    {
      "epoch": 0.11746,
      "grad_norm": 1.0964969976409622,
      "learning_rate": 0.003,
      "loss": 3.9998,
      "step": 11746
    },
    {
      "epoch": 0.11747,
      "grad_norm": 1.4120869523116033,
      "learning_rate": 0.003,
      "loss": 4.0231,
      "step": 11747
    },
    {
      "epoch": 0.11748,
      "grad_norm": 1.1148049335571633,
      "learning_rate": 0.003,
      "loss": 4.0114,
      "step": 11748
    },
    {
      "epoch": 0.11749,
      "grad_norm": 1.1734647930140643,
      "learning_rate": 0.003,
      "loss": 4.0216,
      "step": 11749
    },
    {
      "epoch": 0.1175,
      "grad_norm": 1.2482405487706174,
      "learning_rate": 0.003,
      "loss": 4.0438,
      "step": 11750
    },
    {
      "epoch": 0.11751,
      "grad_norm": 1.1483514385526685,
      "learning_rate": 0.003,
      "loss": 4.0057,
      "step": 11751
    },
    {
      "epoch": 0.11752,
      "grad_norm": 1.2500272890819837,
      "learning_rate": 0.003,
      "loss": 4.0173,
      "step": 11752
    },
    {
      "epoch": 0.11753,
      "grad_norm": 1.058210157236084,
      "learning_rate": 0.003,
      "loss": 4.0498,
      "step": 11753
    },
    {
      "epoch": 0.11754,
      "grad_norm": 1.1487736631750007,
      "learning_rate": 0.003,
      "loss": 4.0268,
      "step": 11754
    },
    {
      "epoch": 0.11755,
      "grad_norm": 1.4715443707390885,
      "learning_rate": 0.003,
      "loss": 4.0502,
      "step": 11755
    },
    {
      "epoch": 0.11756,
      "grad_norm": 1.2303628287318422,
      "learning_rate": 0.003,
      "loss": 4.04,
      "step": 11756
    },
    {
      "epoch": 0.11757,
      "grad_norm": 1.2922823982550529,
      "learning_rate": 0.003,
      "loss": 4.0262,
      "step": 11757
    },
    {
      "epoch": 0.11758,
      "grad_norm": 1.0379265137390694,
      "learning_rate": 0.003,
      "loss": 4.0508,
      "step": 11758
    },
    {
      "epoch": 0.11759,
      "grad_norm": 1.3406374253364324,
      "learning_rate": 0.003,
      "loss": 4.0467,
      "step": 11759
    },
    {
      "epoch": 0.1176,
      "grad_norm": 1.1304813742700188,
      "learning_rate": 0.003,
      "loss": 4.0279,
      "step": 11760
    },
    {
      "epoch": 0.11761,
      "grad_norm": 1.2685923899639884,
      "learning_rate": 0.003,
      "loss": 4.0187,
      "step": 11761
    },
    {
      "epoch": 0.11762,
      "grad_norm": 1.1593920836432694,
      "learning_rate": 0.003,
      "loss": 4.0237,
      "step": 11762
    },
    {
      "epoch": 0.11763,
      "grad_norm": 1.1762381398269397,
      "learning_rate": 0.003,
      "loss": 4.024,
      "step": 11763
    },
    {
      "epoch": 0.11764,
      "grad_norm": 1.082858055046518,
      "learning_rate": 0.003,
      "loss": 4.0318,
      "step": 11764
    },
    {
      "epoch": 0.11765,
      "grad_norm": 1.191615642444593,
      "learning_rate": 0.003,
      "loss": 4.0408,
      "step": 11765
    },
    {
      "epoch": 0.11766,
      "grad_norm": 1.236828958505827,
      "learning_rate": 0.003,
      "loss": 4.0326,
      "step": 11766
    },
    {
      "epoch": 0.11767,
      "grad_norm": 1.293279569430646,
      "learning_rate": 0.003,
      "loss": 4.0394,
      "step": 11767
    },
    {
      "epoch": 0.11768,
      "grad_norm": 1.3077046844773013,
      "learning_rate": 0.003,
      "loss": 4.028,
      "step": 11768
    },
    {
      "epoch": 0.11769,
      "grad_norm": 1.0549809352357689,
      "learning_rate": 0.003,
      "loss": 4.0262,
      "step": 11769
    },
    {
      "epoch": 0.1177,
      "grad_norm": 1.0689765908038116,
      "learning_rate": 0.003,
      "loss": 4.0273,
      "step": 11770
    },
    {
      "epoch": 0.11771,
      "grad_norm": 1.3866554981109134,
      "learning_rate": 0.003,
      "loss": 4.006,
      "step": 11771
    },
    {
      "epoch": 0.11772,
      "grad_norm": 1.182097917949292,
      "learning_rate": 0.003,
      "loss": 4.0354,
      "step": 11772
    },
    {
      "epoch": 0.11773,
      "grad_norm": 1.2642222535276253,
      "learning_rate": 0.003,
      "loss": 4.0358,
      "step": 11773
    },
    {
      "epoch": 0.11774,
      "grad_norm": 0.9460972097814158,
      "learning_rate": 0.003,
      "loss": 4.023,
      "step": 11774
    },
    {
      "epoch": 0.11775,
      "grad_norm": 1.1967480636343066,
      "learning_rate": 0.003,
      "loss": 4.0229,
      "step": 11775
    },
    {
      "epoch": 0.11776,
      "grad_norm": 0.9332242169274172,
      "learning_rate": 0.003,
      "loss": 4.0151,
      "step": 11776
    },
    {
      "epoch": 0.11777,
      "grad_norm": 1.394104158897881,
      "learning_rate": 0.003,
      "loss": 4.0364,
      "step": 11777
    },
    {
      "epoch": 0.11778,
      "grad_norm": 0.8763693888556454,
      "learning_rate": 0.003,
      "loss": 4.0395,
      "step": 11778
    },
    {
      "epoch": 0.11779,
      "grad_norm": 1.1878650279892633,
      "learning_rate": 0.003,
      "loss": 4.0293,
      "step": 11779
    },
    {
      "epoch": 0.1178,
      "grad_norm": 1.2171141033704371,
      "learning_rate": 0.003,
      "loss": 4.0184,
      "step": 11780
    },
    {
      "epoch": 0.11781,
      "grad_norm": 1.160898330979206,
      "learning_rate": 0.003,
      "loss": 4.0149,
      "step": 11781
    },
    {
      "epoch": 0.11782,
      "grad_norm": 1.3998726789246247,
      "learning_rate": 0.003,
      "loss": 4.0501,
      "step": 11782
    },
    {
      "epoch": 0.11783,
      "grad_norm": 1.274867591692867,
      "learning_rate": 0.003,
      "loss": 4.054,
      "step": 11783
    },
    {
      "epoch": 0.11784,
      "grad_norm": 1.122679163756025,
      "learning_rate": 0.003,
      "loss": 4.0143,
      "step": 11784
    },
    {
      "epoch": 0.11785,
      "grad_norm": 1.350965890967207,
      "learning_rate": 0.003,
      "loss": 4.0226,
      "step": 11785
    },
    {
      "epoch": 0.11786,
      "grad_norm": 1.1259362312209875,
      "learning_rate": 0.003,
      "loss": 4.0208,
      "step": 11786
    },
    {
      "epoch": 0.11787,
      "grad_norm": 1.1001601351081598,
      "learning_rate": 0.003,
      "loss": 4.044,
      "step": 11787
    },
    {
      "epoch": 0.11788,
      "grad_norm": 1.117549418935662,
      "learning_rate": 0.003,
      "loss": 4.0108,
      "step": 11788
    },
    {
      "epoch": 0.11789,
      "grad_norm": 0.9414233692391143,
      "learning_rate": 0.003,
      "loss": 4.0039,
      "step": 11789
    },
    {
      "epoch": 0.1179,
      "grad_norm": 1.0644099136425493,
      "learning_rate": 0.003,
      "loss": 4.0318,
      "step": 11790
    },
    {
      "epoch": 0.11791,
      "grad_norm": 1.2857169916877873,
      "learning_rate": 0.003,
      "loss": 4.0133,
      "step": 11791
    },
    {
      "epoch": 0.11792,
      "grad_norm": 0.9110294228440342,
      "learning_rate": 0.003,
      "loss": 4.0307,
      "step": 11792
    },
    {
      "epoch": 0.11793,
      "grad_norm": 1.1296405959354765,
      "learning_rate": 0.003,
      "loss": 4.0166,
      "step": 11793
    },
    {
      "epoch": 0.11794,
      "grad_norm": 1.098095724914582,
      "learning_rate": 0.003,
      "loss": 4.0248,
      "step": 11794
    },
    {
      "epoch": 0.11795,
      "grad_norm": 1.326195921928846,
      "learning_rate": 0.003,
      "loss": 4.0425,
      "step": 11795
    },
    {
      "epoch": 0.11796,
      "grad_norm": 0.9378070394703966,
      "learning_rate": 0.003,
      "loss": 4.0425,
      "step": 11796
    },
    {
      "epoch": 0.11797,
      "grad_norm": 1.0521633920909828,
      "learning_rate": 0.003,
      "loss": 4.008,
      "step": 11797
    },
    {
      "epoch": 0.11798,
      "grad_norm": 1.1384690047707438,
      "learning_rate": 0.003,
      "loss": 4.0225,
      "step": 11798
    },
    {
      "epoch": 0.11799,
      "grad_norm": 1.2149860412806044,
      "learning_rate": 0.003,
      "loss": 4.0572,
      "step": 11799
    },
    {
      "epoch": 0.118,
      "grad_norm": 1.189248780058123,
      "learning_rate": 0.003,
      "loss": 4.0379,
      "step": 11800
    },
    {
      "epoch": 0.11801,
      "grad_norm": 1.2211735751337582,
      "learning_rate": 0.003,
      "loss": 3.9968,
      "step": 11801
    },
    {
      "epoch": 0.11802,
      "grad_norm": 1.3801254418581266,
      "learning_rate": 0.003,
      "loss": 4.0265,
      "step": 11802
    },
    {
      "epoch": 0.11803,
      "grad_norm": 1.3818090508225513,
      "learning_rate": 0.003,
      "loss": 4.0529,
      "step": 11803
    },
    {
      "epoch": 0.11804,
      "grad_norm": 1.1283876629557728,
      "learning_rate": 0.003,
      "loss": 4.0381,
      "step": 11804
    },
    {
      "epoch": 0.11805,
      "grad_norm": 1.3132843310339615,
      "learning_rate": 0.003,
      "loss": 4.0138,
      "step": 11805
    },
    {
      "epoch": 0.11806,
      "grad_norm": 1.0209548234461399,
      "learning_rate": 0.003,
      "loss": 4.025,
      "step": 11806
    },
    {
      "epoch": 0.11807,
      "grad_norm": 1.4212793934895955,
      "learning_rate": 0.003,
      "loss": 4.036,
      "step": 11807
    },
    {
      "epoch": 0.11808,
      "grad_norm": 0.9218401109406803,
      "learning_rate": 0.003,
      "loss": 4.0076,
      "step": 11808
    },
    {
      "epoch": 0.11809,
      "grad_norm": 1.3124286668656089,
      "learning_rate": 0.003,
      "loss": 4.0474,
      "step": 11809
    },
    {
      "epoch": 0.1181,
      "grad_norm": 1.0303704151552795,
      "learning_rate": 0.003,
      "loss": 4.0218,
      "step": 11810
    },
    {
      "epoch": 0.11811,
      "grad_norm": 1.2557024156725904,
      "learning_rate": 0.003,
      "loss": 4.0499,
      "step": 11811
    },
    {
      "epoch": 0.11812,
      "grad_norm": 1.092659260298407,
      "learning_rate": 0.003,
      "loss": 4.034,
      "step": 11812
    },
    {
      "epoch": 0.11813,
      "grad_norm": 1.0226098263465304,
      "learning_rate": 0.003,
      "loss": 4.0255,
      "step": 11813
    },
    {
      "epoch": 0.11814,
      "grad_norm": 1.251101082721384,
      "learning_rate": 0.003,
      "loss": 4.0288,
      "step": 11814
    },
    {
      "epoch": 0.11815,
      "grad_norm": 1.1762980524741449,
      "learning_rate": 0.003,
      "loss": 4.043,
      "step": 11815
    },
    {
      "epoch": 0.11816,
      "grad_norm": 1.1188651716671465,
      "learning_rate": 0.003,
      "loss": 4.0088,
      "step": 11816
    },
    {
      "epoch": 0.11817,
      "grad_norm": 0.914012325190764,
      "learning_rate": 0.003,
      "loss": 4.0056,
      "step": 11817
    },
    {
      "epoch": 0.11818,
      "grad_norm": 1.1572820201025285,
      "learning_rate": 0.003,
      "loss": 4.03,
      "step": 11818
    },
    {
      "epoch": 0.11819,
      "grad_norm": 1.1097633407410619,
      "learning_rate": 0.003,
      "loss": 3.9945,
      "step": 11819
    },
    {
      "epoch": 0.1182,
      "grad_norm": 1.1315572070408753,
      "learning_rate": 0.003,
      "loss": 4.0379,
      "step": 11820
    },
    {
      "epoch": 0.11821,
      "grad_norm": 1.2010201734662294,
      "learning_rate": 0.003,
      "loss": 3.9999,
      "step": 11821
    },
    {
      "epoch": 0.11822,
      "grad_norm": 1.206264909595148,
      "learning_rate": 0.003,
      "loss": 4.0423,
      "step": 11822
    },
    {
      "epoch": 0.11823,
      "grad_norm": 1.3220064172019355,
      "learning_rate": 0.003,
      "loss": 4.0284,
      "step": 11823
    },
    {
      "epoch": 0.11824,
      "grad_norm": 1.2144802838958413,
      "learning_rate": 0.003,
      "loss": 4.023,
      "step": 11824
    },
    {
      "epoch": 0.11825,
      "grad_norm": 1.1598148282919596,
      "learning_rate": 0.003,
      "loss": 4.0149,
      "step": 11825
    },
    {
      "epoch": 0.11826,
      "grad_norm": 1.1618791360531502,
      "learning_rate": 0.003,
      "loss": 4.0364,
      "step": 11826
    },
    {
      "epoch": 0.11827,
      "grad_norm": 1.1833304595150924,
      "learning_rate": 0.003,
      "loss": 4.0439,
      "step": 11827
    },
    {
      "epoch": 0.11828,
      "grad_norm": 1.1074286497081403,
      "learning_rate": 0.003,
      "loss": 4.0421,
      "step": 11828
    },
    {
      "epoch": 0.11829,
      "grad_norm": 1.4539648581372682,
      "learning_rate": 0.003,
      "loss": 4.0524,
      "step": 11829
    },
    {
      "epoch": 0.1183,
      "grad_norm": 1.071045817248875,
      "learning_rate": 0.003,
      "loss": 4.0344,
      "step": 11830
    },
    {
      "epoch": 0.11831,
      "grad_norm": 1.317805477765449,
      "learning_rate": 0.003,
      "loss": 4.0359,
      "step": 11831
    },
    {
      "epoch": 0.11832,
      "grad_norm": 1.129955427413547,
      "learning_rate": 0.003,
      "loss": 4.011,
      "step": 11832
    },
    {
      "epoch": 0.11833,
      "grad_norm": 1.1888617474099106,
      "learning_rate": 0.003,
      "loss": 4.056,
      "step": 11833
    },
    {
      "epoch": 0.11834,
      "grad_norm": 1.0290127949757615,
      "learning_rate": 0.003,
      "loss": 4.0485,
      "step": 11834
    },
    {
      "epoch": 0.11835,
      "grad_norm": 1.346252405762597,
      "learning_rate": 0.003,
      "loss": 4.0159,
      "step": 11835
    },
    {
      "epoch": 0.11836,
      "grad_norm": 1.0669477339564317,
      "learning_rate": 0.003,
      "loss": 3.9904,
      "step": 11836
    },
    {
      "epoch": 0.11837,
      "grad_norm": 1.2528885489785642,
      "learning_rate": 0.003,
      "loss": 4.0268,
      "step": 11837
    },
    {
      "epoch": 0.11838,
      "grad_norm": 1.2202450943926373,
      "learning_rate": 0.003,
      "loss": 4.0105,
      "step": 11838
    },
    {
      "epoch": 0.11839,
      "grad_norm": 1.1291740083749862,
      "learning_rate": 0.003,
      "loss": 4.0215,
      "step": 11839
    },
    {
      "epoch": 0.1184,
      "grad_norm": 0.973213189261728,
      "learning_rate": 0.003,
      "loss": 4.0153,
      "step": 11840
    },
    {
      "epoch": 0.11841,
      "grad_norm": 1.3080093889925444,
      "learning_rate": 0.003,
      "loss": 4.0314,
      "step": 11841
    },
    {
      "epoch": 0.11842,
      "grad_norm": 1.0994079309142295,
      "learning_rate": 0.003,
      "loss": 4.0114,
      "step": 11842
    },
    {
      "epoch": 0.11843,
      "grad_norm": 1.2562568817457294,
      "learning_rate": 0.003,
      "loss": 4.0182,
      "step": 11843
    },
    {
      "epoch": 0.11844,
      "grad_norm": 0.9846686490918987,
      "learning_rate": 0.003,
      "loss": 4.006,
      "step": 11844
    },
    {
      "epoch": 0.11845,
      "grad_norm": 1.1930430438444906,
      "learning_rate": 0.003,
      "loss": 4.0348,
      "step": 11845
    },
    {
      "epoch": 0.11846,
      "grad_norm": 1.2231257490273924,
      "learning_rate": 0.003,
      "loss": 4.0259,
      "step": 11846
    },
    {
      "epoch": 0.11847,
      "grad_norm": 1.1539088743932866,
      "learning_rate": 0.003,
      "loss": 4.0312,
      "step": 11847
    },
    {
      "epoch": 0.11848,
      "grad_norm": 1.0931038156263568,
      "learning_rate": 0.003,
      "loss": 4.0474,
      "step": 11848
    },
    {
      "epoch": 0.11849,
      "grad_norm": 0.9927389994471185,
      "learning_rate": 0.003,
      "loss": 4.0401,
      "step": 11849
    },
    {
      "epoch": 0.1185,
      "grad_norm": 1.0974827020855389,
      "learning_rate": 0.003,
      "loss": 4.0275,
      "step": 11850
    },
    {
      "epoch": 0.11851,
      "grad_norm": 1.1400193029050951,
      "learning_rate": 0.003,
      "loss": 4.0115,
      "step": 11851
    },
    {
      "epoch": 0.11852,
      "grad_norm": 1.1260088424765735,
      "learning_rate": 0.003,
      "loss": 4.0153,
      "step": 11852
    },
    {
      "epoch": 0.11853,
      "grad_norm": 1.203276342374635,
      "learning_rate": 0.003,
      "loss": 4.0288,
      "step": 11853
    },
    {
      "epoch": 0.11854,
      "grad_norm": 1.2713596037206674,
      "learning_rate": 0.003,
      "loss": 4.0136,
      "step": 11854
    },
    {
      "epoch": 0.11855,
      "grad_norm": 1.0995469030408622,
      "learning_rate": 0.003,
      "loss": 4.0436,
      "step": 11855
    },
    {
      "epoch": 0.11856,
      "grad_norm": 1.526076675901606,
      "learning_rate": 0.003,
      "loss": 4.0173,
      "step": 11856
    },
    {
      "epoch": 0.11857,
      "grad_norm": 0.9423722463191191,
      "learning_rate": 0.003,
      "loss": 4.0206,
      "step": 11857
    },
    {
      "epoch": 0.11858,
      "grad_norm": 1.2391248570515367,
      "learning_rate": 0.003,
      "loss": 4.058,
      "step": 11858
    },
    {
      "epoch": 0.11859,
      "grad_norm": 1.1811635655252974,
      "learning_rate": 0.003,
      "loss": 4.0353,
      "step": 11859
    },
    {
      "epoch": 0.1186,
      "grad_norm": 1.2433780055764707,
      "learning_rate": 0.003,
      "loss": 4.024,
      "step": 11860
    },
    {
      "epoch": 0.11861,
      "grad_norm": 1.1675448550905765,
      "learning_rate": 0.003,
      "loss": 4.0166,
      "step": 11861
    },
    {
      "epoch": 0.11862,
      "grad_norm": 1.522196016713748,
      "learning_rate": 0.003,
      "loss": 4.0488,
      "step": 11862
    },
    {
      "epoch": 0.11863,
      "grad_norm": 0.8786368309806317,
      "learning_rate": 0.003,
      "loss": 4.0398,
      "step": 11863
    },
    {
      "epoch": 0.11864,
      "grad_norm": 1.189025993197089,
      "learning_rate": 0.003,
      "loss": 4.0228,
      "step": 11864
    },
    {
      "epoch": 0.11865,
      "grad_norm": 1.1997885236483348,
      "learning_rate": 0.003,
      "loss": 4.0206,
      "step": 11865
    },
    {
      "epoch": 0.11866,
      "grad_norm": 1.1371332429372165,
      "learning_rate": 0.003,
      "loss": 4.0245,
      "step": 11866
    },
    {
      "epoch": 0.11867,
      "grad_norm": 0.9698227780141195,
      "learning_rate": 0.003,
      "loss": 4.0219,
      "step": 11867
    },
    {
      "epoch": 0.11868,
      "grad_norm": 1.103930407918838,
      "learning_rate": 0.003,
      "loss": 4.0171,
      "step": 11868
    },
    {
      "epoch": 0.11869,
      "grad_norm": 1.302679313668843,
      "learning_rate": 0.003,
      "loss": 4.0349,
      "step": 11869
    },
    {
      "epoch": 0.1187,
      "grad_norm": 1.2640687849220409,
      "learning_rate": 0.003,
      "loss": 4.0366,
      "step": 11870
    },
    {
      "epoch": 0.11871,
      "grad_norm": 1.0418025168606888,
      "learning_rate": 0.003,
      "loss": 4.0103,
      "step": 11871
    },
    {
      "epoch": 0.11872,
      "grad_norm": 1.179394195887056,
      "learning_rate": 0.003,
      "loss": 4.0453,
      "step": 11872
    },
    {
      "epoch": 0.11873,
      "grad_norm": 1.000011524256758,
      "learning_rate": 0.003,
      "loss": 4.0049,
      "step": 11873
    },
    {
      "epoch": 0.11874,
      "grad_norm": 1.1944779241728058,
      "learning_rate": 0.003,
      "loss": 3.9909,
      "step": 11874
    },
    {
      "epoch": 0.11875,
      "grad_norm": 1.0502451220356457,
      "learning_rate": 0.003,
      "loss": 4.0306,
      "step": 11875
    },
    {
      "epoch": 0.11876,
      "grad_norm": 1.296683390399838,
      "learning_rate": 0.003,
      "loss": 4.0065,
      "step": 11876
    },
    {
      "epoch": 0.11877,
      "grad_norm": 0.9584019380860173,
      "learning_rate": 0.003,
      "loss": 4.0469,
      "step": 11877
    },
    {
      "epoch": 0.11878,
      "grad_norm": 1.4464987936871119,
      "learning_rate": 0.003,
      "loss": 4.0307,
      "step": 11878
    },
    {
      "epoch": 0.11879,
      "grad_norm": 1.0641775759664829,
      "learning_rate": 0.003,
      "loss": 4.0427,
      "step": 11879
    },
    {
      "epoch": 0.1188,
      "grad_norm": 1.1780706118325417,
      "learning_rate": 0.003,
      "loss": 4.0297,
      "step": 11880
    },
    {
      "epoch": 0.11881,
      "grad_norm": 1.1247991069312226,
      "learning_rate": 0.003,
      "loss": 4.0261,
      "step": 11881
    },
    {
      "epoch": 0.11882,
      "grad_norm": 1.1967564947594456,
      "learning_rate": 0.003,
      "loss": 4.0193,
      "step": 11882
    },
    {
      "epoch": 0.11883,
      "grad_norm": 1.1953315126688369,
      "learning_rate": 0.003,
      "loss": 4.0509,
      "step": 11883
    },
    {
      "epoch": 0.11884,
      "grad_norm": 1.1616352501275355,
      "learning_rate": 0.003,
      "loss": 4.0222,
      "step": 11884
    },
    {
      "epoch": 0.11885,
      "grad_norm": 1.2509035326277451,
      "learning_rate": 0.003,
      "loss": 4.0591,
      "step": 11885
    },
    {
      "epoch": 0.11886,
      "grad_norm": 1.0576045189685839,
      "learning_rate": 0.003,
      "loss": 4.0261,
      "step": 11886
    },
    {
      "epoch": 0.11887,
      "grad_norm": 1.298046893222415,
      "learning_rate": 0.003,
      "loss": 4.0092,
      "step": 11887
    },
    {
      "epoch": 0.11888,
      "grad_norm": 0.9499057577771591,
      "learning_rate": 0.003,
      "loss": 4.0517,
      "step": 11888
    },
    {
      "epoch": 0.11889,
      "grad_norm": 1.4214495434215852,
      "learning_rate": 0.003,
      "loss": 4.0461,
      "step": 11889
    },
    {
      "epoch": 0.1189,
      "grad_norm": 1.303172172126755,
      "learning_rate": 0.003,
      "loss": 4.0524,
      "step": 11890
    },
    {
      "epoch": 0.11891,
      "grad_norm": 1.066754223369426,
      "learning_rate": 0.003,
      "loss": 4.0244,
      "step": 11891
    },
    {
      "epoch": 0.11892,
      "grad_norm": 1.1813142465709445,
      "learning_rate": 0.003,
      "loss": 4.0307,
      "step": 11892
    },
    {
      "epoch": 0.11893,
      "grad_norm": 1.1318573053901286,
      "learning_rate": 0.003,
      "loss": 4.0316,
      "step": 11893
    },
    {
      "epoch": 0.11894,
      "grad_norm": 1.330891602268722,
      "learning_rate": 0.003,
      "loss": 4.01,
      "step": 11894
    },
    {
      "epoch": 0.11895,
      "grad_norm": 0.9313185148188895,
      "learning_rate": 0.003,
      "loss": 4.0134,
      "step": 11895
    },
    {
      "epoch": 0.11896,
      "grad_norm": 1.2289925546102853,
      "learning_rate": 0.003,
      "loss": 4.0448,
      "step": 11896
    },
    {
      "epoch": 0.11897,
      "grad_norm": 1.3072846874199877,
      "learning_rate": 0.003,
      "loss": 4.0369,
      "step": 11897
    },
    {
      "epoch": 0.11898,
      "grad_norm": 1.1161842409655773,
      "learning_rate": 0.003,
      "loss": 4.0464,
      "step": 11898
    },
    {
      "epoch": 0.11899,
      "grad_norm": 1.2666190795801895,
      "learning_rate": 0.003,
      "loss": 4.0014,
      "step": 11899
    },
    {
      "epoch": 0.119,
      "grad_norm": 1.1826671258994363,
      "learning_rate": 0.003,
      "loss": 4.0304,
      "step": 11900
    },
    {
      "epoch": 0.11901,
      "grad_norm": 1.137766702745759,
      "learning_rate": 0.003,
      "loss": 4.0461,
      "step": 11901
    },
    {
      "epoch": 0.11902,
      "grad_norm": 1.3498299757086438,
      "learning_rate": 0.003,
      "loss": 4.0128,
      "step": 11902
    },
    {
      "epoch": 0.11903,
      "grad_norm": 1.0286391095482723,
      "learning_rate": 0.003,
      "loss": 4.0237,
      "step": 11903
    },
    {
      "epoch": 0.11904,
      "grad_norm": 1.291367620845929,
      "learning_rate": 0.003,
      "loss": 4.0282,
      "step": 11904
    },
    {
      "epoch": 0.11905,
      "grad_norm": 1.0161169842231146,
      "learning_rate": 0.003,
      "loss": 4.0136,
      "step": 11905
    },
    {
      "epoch": 0.11906,
      "grad_norm": 1.363924299039333,
      "learning_rate": 0.003,
      "loss": 4.0272,
      "step": 11906
    },
    {
      "epoch": 0.11907,
      "grad_norm": 0.9352441437117577,
      "learning_rate": 0.003,
      "loss": 4.0341,
      "step": 11907
    },
    {
      "epoch": 0.11908,
      "grad_norm": 1.3625840410584036,
      "learning_rate": 0.003,
      "loss": 4.049,
      "step": 11908
    },
    {
      "epoch": 0.11909,
      "grad_norm": 1.277088311088804,
      "learning_rate": 0.003,
      "loss": 3.9969,
      "step": 11909
    },
    {
      "epoch": 0.1191,
      "grad_norm": 1.212682027964726,
      "learning_rate": 0.003,
      "loss": 4.041,
      "step": 11910
    },
    {
      "epoch": 0.11911,
      "grad_norm": 1.0288245659041568,
      "learning_rate": 0.003,
      "loss": 4.0257,
      "step": 11911
    },
    {
      "epoch": 0.11912,
      "grad_norm": 1.2480548583938877,
      "learning_rate": 0.003,
      "loss": 4.0702,
      "step": 11912
    },
    {
      "epoch": 0.11913,
      "grad_norm": 0.9880713186954828,
      "learning_rate": 0.003,
      "loss": 4.0455,
      "step": 11913
    },
    {
      "epoch": 0.11914,
      "grad_norm": 1.3610335200499963,
      "learning_rate": 0.003,
      "loss": 4.058,
      "step": 11914
    },
    {
      "epoch": 0.11915,
      "grad_norm": 1.180493316410989,
      "learning_rate": 0.003,
      "loss": 4.039,
      "step": 11915
    },
    {
      "epoch": 0.11916,
      "grad_norm": 1.2400050091012045,
      "learning_rate": 0.003,
      "loss": 4.0493,
      "step": 11916
    },
    {
      "epoch": 0.11917,
      "grad_norm": 1.136714338075228,
      "learning_rate": 0.003,
      "loss": 4.0428,
      "step": 11917
    },
    {
      "epoch": 0.11918,
      "grad_norm": 1.2965300537848186,
      "learning_rate": 0.003,
      "loss": 4.0345,
      "step": 11918
    },
    {
      "epoch": 0.11919,
      "grad_norm": 1.1067424063285383,
      "learning_rate": 0.003,
      "loss": 4.0356,
      "step": 11919
    },
    {
      "epoch": 0.1192,
      "grad_norm": 1.143087763102736,
      "learning_rate": 0.003,
      "loss": 4.0324,
      "step": 11920
    },
    {
      "epoch": 0.11921,
      "grad_norm": 1.1278594645317526,
      "learning_rate": 0.003,
      "loss": 3.9941,
      "step": 11921
    },
    {
      "epoch": 0.11922,
      "grad_norm": 1.1065993088756547,
      "learning_rate": 0.003,
      "loss": 4.0435,
      "step": 11922
    },
    {
      "epoch": 0.11923,
      "grad_norm": 1.0404477273555774,
      "learning_rate": 0.003,
      "loss": 3.9843,
      "step": 11923
    },
    {
      "epoch": 0.11924,
      "grad_norm": 1.098544390943939,
      "learning_rate": 0.003,
      "loss": 4.0156,
      "step": 11924
    },
    {
      "epoch": 0.11925,
      "grad_norm": 1.261762237162695,
      "learning_rate": 0.003,
      "loss": 4.0399,
      "step": 11925
    },
    {
      "epoch": 0.11926,
      "grad_norm": 1.3263357716743485,
      "learning_rate": 0.003,
      "loss": 4.0277,
      "step": 11926
    },
    {
      "epoch": 0.11927,
      "grad_norm": 1.3069181491866044,
      "learning_rate": 0.003,
      "loss": 4.0433,
      "step": 11927
    },
    {
      "epoch": 0.11928,
      "grad_norm": 1.2597036992613604,
      "learning_rate": 0.003,
      "loss": 4.0186,
      "step": 11928
    },
    {
      "epoch": 0.11929,
      "grad_norm": 1.1978030782695692,
      "learning_rate": 0.003,
      "loss": 4.0268,
      "step": 11929
    },
    {
      "epoch": 0.1193,
      "grad_norm": 1.122796948660304,
      "learning_rate": 0.003,
      "loss": 4.0259,
      "step": 11930
    },
    {
      "epoch": 0.11931,
      "grad_norm": 1.28639539805541,
      "learning_rate": 0.003,
      "loss": 4.0188,
      "step": 11931
    },
    {
      "epoch": 0.11932,
      "grad_norm": 0.8855138744393674,
      "learning_rate": 0.003,
      "loss": 4.0297,
      "step": 11932
    },
    {
      "epoch": 0.11933,
      "grad_norm": 1.237861600157439,
      "learning_rate": 0.003,
      "loss": 4.0037,
      "step": 11933
    },
    {
      "epoch": 0.11934,
      "grad_norm": 1.0400155549854835,
      "learning_rate": 0.003,
      "loss": 4.0387,
      "step": 11934
    },
    {
      "epoch": 0.11935,
      "grad_norm": 1.253240275222296,
      "learning_rate": 0.003,
      "loss": 4.0106,
      "step": 11935
    },
    {
      "epoch": 0.11936,
      "grad_norm": 1.1208436384532008,
      "learning_rate": 0.003,
      "loss": 4.0212,
      "step": 11936
    },
    {
      "epoch": 0.11937,
      "grad_norm": 1.188206798413594,
      "learning_rate": 0.003,
      "loss": 4.0348,
      "step": 11937
    },
    {
      "epoch": 0.11938,
      "grad_norm": 1.1880639987904473,
      "learning_rate": 0.003,
      "loss": 3.9957,
      "step": 11938
    },
    {
      "epoch": 0.11939,
      "grad_norm": 1.1272675744243077,
      "learning_rate": 0.003,
      "loss": 4.0111,
      "step": 11939
    },
    {
      "epoch": 0.1194,
      "grad_norm": 1.4203426618154515,
      "learning_rate": 0.003,
      "loss": 4.0402,
      "step": 11940
    },
    {
      "epoch": 0.11941,
      "grad_norm": 0.8873694901506768,
      "learning_rate": 0.003,
      "loss": 4.0293,
      "step": 11941
    },
    {
      "epoch": 0.11942,
      "grad_norm": 1.1932380771885094,
      "learning_rate": 0.003,
      "loss": 4.0443,
      "step": 11942
    },
    {
      "epoch": 0.11943,
      "grad_norm": 1.172479996445272,
      "learning_rate": 0.003,
      "loss": 4.0175,
      "step": 11943
    },
    {
      "epoch": 0.11944,
      "grad_norm": 1.197483536826607,
      "learning_rate": 0.003,
      "loss": 4.0218,
      "step": 11944
    },
    {
      "epoch": 0.11945,
      "grad_norm": 1.367144021054736,
      "learning_rate": 0.003,
      "loss": 3.9978,
      "step": 11945
    },
    {
      "epoch": 0.11946,
      "grad_norm": 1.1658588956904559,
      "learning_rate": 0.003,
      "loss": 4.0119,
      "step": 11946
    },
    {
      "epoch": 0.11947,
      "grad_norm": 1.0672867133410382,
      "learning_rate": 0.003,
      "loss": 4.0369,
      "step": 11947
    },
    {
      "epoch": 0.11948,
      "grad_norm": 1.217906799639158,
      "learning_rate": 0.003,
      "loss": 4.029,
      "step": 11948
    },
    {
      "epoch": 0.11949,
      "grad_norm": 1.10277312621074,
      "learning_rate": 0.003,
      "loss": 4.0418,
      "step": 11949
    },
    {
      "epoch": 0.1195,
      "grad_norm": 1.3388512544054778,
      "learning_rate": 0.003,
      "loss": 4.0236,
      "step": 11950
    },
    {
      "epoch": 0.11951,
      "grad_norm": 1.1385910637331862,
      "learning_rate": 0.003,
      "loss": 4.023,
      "step": 11951
    },
    {
      "epoch": 0.11952,
      "grad_norm": 1.129078560681352,
      "learning_rate": 0.003,
      "loss": 4.0339,
      "step": 11952
    },
    {
      "epoch": 0.11953,
      "grad_norm": 1.3014140128951632,
      "learning_rate": 0.003,
      "loss": 4.0382,
      "step": 11953
    },
    {
      "epoch": 0.11954,
      "grad_norm": 1.0937861343588238,
      "learning_rate": 0.003,
      "loss": 3.9997,
      "step": 11954
    },
    {
      "epoch": 0.11955,
      "grad_norm": 1.3146945090315645,
      "learning_rate": 0.003,
      "loss": 4.0241,
      "step": 11955
    },
    {
      "epoch": 0.11956,
      "grad_norm": 1.0561747096686331,
      "learning_rate": 0.003,
      "loss": 4.0394,
      "step": 11956
    },
    {
      "epoch": 0.11957,
      "grad_norm": 1.3427476069726214,
      "learning_rate": 0.003,
      "loss": 4.0179,
      "step": 11957
    },
    {
      "epoch": 0.11958,
      "grad_norm": 0.9106335166241142,
      "learning_rate": 0.003,
      "loss": 4.021,
      "step": 11958
    },
    {
      "epoch": 0.11959,
      "grad_norm": 1.2516591304108617,
      "learning_rate": 0.003,
      "loss": 4.004,
      "step": 11959
    },
    {
      "epoch": 0.1196,
      "grad_norm": 1.2013239303032632,
      "learning_rate": 0.003,
      "loss": 4.018,
      "step": 11960
    },
    {
      "epoch": 0.11961,
      "grad_norm": 1.177656487506444,
      "learning_rate": 0.003,
      "loss": 4.0048,
      "step": 11961
    },
    {
      "epoch": 0.11962,
      "grad_norm": 1.2925731396377893,
      "learning_rate": 0.003,
      "loss": 4.0537,
      "step": 11962
    },
    {
      "epoch": 0.11963,
      "grad_norm": 1.2500654665973316,
      "learning_rate": 0.003,
      "loss": 4.0242,
      "step": 11963
    },
    {
      "epoch": 0.11964,
      "grad_norm": 1.0425860200509207,
      "learning_rate": 0.003,
      "loss": 4.017,
      "step": 11964
    },
    {
      "epoch": 0.11965,
      "grad_norm": 1.2377261274781197,
      "learning_rate": 0.003,
      "loss": 4.0327,
      "step": 11965
    },
    {
      "epoch": 0.11966,
      "grad_norm": 1.2032049076648743,
      "learning_rate": 0.003,
      "loss": 4.0221,
      "step": 11966
    },
    {
      "epoch": 0.11967,
      "grad_norm": 1.1633394155622445,
      "learning_rate": 0.003,
      "loss": 3.9982,
      "step": 11967
    },
    {
      "epoch": 0.11968,
      "grad_norm": 1.319184772941729,
      "learning_rate": 0.003,
      "loss": 4.007,
      "step": 11968
    },
    {
      "epoch": 0.11969,
      "grad_norm": 1.2615950720517894,
      "learning_rate": 0.003,
      "loss": 4.043,
      "step": 11969
    },
    {
      "epoch": 0.1197,
      "grad_norm": 1.2523901831861972,
      "learning_rate": 0.003,
      "loss": 4.0358,
      "step": 11970
    },
    {
      "epoch": 0.11971,
      "grad_norm": 1.0498669279565727,
      "learning_rate": 0.003,
      "loss": 4.0227,
      "step": 11971
    },
    {
      "epoch": 0.11972,
      "grad_norm": 1.2175500601101223,
      "learning_rate": 0.003,
      "loss": 4.001,
      "step": 11972
    },
    {
      "epoch": 0.11973,
      "grad_norm": 1.130504073169973,
      "learning_rate": 0.003,
      "loss": 4.0165,
      "step": 11973
    },
    {
      "epoch": 0.11974,
      "grad_norm": 1.179537557381126,
      "learning_rate": 0.003,
      "loss": 4.0053,
      "step": 11974
    },
    {
      "epoch": 0.11975,
      "grad_norm": 1.2289925798077932,
      "learning_rate": 0.003,
      "loss": 4.0095,
      "step": 11975
    },
    {
      "epoch": 0.11976,
      "grad_norm": 1.030004757594503,
      "learning_rate": 0.003,
      "loss": 4.009,
      "step": 11976
    },
    {
      "epoch": 0.11977,
      "grad_norm": 1.3000185859855784,
      "learning_rate": 0.003,
      "loss": 4.0141,
      "step": 11977
    },
    {
      "epoch": 0.11978,
      "grad_norm": 1.0874027837487101,
      "learning_rate": 0.003,
      "loss": 4.0178,
      "step": 11978
    },
    {
      "epoch": 0.11979,
      "grad_norm": 1.2673027617231618,
      "learning_rate": 0.003,
      "loss": 4.0117,
      "step": 11979
    },
    {
      "epoch": 0.1198,
      "grad_norm": 1.1353539513587794,
      "learning_rate": 0.003,
      "loss": 4.0552,
      "step": 11980
    },
    {
      "epoch": 0.11981,
      "grad_norm": 1.2446619182105716,
      "learning_rate": 0.003,
      "loss": 4.0457,
      "step": 11981
    },
    {
      "epoch": 0.11982,
      "grad_norm": 1.28462029530281,
      "learning_rate": 0.003,
      "loss": 4.0282,
      "step": 11982
    },
    {
      "epoch": 0.11983,
      "grad_norm": 1.0633275265167388,
      "learning_rate": 0.003,
      "loss": 4.0407,
      "step": 11983
    },
    {
      "epoch": 0.11984,
      "grad_norm": 1.2422112244323555,
      "learning_rate": 0.003,
      "loss": 4.014,
      "step": 11984
    },
    {
      "epoch": 0.11985,
      "grad_norm": 1.0976759058442642,
      "learning_rate": 0.003,
      "loss": 4.0214,
      "step": 11985
    },
    {
      "epoch": 0.11986,
      "grad_norm": 1.2409565112326744,
      "learning_rate": 0.003,
      "loss": 4.0296,
      "step": 11986
    },
    {
      "epoch": 0.11987,
      "grad_norm": 1.091733743800522,
      "learning_rate": 0.003,
      "loss": 4.0225,
      "step": 11987
    },
    {
      "epoch": 0.11988,
      "grad_norm": 1.0535364843904362,
      "learning_rate": 0.003,
      "loss": 4.0018,
      "step": 11988
    },
    {
      "epoch": 0.11989,
      "grad_norm": 1.2117698380224653,
      "learning_rate": 0.003,
      "loss": 4.0289,
      "step": 11989
    },
    {
      "epoch": 0.1199,
      "grad_norm": 1.2107541460176305,
      "learning_rate": 0.003,
      "loss": 4.0156,
      "step": 11990
    },
    {
      "epoch": 0.11991,
      "grad_norm": 1.128519381242994,
      "learning_rate": 0.003,
      "loss": 4.0234,
      "step": 11991
    },
    {
      "epoch": 0.11992,
      "grad_norm": 1.2912774558797513,
      "learning_rate": 0.003,
      "loss": 4.053,
      "step": 11992
    },
    {
      "epoch": 0.11993,
      "grad_norm": 0.9859836123745601,
      "learning_rate": 0.003,
      "loss": 4.0026,
      "step": 11993
    },
    {
      "epoch": 0.11994,
      "grad_norm": 1.4062953714578372,
      "learning_rate": 0.003,
      "loss": 4.038,
      "step": 11994
    },
    {
      "epoch": 0.11995,
      "grad_norm": 0.9854278363001263,
      "learning_rate": 0.003,
      "loss": 4.0242,
      "step": 11995
    },
    {
      "epoch": 0.11996,
      "grad_norm": 1.1283327652944204,
      "learning_rate": 0.003,
      "loss": 4.0043,
      "step": 11996
    },
    {
      "epoch": 0.11997,
      "grad_norm": 1.2578860491282189,
      "learning_rate": 0.003,
      "loss": 4.0149,
      "step": 11997
    },
    {
      "epoch": 0.11998,
      "grad_norm": 1.2201013514042887,
      "learning_rate": 0.003,
      "loss": 4.0318,
      "step": 11998
    },
    {
      "epoch": 0.11999,
      "grad_norm": 1.0464428472868936,
      "learning_rate": 0.003,
      "loss": 4.0428,
      "step": 11999
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.1442643913849708,
      "learning_rate": 0.003,
      "loss": 4.0148,
      "step": 12000
    },
    {
      "epoch": 0.12001,
      "grad_norm": 1.3301044375260127,
      "learning_rate": 0.003,
      "loss": 4.0197,
      "step": 12001
    },
    {
      "epoch": 0.12002,
      "grad_norm": 1.105906653032294,
      "learning_rate": 0.003,
      "loss": 4.0089,
      "step": 12002
    },
    {
      "epoch": 0.12003,
      "grad_norm": 1.2210250779728278,
      "learning_rate": 0.003,
      "loss": 4.0124,
      "step": 12003
    },
    {
      "epoch": 0.12004,
      "grad_norm": 1.1362570687956641,
      "learning_rate": 0.003,
      "loss": 4.0165,
      "step": 12004
    },
    {
      "epoch": 0.12005,
      "grad_norm": 1.2598481775607295,
      "learning_rate": 0.003,
      "loss": 4.0135,
      "step": 12005
    },
    {
      "epoch": 0.12006,
      "grad_norm": 1.238198018149289,
      "learning_rate": 0.003,
      "loss": 4.0145,
      "step": 12006
    },
    {
      "epoch": 0.12007,
      "grad_norm": 1.329447272103125,
      "learning_rate": 0.003,
      "loss": 4.027,
      "step": 12007
    },
    {
      "epoch": 0.12008,
      "grad_norm": 1.1041434627768059,
      "learning_rate": 0.003,
      "loss": 4.05,
      "step": 12008
    },
    {
      "epoch": 0.12009,
      "grad_norm": 1.0703280836160836,
      "learning_rate": 0.003,
      "loss": 4.0266,
      "step": 12009
    },
    {
      "epoch": 0.1201,
      "grad_norm": 1.1943502028864001,
      "learning_rate": 0.003,
      "loss": 4.0197,
      "step": 12010
    },
    {
      "epoch": 0.12011,
      "grad_norm": 1.0239676315199393,
      "learning_rate": 0.003,
      "loss": 4.0084,
      "step": 12011
    },
    {
      "epoch": 0.12012,
      "grad_norm": 1.4503121638562684,
      "learning_rate": 0.003,
      "loss": 4.0091,
      "step": 12012
    },
    {
      "epoch": 0.12013,
      "grad_norm": 1.015108621263654,
      "learning_rate": 0.003,
      "loss": 4.0106,
      "step": 12013
    },
    {
      "epoch": 0.12014,
      "grad_norm": 1.389962594266883,
      "learning_rate": 0.003,
      "loss": 4.0194,
      "step": 12014
    },
    {
      "epoch": 0.12015,
      "grad_norm": 1.1252082891848212,
      "learning_rate": 0.003,
      "loss": 4.0297,
      "step": 12015
    },
    {
      "epoch": 0.12016,
      "grad_norm": 1.0487983474875753,
      "learning_rate": 0.003,
      "loss": 4.0303,
      "step": 12016
    },
    {
      "epoch": 0.12017,
      "grad_norm": 1.4431048621531977,
      "learning_rate": 0.003,
      "loss": 4.0476,
      "step": 12017
    },
    {
      "epoch": 0.12018,
      "grad_norm": 0.9166310978141474,
      "learning_rate": 0.003,
      "loss": 3.9751,
      "step": 12018
    },
    {
      "epoch": 0.12019,
      "grad_norm": 1.2506433606927407,
      "learning_rate": 0.003,
      "loss": 4.0303,
      "step": 12019
    },
    {
      "epoch": 0.1202,
      "grad_norm": 1.480480476667452,
      "learning_rate": 0.003,
      "loss": 4.0475,
      "step": 12020
    },
    {
      "epoch": 0.12021,
      "grad_norm": 1.0689179540973426,
      "learning_rate": 0.003,
      "loss": 4.0413,
      "step": 12021
    },
    {
      "epoch": 0.12022,
      "grad_norm": 1.238638964578657,
      "learning_rate": 0.003,
      "loss": 3.9995,
      "step": 12022
    },
    {
      "epoch": 0.12023,
      "grad_norm": 1.217878541780031,
      "learning_rate": 0.003,
      "loss": 4.0104,
      "step": 12023
    },
    {
      "epoch": 0.12024,
      "grad_norm": 1.0851112909646494,
      "learning_rate": 0.003,
      "loss": 4.011,
      "step": 12024
    },
    {
      "epoch": 0.12025,
      "grad_norm": 1.1604224142434416,
      "learning_rate": 0.003,
      "loss": 4.0203,
      "step": 12025
    },
    {
      "epoch": 0.12026,
      "grad_norm": 1.031844987403058,
      "learning_rate": 0.003,
      "loss": 4.0349,
      "step": 12026
    },
    {
      "epoch": 0.12027,
      "grad_norm": 1.1595745336365029,
      "learning_rate": 0.003,
      "loss": 4.0214,
      "step": 12027
    },
    {
      "epoch": 0.12028,
      "grad_norm": 1.1947817751700118,
      "learning_rate": 0.003,
      "loss": 4.0248,
      "step": 12028
    },
    {
      "epoch": 0.12029,
      "grad_norm": 1.0497319637213312,
      "learning_rate": 0.003,
      "loss": 4.0133,
      "step": 12029
    },
    {
      "epoch": 0.1203,
      "grad_norm": 1.0887730490228416,
      "learning_rate": 0.003,
      "loss": 4.0177,
      "step": 12030
    },
    {
      "epoch": 0.12031,
      "grad_norm": 1.2639844964964313,
      "learning_rate": 0.003,
      "loss": 4.0335,
      "step": 12031
    },
    {
      "epoch": 0.12032,
      "grad_norm": 1.1476592744733245,
      "learning_rate": 0.003,
      "loss": 4.0221,
      "step": 12032
    },
    {
      "epoch": 0.12033,
      "grad_norm": 1.4253662214975455,
      "learning_rate": 0.003,
      "loss": 4.0329,
      "step": 12033
    },
    {
      "epoch": 0.12034,
      "grad_norm": 1.0367483017417718,
      "learning_rate": 0.003,
      "loss": 4.025,
      "step": 12034
    },
    {
      "epoch": 0.12035,
      "grad_norm": 1.3672538599328856,
      "learning_rate": 0.003,
      "loss": 4.0071,
      "step": 12035
    },
    {
      "epoch": 0.12036,
      "grad_norm": 1.088814881263226,
      "learning_rate": 0.003,
      "loss": 4.04,
      "step": 12036
    },
    {
      "epoch": 0.12037,
      "grad_norm": 1.2526346282054286,
      "learning_rate": 0.003,
      "loss": 4.035,
      "step": 12037
    },
    {
      "epoch": 0.12038,
      "grad_norm": 1.160570553266028,
      "learning_rate": 0.003,
      "loss": 4.0369,
      "step": 12038
    },
    {
      "epoch": 0.12039,
      "grad_norm": 1.1199414106378185,
      "learning_rate": 0.003,
      "loss": 4.0043,
      "step": 12039
    },
    {
      "epoch": 0.1204,
      "grad_norm": 1.1004831372504815,
      "learning_rate": 0.003,
      "loss": 4.0323,
      "step": 12040
    },
    {
      "epoch": 0.12041,
      "grad_norm": 1.168791590589254,
      "learning_rate": 0.003,
      "loss": 4.0087,
      "step": 12041
    },
    {
      "epoch": 0.12042,
      "grad_norm": 1.1345050629943982,
      "learning_rate": 0.003,
      "loss": 4.0018,
      "step": 12042
    },
    {
      "epoch": 0.12043,
      "grad_norm": 1.1312277297552067,
      "learning_rate": 0.003,
      "loss": 4.0458,
      "step": 12043
    },
    {
      "epoch": 0.12044,
      "grad_norm": 1.1218607973489465,
      "learning_rate": 0.003,
      "loss": 4.0191,
      "step": 12044
    },
    {
      "epoch": 0.12045,
      "grad_norm": 1.3439830827965968,
      "learning_rate": 0.003,
      "loss": 4.0356,
      "step": 12045
    },
    {
      "epoch": 0.12046,
      "grad_norm": 1.194576377831826,
      "learning_rate": 0.003,
      "loss": 4.0222,
      "step": 12046
    },
    {
      "epoch": 0.12047,
      "grad_norm": 1.2242887448551507,
      "learning_rate": 0.003,
      "loss": 4.0444,
      "step": 12047
    },
    {
      "epoch": 0.12048,
      "grad_norm": 1.1869139877584274,
      "learning_rate": 0.003,
      "loss": 4.025,
      "step": 12048
    },
    {
      "epoch": 0.12049,
      "grad_norm": 1.23366882798688,
      "learning_rate": 0.003,
      "loss": 4.0211,
      "step": 12049
    },
    {
      "epoch": 0.1205,
      "grad_norm": 1.1321804170340066,
      "learning_rate": 0.003,
      "loss": 4.0509,
      "step": 12050
    },
    {
      "epoch": 0.12051,
      "grad_norm": 1.2506912484400732,
      "learning_rate": 0.003,
      "loss": 4.0383,
      "step": 12051
    },
    {
      "epoch": 0.12052,
      "grad_norm": 1.171321048284884,
      "learning_rate": 0.003,
      "loss": 4.0385,
      "step": 12052
    },
    {
      "epoch": 0.12053,
      "grad_norm": 1.5483651625241348,
      "learning_rate": 0.003,
      "loss": 4.038,
      "step": 12053
    },
    {
      "epoch": 0.12054,
      "grad_norm": 0.9450023337340406,
      "learning_rate": 0.003,
      "loss": 4.0411,
      "step": 12054
    },
    {
      "epoch": 0.12055,
      "grad_norm": 1.2518788502247158,
      "learning_rate": 0.003,
      "loss": 4.0505,
      "step": 12055
    },
    {
      "epoch": 0.12056,
      "grad_norm": 1.2025596603154725,
      "learning_rate": 0.003,
      "loss": 4.0153,
      "step": 12056
    },
    {
      "epoch": 0.12057,
      "grad_norm": 1.2521406673057627,
      "learning_rate": 0.003,
      "loss": 4.0294,
      "step": 12057
    },
    {
      "epoch": 0.12058,
      "grad_norm": 1.0526770017752556,
      "learning_rate": 0.003,
      "loss": 4.0483,
      "step": 12058
    },
    {
      "epoch": 0.12059,
      "grad_norm": 1.0626535398232526,
      "learning_rate": 0.003,
      "loss": 4.0434,
      "step": 12059
    },
    {
      "epoch": 0.1206,
      "grad_norm": 1.3336122110831834,
      "learning_rate": 0.003,
      "loss": 4.0301,
      "step": 12060
    },
    {
      "epoch": 0.12061,
      "grad_norm": 1.118599017531215,
      "learning_rate": 0.003,
      "loss": 4.0468,
      "step": 12061
    },
    {
      "epoch": 0.12062,
      "grad_norm": 1.363473374169237,
      "learning_rate": 0.003,
      "loss": 4.0116,
      "step": 12062
    },
    {
      "epoch": 0.12063,
      "grad_norm": 0.8786979557351099,
      "learning_rate": 0.003,
      "loss": 4.0295,
      "step": 12063
    },
    {
      "epoch": 0.12064,
      "grad_norm": 1.1947466023568365,
      "learning_rate": 0.003,
      "loss": 3.9998,
      "step": 12064
    },
    {
      "epoch": 0.12065,
      "grad_norm": 1.215993473222538,
      "learning_rate": 0.003,
      "loss": 4.0227,
      "step": 12065
    },
    {
      "epoch": 0.12066,
      "grad_norm": 1.2447752310217368,
      "learning_rate": 0.003,
      "loss": 4.0204,
      "step": 12066
    },
    {
      "epoch": 0.12067,
      "grad_norm": 1.0837006580418418,
      "learning_rate": 0.003,
      "loss": 4.0542,
      "step": 12067
    },
    {
      "epoch": 0.12068,
      "grad_norm": 1.3971605398432154,
      "learning_rate": 0.003,
      "loss": 4.0251,
      "step": 12068
    },
    {
      "epoch": 0.12069,
      "grad_norm": 1.007380264646283,
      "learning_rate": 0.003,
      "loss": 4.0144,
      "step": 12069
    },
    {
      "epoch": 0.1207,
      "grad_norm": 1.3122686823444492,
      "learning_rate": 0.003,
      "loss": 4.0102,
      "step": 12070
    },
    {
      "epoch": 0.12071,
      "grad_norm": 1.0684153514164216,
      "learning_rate": 0.003,
      "loss": 4.0206,
      "step": 12071
    },
    {
      "epoch": 0.12072,
      "grad_norm": 1.2604810107038449,
      "learning_rate": 0.003,
      "loss": 4.0679,
      "step": 12072
    },
    {
      "epoch": 0.12073,
      "grad_norm": 1.155441969210191,
      "learning_rate": 0.003,
      "loss": 4.0296,
      "step": 12073
    },
    {
      "epoch": 0.12074,
      "grad_norm": 1.5303984928048056,
      "learning_rate": 0.003,
      "loss": 4.036,
      "step": 12074
    },
    {
      "epoch": 0.12075,
      "grad_norm": 0.8280353697240316,
      "learning_rate": 0.003,
      "loss": 3.9775,
      "step": 12075
    },
    {
      "epoch": 0.12076,
      "grad_norm": 0.9625486618108077,
      "learning_rate": 0.003,
      "loss": 4.0196,
      "step": 12076
    },
    {
      "epoch": 0.12077,
      "grad_norm": 1.2545752671715014,
      "learning_rate": 0.003,
      "loss": 3.9819,
      "step": 12077
    },
    {
      "epoch": 0.12078,
      "grad_norm": 1.1154569818666091,
      "learning_rate": 0.003,
      "loss": 4.0059,
      "step": 12078
    },
    {
      "epoch": 0.12079,
      "grad_norm": 1.141600191269537,
      "learning_rate": 0.003,
      "loss": 4.0167,
      "step": 12079
    },
    {
      "epoch": 0.1208,
      "grad_norm": 1.238179881437241,
      "learning_rate": 0.003,
      "loss": 4.0367,
      "step": 12080
    },
    {
      "epoch": 0.12081,
      "grad_norm": 1.165698596146827,
      "learning_rate": 0.003,
      "loss": 4.029,
      "step": 12081
    },
    {
      "epoch": 0.12082,
      "grad_norm": 0.8889092422891364,
      "learning_rate": 0.003,
      "loss": 4.0112,
      "step": 12082
    },
    {
      "epoch": 0.12083,
      "grad_norm": 1.2543161652007244,
      "learning_rate": 0.003,
      "loss": 4.0064,
      "step": 12083
    },
    {
      "epoch": 0.12084,
      "grad_norm": 1.5327269316533685,
      "learning_rate": 0.003,
      "loss": 4.038,
      "step": 12084
    },
    {
      "epoch": 0.12085,
      "grad_norm": 1.0365382149666054,
      "learning_rate": 0.003,
      "loss": 4.0299,
      "step": 12085
    },
    {
      "epoch": 0.12086,
      "grad_norm": 1.316513743782211,
      "learning_rate": 0.003,
      "loss": 4.0447,
      "step": 12086
    },
    {
      "epoch": 0.12087,
      "grad_norm": 1.1582107753316935,
      "learning_rate": 0.003,
      "loss": 4.0628,
      "step": 12087
    },
    {
      "epoch": 0.12088,
      "grad_norm": 1.2026972854083593,
      "learning_rate": 0.003,
      "loss": 4.0187,
      "step": 12088
    },
    {
      "epoch": 0.12089,
      "grad_norm": 1.1865818257324852,
      "learning_rate": 0.003,
      "loss": 4.0401,
      "step": 12089
    },
    {
      "epoch": 0.1209,
      "grad_norm": 1.213904883132537,
      "learning_rate": 0.003,
      "loss": 4.0046,
      "step": 12090
    },
    {
      "epoch": 0.12091,
      "grad_norm": 1.0648805961872354,
      "learning_rate": 0.003,
      "loss": 4.0207,
      "step": 12091
    },
    {
      "epoch": 0.12092,
      "grad_norm": 1.1813755883049362,
      "learning_rate": 0.003,
      "loss": 4.0142,
      "step": 12092
    },
    {
      "epoch": 0.12093,
      "grad_norm": 1.2292752165558312,
      "learning_rate": 0.003,
      "loss": 4.0407,
      "step": 12093
    },
    {
      "epoch": 0.12094,
      "grad_norm": 1.259437705088851,
      "learning_rate": 0.003,
      "loss": 4.0421,
      "step": 12094
    },
    {
      "epoch": 0.12095,
      "grad_norm": 1.1137767158004688,
      "learning_rate": 0.003,
      "loss": 4.0348,
      "step": 12095
    },
    {
      "epoch": 0.12096,
      "grad_norm": 1.3600842565366165,
      "learning_rate": 0.003,
      "loss": 4.0247,
      "step": 12096
    },
    {
      "epoch": 0.12097,
      "grad_norm": 0.8881258371739744,
      "learning_rate": 0.003,
      "loss": 4.0449,
      "step": 12097
    },
    {
      "epoch": 0.12098,
      "grad_norm": 1.2265846606485018,
      "learning_rate": 0.003,
      "loss": 4.0092,
      "step": 12098
    },
    {
      "epoch": 0.12099,
      "grad_norm": 1.2320301694381899,
      "learning_rate": 0.003,
      "loss": 4.0077,
      "step": 12099
    },
    {
      "epoch": 0.121,
      "grad_norm": 1.1555002782743249,
      "learning_rate": 0.003,
      "loss": 4.0103,
      "step": 12100
    },
    {
      "epoch": 0.12101,
      "grad_norm": 1.26009712635675,
      "learning_rate": 0.003,
      "loss": 4.031,
      "step": 12101
    },
    {
      "epoch": 0.12102,
      "grad_norm": 1.2765674036907484,
      "learning_rate": 0.003,
      "loss": 4.0236,
      "step": 12102
    },
    {
      "epoch": 0.12103,
      "grad_norm": 1.2664976256713367,
      "learning_rate": 0.003,
      "loss": 4.0281,
      "step": 12103
    },
    {
      "epoch": 0.12104,
      "grad_norm": 0.9432051107004329,
      "learning_rate": 0.003,
      "loss": 3.9953,
      "step": 12104
    },
    {
      "epoch": 0.12105,
      "grad_norm": 1.326445164229193,
      "learning_rate": 0.003,
      "loss": 4.0218,
      "step": 12105
    },
    {
      "epoch": 0.12106,
      "grad_norm": 1.0473709580825368,
      "learning_rate": 0.003,
      "loss": 4.0066,
      "step": 12106
    },
    {
      "epoch": 0.12107,
      "grad_norm": 1.5389171702810274,
      "learning_rate": 0.003,
      "loss": 4.0608,
      "step": 12107
    },
    {
      "epoch": 0.12108,
      "grad_norm": 0.961160561659805,
      "learning_rate": 0.003,
      "loss": 4.0252,
      "step": 12108
    },
    {
      "epoch": 0.12109,
      "grad_norm": 1.1860213316322281,
      "learning_rate": 0.003,
      "loss": 4.0057,
      "step": 12109
    },
    {
      "epoch": 0.1211,
      "grad_norm": 1.276050655690645,
      "learning_rate": 0.003,
      "loss": 4.0503,
      "step": 12110
    },
    {
      "epoch": 0.12111,
      "grad_norm": 1.0106495220145681,
      "learning_rate": 0.003,
      "loss": 4.0218,
      "step": 12111
    },
    {
      "epoch": 0.12112,
      "grad_norm": 1.1404629571385734,
      "learning_rate": 0.003,
      "loss": 4.049,
      "step": 12112
    },
    {
      "epoch": 0.12113,
      "grad_norm": 1.3379694333364878,
      "learning_rate": 0.003,
      "loss": 4.0486,
      "step": 12113
    },
    {
      "epoch": 0.12114,
      "grad_norm": 1.1057144085632993,
      "learning_rate": 0.003,
      "loss": 4.0212,
      "step": 12114
    },
    {
      "epoch": 0.12115,
      "grad_norm": 1.2389514049197021,
      "learning_rate": 0.003,
      "loss": 3.9896,
      "step": 12115
    },
    {
      "epoch": 0.12116,
      "grad_norm": 1.081032310728611,
      "learning_rate": 0.003,
      "loss": 4.0566,
      "step": 12116
    },
    {
      "epoch": 0.12117,
      "grad_norm": 1.7808747808197203,
      "learning_rate": 0.003,
      "loss": 4.0562,
      "step": 12117
    },
    {
      "epoch": 0.12118,
      "grad_norm": 0.9804521331485236,
      "learning_rate": 0.003,
      "loss": 3.9977,
      "step": 12118
    },
    {
      "epoch": 0.12119,
      "grad_norm": 1.2622545631694635,
      "learning_rate": 0.003,
      "loss": 4.0515,
      "step": 12119
    },
    {
      "epoch": 0.1212,
      "grad_norm": 1.025608924940026,
      "learning_rate": 0.003,
      "loss": 4.0068,
      "step": 12120
    },
    {
      "epoch": 0.12121,
      "grad_norm": 1.2687986530932442,
      "learning_rate": 0.003,
      "loss": 4.0446,
      "step": 12121
    },
    {
      "epoch": 0.12122,
      "grad_norm": 1.2569166469856572,
      "learning_rate": 0.003,
      "loss": 4.0552,
      "step": 12122
    },
    {
      "epoch": 0.12123,
      "grad_norm": 1.0888418797208075,
      "learning_rate": 0.003,
      "loss": 4.0189,
      "step": 12123
    },
    {
      "epoch": 0.12124,
      "grad_norm": 1.4471321534196813,
      "learning_rate": 0.003,
      "loss": 4.0322,
      "step": 12124
    },
    {
      "epoch": 0.12125,
      "grad_norm": 1.0570726579438743,
      "learning_rate": 0.003,
      "loss": 4.0474,
      "step": 12125
    },
    {
      "epoch": 0.12126,
      "grad_norm": 1.1452310973638715,
      "learning_rate": 0.003,
      "loss": 4.0309,
      "step": 12126
    },
    {
      "epoch": 0.12127,
      "grad_norm": 1.0951003459920567,
      "learning_rate": 0.003,
      "loss": 4.0225,
      "step": 12127
    },
    {
      "epoch": 0.12128,
      "grad_norm": 1.0924282991917444,
      "learning_rate": 0.003,
      "loss": 4.043,
      "step": 12128
    },
    {
      "epoch": 0.12129,
      "grad_norm": 1.077336233158983,
      "learning_rate": 0.003,
      "loss": 4.0118,
      "step": 12129
    },
    {
      "epoch": 0.1213,
      "grad_norm": 1.0698116679642007,
      "learning_rate": 0.003,
      "loss": 4.0367,
      "step": 12130
    },
    {
      "epoch": 0.12131,
      "grad_norm": 1.0569838113736956,
      "learning_rate": 0.003,
      "loss": 4.0406,
      "step": 12131
    },
    {
      "epoch": 0.12132,
      "grad_norm": 1.2236310443056324,
      "learning_rate": 0.003,
      "loss": 4.0366,
      "step": 12132
    },
    {
      "epoch": 0.12133,
      "grad_norm": 1.210938464105007,
      "learning_rate": 0.003,
      "loss": 4.0231,
      "step": 12133
    },
    {
      "epoch": 0.12134,
      "grad_norm": 1.1431794281160383,
      "learning_rate": 0.003,
      "loss": 4.0444,
      "step": 12134
    },
    {
      "epoch": 0.12135,
      "grad_norm": 1.2120388488293794,
      "learning_rate": 0.003,
      "loss": 4.0163,
      "step": 12135
    },
    {
      "epoch": 0.12136,
      "grad_norm": 1.163220939636625,
      "learning_rate": 0.003,
      "loss": 4.0237,
      "step": 12136
    },
    {
      "epoch": 0.12137,
      "grad_norm": 1.408415883667614,
      "learning_rate": 0.003,
      "loss": 4.0654,
      "step": 12137
    },
    {
      "epoch": 0.12138,
      "grad_norm": 1.019972444797985,
      "learning_rate": 0.003,
      "loss": 4.0457,
      "step": 12138
    },
    {
      "epoch": 0.12139,
      "grad_norm": 1.4794420127960417,
      "learning_rate": 0.003,
      "loss": 4.0525,
      "step": 12139
    },
    {
      "epoch": 0.1214,
      "grad_norm": 0.9942984332809602,
      "learning_rate": 0.003,
      "loss": 4.0483,
      "step": 12140
    },
    {
      "epoch": 0.12141,
      "grad_norm": 1.198313384318162,
      "learning_rate": 0.003,
      "loss": 4.0243,
      "step": 12141
    },
    {
      "epoch": 0.12142,
      "grad_norm": 1.1235175502034191,
      "learning_rate": 0.003,
      "loss": 4.0386,
      "step": 12142
    },
    {
      "epoch": 0.12143,
      "grad_norm": 1.2021305789112728,
      "learning_rate": 0.003,
      "loss": 4.0428,
      "step": 12143
    },
    {
      "epoch": 0.12144,
      "grad_norm": 1.2036254675483424,
      "learning_rate": 0.003,
      "loss": 4.0573,
      "step": 12144
    },
    {
      "epoch": 0.12145,
      "grad_norm": 1.0563897356132683,
      "learning_rate": 0.003,
      "loss": 4.0479,
      "step": 12145
    },
    {
      "epoch": 0.12146,
      "grad_norm": 1.3892405792341498,
      "learning_rate": 0.003,
      "loss": 4.0537,
      "step": 12146
    },
    {
      "epoch": 0.12147,
      "grad_norm": 1.1584776401297876,
      "learning_rate": 0.003,
      "loss": 4.0352,
      "step": 12147
    },
    {
      "epoch": 0.12148,
      "grad_norm": 1.4771991665797888,
      "learning_rate": 0.003,
      "loss": 4.0534,
      "step": 12148
    },
    {
      "epoch": 0.12149,
      "grad_norm": 0.9036675444370385,
      "learning_rate": 0.003,
      "loss": 4.0259,
      "step": 12149
    },
    {
      "epoch": 0.1215,
      "grad_norm": 1.0455355580610362,
      "learning_rate": 0.003,
      "loss": 4.0236,
      "step": 12150
    },
    {
      "epoch": 0.12151,
      "grad_norm": 1.2718005495324547,
      "learning_rate": 0.003,
      "loss": 4.0015,
      "step": 12151
    },
    {
      "epoch": 0.12152,
      "grad_norm": 1.1342945095583343,
      "learning_rate": 0.003,
      "loss": 4.006,
      "step": 12152
    },
    {
      "epoch": 0.12153,
      "grad_norm": 1.2466145803684447,
      "learning_rate": 0.003,
      "loss": 4.0605,
      "step": 12153
    },
    {
      "epoch": 0.12154,
      "grad_norm": 1.0819537046373542,
      "learning_rate": 0.003,
      "loss": 3.9943,
      "step": 12154
    },
    {
      "epoch": 0.12155,
      "grad_norm": 1.1602226084211311,
      "learning_rate": 0.003,
      "loss": 4.0066,
      "step": 12155
    },
    {
      "epoch": 0.12156,
      "grad_norm": 1.4009042185837033,
      "learning_rate": 0.003,
      "loss": 4.0285,
      "step": 12156
    },
    {
      "epoch": 0.12157,
      "grad_norm": 1.1384726322964522,
      "learning_rate": 0.003,
      "loss": 3.9852,
      "step": 12157
    },
    {
      "epoch": 0.12158,
      "grad_norm": 1.1226291807229503,
      "learning_rate": 0.003,
      "loss": 4.0258,
      "step": 12158
    },
    {
      "epoch": 0.12159,
      "grad_norm": 1.188819542203544,
      "learning_rate": 0.003,
      "loss": 4.0265,
      "step": 12159
    },
    {
      "epoch": 0.1216,
      "grad_norm": 1.186378564541427,
      "learning_rate": 0.003,
      "loss": 4.016,
      "step": 12160
    },
    {
      "epoch": 0.12161,
      "grad_norm": 1.1430987105677886,
      "learning_rate": 0.003,
      "loss": 4.0331,
      "step": 12161
    },
    {
      "epoch": 0.12162,
      "grad_norm": 1.0984081471542013,
      "learning_rate": 0.003,
      "loss": 4.0227,
      "step": 12162
    },
    {
      "epoch": 0.12163,
      "grad_norm": 1.2375098267751024,
      "learning_rate": 0.003,
      "loss": 4.0232,
      "step": 12163
    },
    {
      "epoch": 0.12164,
      "grad_norm": 1.3998298901471347,
      "learning_rate": 0.003,
      "loss": 4.0263,
      "step": 12164
    },
    {
      "epoch": 0.12165,
      "grad_norm": 0.9344084284754219,
      "learning_rate": 0.003,
      "loss": 4.0049,
      "step": 12165
    },
    {
      "epoch": 0.12166,
      "grad_norm": 1.512859671545336,
      "learning_rate": 0.003,
      "loss": 4.0452,
      "step": 12166
    },
    {
      "epoch": 0.12167,
      "grad_norm": 1.1164706886354052,
      "learning_rate": 0.003,
      "loss": 4.0225,
      "step": 12167
    },
    {
      "epoch": 0.12168,
      "grad_norm": 1.0269895024920574,
      "learning_rate": 0.003,
      "loss": 4.0149,
      "step": 12168
    },
    {
      "epoch": 0.12169,
      "grad_norm": 1.2437063976530573,
      "learning_rate": 0.003,
      "loss": 4.0468,
      "step": 12169
    },
    {
      "epoch": 0.1217,
      "grad_norm": 1.1616197418244247,
      "learning_rate": 0.003,
      "loss": 4.0134,
      "step": 12170
    },
    {
      "epoch": 0.12171,
      "grad_norm": 1.126266794256949,
      "learning_rate": 0.003,
      "loss": 4.0121,
      "step": 12171
    },
    {
      "epoch": 0.12172,
      "grad_norm": 1.365627633085238,
      "learning_rate": 0.003,
      "loss": 4.0489,
      "step": 12172
    },
    {
      "epoch": 0.12173,
      "grad_norm": 1.09988351709105,
      "learning_rate": 0.003,
      "loss": 4.0294,
      "step": 12173
    },
    {
      "epoch": 0.12174,
      "grad_norm": 1.199748433735864,
      "learning_rate": 0.003,
      "loss": 4.0518,
      "step": 12174
    },
    {
      "epoch": 0.12175,
      "grad_norm": 1.088769417635092,
      "learning_rate": 0.003,
      "loss": 4.0397,
      "step": 12175
    },
    {
      "epoch": 0.12176,
      "grad_norm": 1.2661905434748413,
      "learning_rate": 0.003,
      "loss": 4.0171,
      "step": 12176
    },
    {
      "epoch": 0.12177,
      "grad_norm": 1.0836807067546872,
      "learning_rate": 0.003,
      "loss": 4.0293,
      "step": 12177
    },
    {
      "epoch": 0.12178,
      "grad_norm": 1.2934576723672395,
      "learning_rate": 0.003,
      "loss": 4.0411,
      "step": 12178
    },
    {
      "epoch": 0.12179,
      "grad_norm": 1.1831772974220784,
      "learning_rate": 0.003,
      "loss": 4.0336,
      "step": 12179
    },
    {
      "epoch": 0.1218,
      "grad_norm": 1.1747496433726747,
      "learning_rate": 0.003,
      "loss": 4.0085,
      "step": 12180
    },
    {
      "epoch": 0.12181,
      "grad_norm": 1.3179604690170197,
      "learning_rate": 0.003,
      "loss": 4.01,
      "step": 12181
    },
    {
      "epoch": 0.12182,
      "grad_norm": 1.135089293334139,
      "learning_rate": 0.003,
      "loss": 4.031,
      "step": 12182
    },
    {
      "epoch": 0.12183,
      "grad_norm": 1.3221372241316014,
      "learning_rate": 0.003,
      "loss": 3.9976,
      "step": 12183
    },
    {
      "epoch": 0.12184,
      "grad_norm": 1.0124457856344928,
      "learning_rate": 0.003,
      "loss": 4.0262,
      "step": 12184
    },
    {
      "epoch": 0.12185,
      "grad_norm": 1.3589982758664614,
      "learning_rate": 0.003,
      "loss": 4.019,
      "step": 12185
    },
    {
      "epoch": 0.12186,
      "grad_norm": 1.0233570129876237,
      "learning_rate": 0.003,
      "loss": 4.0139,
      "step": 12186
    },
    {
      "epoch": 0.12187,
      "grad_norm": 1.3079895671489778,
      "learning_rate": 0.003,
      "loss": 4.0359,
      "step": 12187
    },
    {
      "epoch": 0.12188,
      "grad_norm": 1.1489865721931825,
      "learning_rate": 0.003,
      "loss": 4.0324,
      "step": 12188
    },
    {
      "epoch": 0.12189,
      "grad_norm": 1.2412768268410446,
      "learning_rate": 0.003,
      "loss": 4.0204,
      "step": 12189
    },
    {
      "epoch": 0.1219,
      "grad_norm": 1.114513378698603,
      "learning_rate": 0.003,
      "loss": 4.0324,
      "step": 12190
    },
    {
      "epoch": 0.12191,
      "grad_norm": 1.3362592660486117,
      "learning_rate": 0.003,
      "loss": 4.0507,
      "step": 12191
    },
    {
      "epoch": 0.12192,
      "grad_norm": 1.0692297451970725,
      "learning_rate": 0.003,
      "loss": 4.0442,
      "step": 12192
    },
    {
      "epoch": 0.12193,
      "grad_norm": 1.2465778007234973,
      "learning_rate": 0.003,
      "loss": 4.0504,
      "step": 12193
    },
    {
      "epoch": 0.12194,
      "grad_norm": 1.1092003193835691,
      "learning_rate": 0.003,
      "loss": 4.0202,
      "step": 12194
    },
    {
      "epoch": 0.12195,
      "grad_norm": 1.1696708395236086,
      "learning_rate": 0.003,
      "loss": 4.0152,
      "step": 12195
    },
    {
      "epoch": 0.12196,
      "grad_norm": 1.1112321590252383,
      "learning_rate": 0.003,
      "loss": 4.0079,
      "step": 12196
    },
    {
      "epoch": 0.12197,
      "grad_norm": 1.2841076485330796,
      "learning_rate": 0.003,
      "loss": 4.0057,
      "step": 12197
    },
    {
      "epoch": 0.12198,
      "grad_norm": 1.107702913323641,
      "learning_rate": 0.003,
      "loss": 4.0272,
      "step": 12198
    },
    {
      "epoch": 0.12199,
      "grad_norm": 1.1836188082240122,
      "learning_rate": 0.003,
      "loss": 4.0352,
      "step": 12199
    },
    {
      "epoch": 0.122,
      "grad_norm": 1.0702302603416496,
      "learning_rate": 0.003,
      "loss": 4.025,
      "step": 12200
    },
    {
      "epoch": 0.12201,
      "grad_norm": 1.357237574537661,
      "learning_rate": 0.003,
      "loss": 4.0183,
      "step": 12201
    },
    {
      "epoch": 0.12202,
      "grad_norm": 1.107067299040779,
      "learning_rate": 0.003,
      "loss": 4.0361,
      "step": 12202
    },
    {
      "epoch": 0.12203,
      "grad_norm": 1.2133228664945446,
      "learning_rate": 0.003,
      "loss": 4.0439,
      "step": 12203
    },
    {
      "epoch": 0.12204,
      "grad_norm": 1.122185688848671,
      "learning_rate": 0.003,
      "loss": 4.0459,
      "step": 12204
    },
    {
      "epoch": 0.12205,
      "grad_norm": 1.2378973407718126,
      "learning_rate": 0.003,
      "loss": 4.0183,
      "step": 12205
    },
    {
      "epoch": 0.12206,
      "grad_norm": 1.0146025080029806,
      "learning_rate": 0.003,
      "loss": 3.9975,
      "step": 12206
    },
    {
      "epoch": 0.12207,
      "grad_norm": 1.2371951440426439,
      "learning_rate": 0.003,
      "loss": 4.0672,
      "step": 12207
    },
    {
      "epoch": 0.12208,
      "grad_norm": 1.1941790413272386,
      "learning_rate": 0.003,
      "loss": 4.0389,
      "step": 12208
    },
    {
      "epoch": 0.12209,
      "grad_norm": 1.237443502265726,
      "learning_rate": 0.003,
      "loss": 4.0269,
      "step": 12209
    },
    {
      "epoch": 0.1221,
      "grad_norm": 1.1191757624817986,
      "learning_rate": 0.003,
      "loss": 4.0222,
      "step": 12210
    },
    {
      "epoch": 0.12211,
      "grad_norm": 1.6015066005243488,
      "learning_rate": 0.003,
      "loss": 4.0226,
      "step": 12211
    },
    {
      "epoch": 0.12212,
      "grad_norm": 1.0912122851858126,
      "learning_rate": 0.003,
      "loss": 4.0254,
      "step": 12212
    },
    {
      "epoch": 0.12213,
      "grad_norm": 1.22523715982098,
      "learning_rate": 0.003,
      "loss": 4.0407,
      "step": 12213
    },
    {
      "epoch": 0.12214,
      "grad_norm": 1.025716267326302,
      "learning_rate": 0.003,
      "loss": 4.0315,
      "step": 12214
    },
    {
      "epoch": 0.12215,
      "grad_norm": 1.1976040863355408,
      "learning_rate": 0.003,
      "loss": 4.0502,
      "step": 12215
    },
    {
      "epoch": 0.12216,
      "grad_norm": 0.965569302971212,
      "learning_rate": 0.003,
      "loss": 4.0179,
      "step": 12216
    },
    {
      "epoch": 0.12217,
      "grad_norm": 1.1391293564356433,
      "learning_rate": 0.003,
      "loss": 4.0104,
      "step": 12217
    },
    {
      "epoch": 0.12218,
      "grad_norm": 1.0031823383720437,
      "learning_rate": 0.003,
      "loss": 4.0014,
      "step": 12218
    },
    {
      "epoch": 0.12219,
      "grad_norm": 1.174272945089118,
      "learning_rate": 0.003,
      "loss": 4.0498,
      "step": 12219
    },
    {
      "epoch": 0.1222,
      "grad_norm": 1.1436677312129953,
      "learning_rate": 0.003,
      "loss": 4.0312,
      "step": 12220
    },
    {
      "epoch": 0.12221,
      "grad_norm": 1.2898099150708564,
      "learning_rate": 0.003,
      "loss": 4.0328,
      "step": 12221
    },
    {
      "epoch": 0.12222,
      "grad_norm": 1.09890346663295,
      "learning_rate": 0.003,
      "loss": 4.0108,
      "step": 12222
    },
    {
      "epoch": 0.12223,
      "grad_norm": 1.3194243732788846,
      "learning_rate": 0.003,
      "loss": 4.0401,
      "step": 12223
    },
    {
      "epoch": 0.12224,
      "grad_norm": 1.1829019105273342,
      "learning_rate": 0.003,
      "loss": 4.0141,
      "step": 12224
    },
    {
      "epoch": 0.12225,
      "grad_norm": 0.9974911798449131,
      "learning_rate": 0.003,
      "loss": 4.0245,
      "step": 12225
    },
    {
      "epoch": 0.12226,
      "grad_norm": 1.2045148338615028,
      "learning_rate": 0.003,
      "loss": 4.0344,
      "step": 12226
    },
    {
      "epoch": 0.12227,
      "grad_norm": 1.2757758918468345,
      "learning_rate": 0.003,
      "loss": 4.011,
      "step": 12227
    },
    {
      "epoch": 0.12228,
      "grad_norm": 1.3030462989555127,
      "learning_rate": 0.003,
      "loss": 4.0033,
      "step": 12228
    },
    {
      "epoch": 0.12229,
      "grad_norm": 1.1447965290278583,
      "learning_rate": 0.003,
      "loss": 4.0303,
      "step": 12229
    },
    {
      "epoch": 0.1223,
      "grad_norm": 1.1830532774411873,
      "learning_rate": 0.003,
      "loss": 4.0573,
      "step": 12230
    },
    {
      "epoch": 0.12231,
      "grad_norm": 0.9927189165650173,
      "learning_rate": 0.003,
      "loss": 4.0263,
      "step": 12231
    },
    {
      "epoch": 0.12232,
      "grad_norm": 1.1343637660969617,
      "learning_rate": 0.003,
      "loss": 4.0231,
      "step": 12232
    },
    {
      "epoch": 0.12233,
      "grad_norm": 1.2339551740369135,
      "learning_rate": 0.003,
      "loss": 4.0226,
      "step": 12233
    },
    {
      "epoch": 0.12234,
      "grad_norm": 1.2153175541284331,
      "learning_rate": 0.003,
      "loss": 4.0579,
      "step": 12234
    },
    {
      "epoch": 0.12235,
      "grad_norm": 1.2112370667362282,
      "learning_rate": 0.003,
      "loss": 4.0202,
      "step": 12235
    },
    {
      "epoch": 0.12236,
      "grad_norm": 1.2680813133769673,
      "learning_rate": 0.003,
      "loss": 4.0545,
      "step": 12236
    },
    {
      "epoch": 0.12237,
      "grad_norm": 1.2440893879161208,
      "learning_rate": 0.003,
      "loss": 4.0259,
      "step": 12237
    },
    {
      "epoch": 0.12238,
      "grad_norm": 1.0938588058052665,
      "learning_rate": 0.003,
      "loss": 4.0034,
      "step": 12238
    },
    {
      "epoch": 0.12239,
      "grad_norm": 1.3079644744847825,
      "learning_rate": 0.003,
      "loss": 4.0436,
      "step": 12239
    },
    {
      "epoch": 0.1224,
      "grad_norm": 1.2032893752750062,
      "learning_rate": 0.003,
      "loss": 4.0477,
      "step": 12240
    },
    {
      "epoch": 0.12241,
      "grad_norm": 1.5263423425857818,
      "learning_rate": 0.003,
      "loss": 4.0071,
      "step": 12241
    },
    {
      "epoch": 0.12242,
      "grad_norm": 1.1123122789832365,
      "learning_rate": 0.003,
      "loss": 4.0344,
      "step": 12242
    },
    {
      "epoch": 0.12243,
      "grad_norm": 1.0556139683461747,
      "learning_rate": 0.003,
      "loss": 4.028,
      "step": 12243
    },
    {
      "epoch": 0.12244,
      "grad_norm": 1.3289277594180184,
      "learning_rate": 0.003,
      "loss": 4.049,
      "step": 12244
    },
    {
      "epoch": 0.12245,
      "grad_norm": 1.0073269421814348,
      "learning_rate": 0.003,
      "loss": 4.0476,
      "step": 12245
    },
    {
      "epoch": 0.12246,
      "grad_norm": 1.4046094869371852,
      "learning_rate": 0.003,
      "loss": 4.03,
      "step": 12246
    },
    {
      "epoch": 0.12247,
      "grad_norm": 1.051725583525556,
      "learning_rate": 0.003,
      "loss": 4.014,
      "step": 12247
    },
    {
      "epoch": 0.12248,
      "grad_norm": 1.2368654496027767,
      "learning_rate": 0.003,
      "loss": 4.0262,
      "step": 12248
    },
    {
      "epoch": 0.12249,
      "grad_norm": 1.1041322243002467,
      "learning_rate": 0.003,
      "loss": 3.9916,
      "step": 12249
    },
    {
      "epoch": 0.1225,
      "grad_norm": 1.2524463553074452,
      "learning_rate": 0.003,
      "loss": 4.0454,
      "step": 12250
    },
    {
      "epoch": 0.12251,
      "grad_norm": 1.1622632606319239,
      "learning_rate": 0.003,
      "loss": 4.0482,
      "step": 12251
    },
    {
      "epoch": 0.12252,
      "grad_norm": 1.3179849114848947,
      "learning_rate": 0.003,
      "loss": 4.0337,
      "step": 12252
    },
    {
      "epoch": 0.12253,
      "grad_norm": 1.0090202086355522,
      "learning_rate": 0.003,
      "loss": 4.027,
      "step": 12253
    },
    {
      "epoch": 0.12254,
      "grad_norm": 1.3230018748827679,
      "learning_rate": 0.003,
      "loss": 4.0305,
      "step": 12254
    },
    {
      "epoch": 0.12255,
      "grad_norm": 1.040049687066231,
      "learning_rate": 0.003,
      "loss": 4.0225,
      "step": 12255
    },
    {
      "epoch": 0.12256,
      "grad_norm": 1.390916117154564,
      "learning_rate": 0.003,
      "loss": 4.0284,
      "step": 12256
    },
    {
      "epoch": 0.12257,
      "grad_norm": 1.1859129806624236,
      "learning_rate": 0.003,
      "loss": 4.0531,
      "step": 12257
    },
    {
      "epoch": 0.12258,
      "grad_norm": 1.0671361782302644,
      "learning_rate": 0.003,
      "loss": 4.014,
      "step": 12258
    },
    {
      "epoch": 0.12259,
      "grad_norm": 1.2161759605513203,
      "learning_rate": 0.003,
      "loss": 4.0262,
      "step": 12259
    },
    {
      "epoch": 0.1226,
      "grad_norm": 1.2804176653736112,
      "learning_rate": 0.003,
      "loss": 4.0157,
      "step": 12260
    },
    {
      "epoch": 0.12261,
      "grad_norm": 0.983275306429123,
      "learning_rate": 0.003,
      "loss": 4.0096,
      "step": 12261
    },
    {
      "epoch": 0.12262,
      "grad_norm": 1.2709090680095843,
      "learning_rate": 0.003,
      "loss": 4.0093,
      "step": 12262
    },
    {
      "epoch": 0.12263,
      "grad_norm": 1.1752535790764227,
      "learning_rate": 0.003,
      "loss": 3.9997,
      "step": 12263
    },
    {
      "epoch": 0.12264,
      "grad_norm": 1.276868126228694,
      "learning_rate": 0.003,
      "loss": 4.0381,
      "step": 12264
    },
    {
      "epoch": 0.12265,
      "grad_norm": 1.1411677072819246,
      "learning_rate": 0.003,
      "loss": 4.0175,
      "step": 12265
    },
    {
      "epoch": 0.12266,
      "grad_norm": 1.0902306136530755,
      "learning_rate": 0.003,
      "loss": 3.9951,
      "step": 12266
    },
    {
      "epoch": 0.12267,
      "grad_norm": 1.3205667988804515,
      "learning_rate": 0.003,
      "loss": 4.0464,
      "step": 12267
    },
    {
      "epoch": 0.12268,
      "grad_norm": 1.0465548404396692,
      "learning_rate": 0.003,
      "loss": 4.0195,
      "step": 12268
    },
    {
      "epoch": 0.12269,
      "grad_norm": 1.525340885064841,
      "learning_rate": 0.003,
      "loss": 4.024,
      "step": 12269
    },
    {
      "epoch": 0.1227,
      "grad_norm": 0.898532904288709,
      "learning_rate": 0.003,
      "loss": 4.0318,
      "step": 12270
    },
    {
      "epoch": 0.12271,
      "grad_norm": 1.1625001192804503,
      "learning_rate": 0.003,
      "loss": 4.0147,
      "step": 12271
    },
    {
      "epoch": 0.12272,
      "grad_norm": 1.2506428969604266,
      "learning_rate": 0.003,
      "loss": 3.9987,
      "step": 12272
    },
    {
      "epoch": 0.12273,
      "grad_norm": 1.1400665488880517,
      "learning_rate": 0.003,
      "loss": 4.032,
      "step": 12273
    },
    {
      "epoch": 0.12274,
      "grad_norm": 1.4807402303846313,
      "learning_rate": 0.003,
      "loss": 4.0154,
      "step": 12274
    },
    {
      "epoch": 0.12275,
      "grad_norm": 0.9718032014863593,
      "learning_rate": 0.003,
      "loss": 4.0499,
      "step": 12275
    },
    {
      "epoch": 0.12276,
      "grad_norm": 1.1004371990631832,
      "learning_rate": 0.003,
      "loss": 4.0256,
      "step": 12276
    },
    {
      "epoch": 0.12277,
      "grad_norm": 1.2316751692589583,
      "learning_rate": 0.003,
      "loss": 4.0358,
      "step": 12277
    },
    {
      "epoch": 0.12278,
      "grad_norm": 1.2692996495372502,
      "learning_rate": 0.003,
      "loss": 4.0421,
      "step": 12278
    },
    {
      "epoch": 0.12279,
      "grad_norm": 1.1298346790675529,
      "learning_rate": 0.003,
      "loss": 3.9906,
      "step": 12279
    },
    {
      "epoch": 0.1228,
      "grad_norm": 1.0241566953582193,
      "learning_rate": 0.003,
      "loss": 4.0338,
      "step": 12280
    },
    {
      "epoch": 0.12281,
      "grad_norm": 1.2259128020142214,
      "learning_rate": 0.003,
      "loss": 4.0237,
      "step": 12281
    },
    {
      "epoch": 0.12282,
      "grad_norm": 1.1727117627822834,
      "learning_rate": 0.003,
      "loss": 4.056,
      "step": 12282
    },
    {
      "epoch": 0.12283,
      "grad_norm": 1.4802551869138976,
      "learning_rate": 0.003,
      "loss": 4.0185,
      "step": 12283
    },
    {
      "epoch": 0.12284,
      "grad_norm": 0.8562657072581173,
      "learning_rate": 0.003,
      "loss": 4.0054,
      "step": 12284
    },
    {
      "epoch": 0.12285,
      "grad_norm": 1.143034764039563,
      "learning_rate": 0.003,
      "loss": 4.0338,
      "step": 12285
    },
    {
      "epoch": 0.12286,
      "grad_norm": 1.2625712453206883,
      "learning_rate": 0.003,
      "loss": 4.0348,
      "step": 12286
    },
    {
      "epoch": 0.12287,
      "grad_norm": 1.0830382002896604,
      "learning_rate": 0.003,
      "loss": 4.0293,
      "step": 12287
    },
    {
      "epoch": 0.12288,
      "grad_norm": 1.1182157999246587,
      "learning_rate": 0.003,
      "loss": 4.0481,
      "step": 12288
    },
    {
      "epoch": 0.12289,
      "grad_norm": 0.9493864192646565,
      "learning_rate": 0.003,
      "loss": 3.9938,
      "step": 12289
    },
    {
      "epoch": 0.1229,
      "grad_norm": 1.1563869494269485,
      "learning_rate": 0.003,
      "loss": 4.0195,
      "step": 12290
    },
    {
      "epoch": 0.12291,
      "grad_norm": 1.2541160596246155,
      "learning_rate": 0.003,
      "loss": 4.0336,
      "step": 12291
    },
    {
      "epoch": 0.12292,
      "grad_norm": 1.2248213033814566,
      "learning_rate": 0.003,
      "loss": 4.0204,
      "step": 12292
    },
    {
      "epoch": 0.12293,
      "grad_norm": 1.2000885923117754,
      "learning_rate": 0.003,
      "loss": 4.0457,
      "step": 12293
    },
    {
      "epoch": 0.12294,
      "grad_norm": 1.0981859496339343,
      "learning_rate": 0.003,
      "loss": 4.0408,
      "step": 12294
    },
    {
      "epoch": 0.12295,
      "grad_norm": 1.1575443564037111,
      "learning_rate": 0.003,
      "loss": 4.0399,
      "step": 12295
    },
    {
      "epoch": 0.12296,
      "grad_norm": 1.301359422856533,
      "learning_rate": 0.003,
      "loss": 4.0341,
      "step": 12296
    },
    {
      "epoch": 0.12297,
      "grad_norm": 1.2514385607651537,
      "learning_rate": 0.003,
      "loss": 4.0232,
      "step": 12297
    },
    {
      "epoch": 0.12298,
      "grad_norm": 1.273736329357012,
      "learning_rate": 0.003,
      "loss": 4.0428,
      "step": 12298
    },
    {
      "epoch": 0.12299,
      "grad_norm": 1.3886346632204654,
      "learning_rate": 0.003,
      "loss": 4.0228,
      "step": 12299
    },
    {
      "epoch": 0.123,
      "grad_norm": 1.0872428215026426,
      "learning_rate": 0.003,
      "loss": 4.0385,
      "step": 12300
    },
    {
      "epoch": 0.12301,
      "grad_norm": 1.370513264865828,
      "learning_rate": 0.003,
      "loss": 4.0523,
      "step": 12301
    },
    {
      "epoch": 0.12302,
      "grad_norm": 0.8505983228963824,
      "learning_rate": 0.003,
      "loss": 4.0351,
      "step": 12302
    },
    {
      "epoch": 0.12303,
      "grad_norm": 0.9457348435365074,
      "learning_rate": 0.003,
      "loss": 4.0838,
      "step": 12303
    },
    {
      "epoch": 0.12304,
      "grad_norm": 0.9951681172464344,
      "learning_rate": 0.003,
      "loss": 4.0026,
      "step": 12304
    },
    {
      "epoch": 0.12305,
      "grad_norm": 1.0681593913605725,
      "learning_rate": 0.003,
      "loss": 4.0253,
      "step": 12305
    },
    {
      "epoch": 0.12306,
      "grad_norm": 1.2916709766050543,
      "learning_rate": 0.003,
      "loss": 4.0266,
      "step": 12306
    },
    {
      "epoch": 0.12307,
      "grad_norm": 1.2292127687463348,
      "learning_rate": 0.003,
      "loss": 4.0274,
      "step": 12307
    },
    {
      "epoch": 0.12308,
      "grad_norm": 1.139218468833114,
      "learning_rate": 0.003,
      "loss": 4.0413,
      "step": 12308
    },
    {
      "epoch": 0.12309,
      "grad_norm": 1.2485231059799966,
      "learning_rate": 0.003,
      "loss": 4.0233,
      "step": 12309
    },
    {
      "epoch": 0.1231,
      "grad_norm": 1.285310561114578,
      "learning_rate": 0.003,
      "loss": 4.0303,
      "step": 12310
    },
    {
      "epoch": 0.12311,
      "grad_norm": 1.2794706200625143,
      "learning_rate": 0.003,
      "loss": 4.0107,
      "step": 12311
    },
    {
      "epoch": 0.12312,
      "grad_norm": 1.0817853226112992,
      "learning_rate": 0.003,
      "loss": 4.0403,
      "step": 12312
    },
    {
      "epoch": 0.12313,
      "grad_norm": 1.4834866026044042,
      "learning_rate": 0.003,
      "loss": 4.0553,
      "step": 12313
    },
    {
      "epoch": 0.12314,
      "grad_norm": 0.9875551326088596,
      "learning_rate": 0.003,
      "loss": 4.016,
      "step": 12314
    },
    {
      "epoch": 0.12315,
      "grad_norm": 1.4829417421206113,
      "learning_rate": 0.003,
      "loss": 4.0554,
      "step": 12315
    },
    {
      "epoch": 0.12316,
      "grad_norm": 1.0408210412276793,
      "learning_rate": 0.003,
      "loss": 4.0352,
      "step": 12316
    },
    {
      "epoch": 0.12317,
      "grad_norm": 1.4264169348104334,
      "learning_rate": 0.003,
      "loss": 4.0017,
      "step": 12317
    },
    {
      "epoch": 0.12318,
      "grad_norm": 1.0989074999876036,
      "learning_rate": 0.003,
      "loss": 4.0484,
      "step": 12318
    },
    {
      "epoch": 0.12319,
      "grad_norm": 1.2243058952350059,
      "learning_rate": 0.003,
      "loss": 4.0332,
      "step": 12319
    },
    {
      "epoch": 0.1232,
      "grad_norm": 1.0845777556091951,
      "learning_rate": 0.003,
      "loss": 4.0477,
      "step": 12320
    },
    {
      "epoch": 0.12321,
      "grad_norm": 1.2109554941735854,
      "learning_rate": 0.003,
      "loss": 4.0197,
      "step": 12321
    },
    {
      "epoch": 0.12322,
      "grad_norm": 1.0902149341547924,
      "learning_rate": 0.003,
      "loss": 4.0203,
      "step": 12322
    },
    {
      "epoch": 0.12323,
      "grad_norm": 1.207304856681732,
      "learning_rate": 0.003,
      "loss": 4.0228,
      "step": 12323
    },
    {
      "epoch": 0.12324,
      "grad_norm": 1.1095092917775558,
      "learning_rate": 0.003,
      "loss": 4.011,
      "step": 12324
    },
    {
      "epoch": 0.12325,
      "grad_norm": 1.2357130353809473,
      "learning_rate": 0.003,
      "loss": 3.9895,
      "step": 12325
    },
    {
      "epoch": 0.12326,
      "grad_norm": 1.0963499157957226,
      "learning_rate": 0.003,
      "loss": 4.019,
      "step": 12326
    },
    {
      "epoch": 0.12327,
      "grad_norm": 1.340824324855678,
      "learning_rate": 0.003,
      "loss": 3.999,
      "step": 12327
    },
    {
      "epoch": 0.12328,
      "grad_norm": 1.2449364637709306,
      "learning_rate": 0.003,
      "loss": 3.9928,
      "step": 12328
    },
    {
      "epoch": 0.12329,
      "grad_norm": 1.1876948176946969,
      "learning_rate": 0.003,
      "loss": 4.0416,
      "step": 12329
    },
    {
      "epoch": 0.1233,
      "grad_norm": 1.1120658251348128,
      "learning_rate": 0.003,
      "loss": 4.0054,
      "step": 12330
    },
    {
      "epoch": 0.12331,
      "grad_norm": 1.2766533808324558,
      "learning_rate": 0.003,
      "loss": 4.0154,
      "step": 12331
    },
    {
      "epoch": 0.12332,
      "grad_norm": 1.0419909057814578,
      "learning_rate": 0.003,
      "loss": 4.0039,
      "step": 12332
    },
    {
      "epoch": 0.12333,
      "grad_norm": 1.2900410395289634,
      "learning_rate": 0.003,
      "loss": 4.0133,
      "step": 12333
    },
    {
      "epoch": 0.12334,
      "grad_norm": 1.194625300357013,
      "learning_rate": 0.003,
      "loss": 4.0299,
      "step": 12334
    },
    {
      "epoch": 0.12335,
      "grad_norm": 1.3193993205603087,
      "learning_rate": 0.003,
      "loss": 4.0244,
      "step": 12335
    },
    {
      "epoch": 0.12336,
      "grad_norm": 1.341608532912959,
      "learning_rate": 0.003,
      "loss": 4.0294,
      "step": 12336
    },
    {
      "epoch": 0.12337,
      "grad_norm": 1.1486692164623977,
      "learning_rate": 0.003,
      "loss": 4.0224,
      "step": 12337
    },
    {
      "epoch": 0.12338,
      "grad_norm": 1.2242326996905912,
      "learning_rate": 0.003,
      "loss": 4.0125,
      "step": 12338
    },
    {
      "epoch": 0.12339,
      "grad_norm": 1.0095451474374366,
      "learning_rate": 0.003,
      "loss": 4.0213,
      "step": 12339
    },
    {
      "epoch": 0.1234,
      "grad_norm": 1.3942157585638366,
      "learning_rate": 0.003,
      "loss": 4.0506,
      "step": 12340
    },
    {
      "epoch": 0.12341,
      "grad_norm": 0.9436108135164665,
      "learning_rate": 0.003,
      "loss": 4.0299,
      "step": 12341
    },
    {
      "epoch": 0.12342,
      "grad_norm": 1.1302393729306983,
      "learning_rate": 0.003,
      "loss": 4.0075,
      "step": 12342
    },
    {
      "epoch": 0.12343,
      "grad_norm": 1.3295607670812282,
      "learning_rate": 0.003,
      "loss": 4.0126,
      "step": 12343
    },
    {
      "epoch": 0.12344,
      "grad_norm": 1.194555142662113,
      "learning_rate": 0.003,
      "loss": 4.0273,
      "step": 12344
    },
    {
      "epoch": 0.12345,
      "grad_norm": 1.3101897952776536,
      "learning_rate": 0.003,
      "loss": 3.9911,
      "step": 12345
    },
    {
      "epoch": 0.12346,
      "grad_norm": 0.9886687025758016,
      "learning_rate": 0.003,
      "loss": 4.0022,
      "step": 12346
    },
    {
      "epoch": 0.12347,
      "grad_norm": 1.048339053066772,
      "learning_rate": 0.003,
      "loss": 4.0181,
      "step": 12347
    },
    {
      "epoch": 0.12348,
      "grad_norm": 1.384508645610422,
      "learning_rate": 0.003,
      "loss": 4.0336,
      "step": 12348
    },
    {
      "epoch": 0.12349,
      "grad_norm": 1.0486243460691573,
      "learning_rate": 0.003,
      "loss": 4.0099,
      "step": 12349
    },
    {
      "epoch": 0.1235,
      "grad_norm": 1.2688673285137406,
      "learning_rate": 0.003,
      "loss": 4.0152,
      "step": 12350
    },
    {
      "epoch": 0.12351,
      "grad_norm": 0.9688609196568242,
      "learning_rate": 0.003,
      "loss": 4.0302,
      "step": 12351
    },
    {
      "epoch": 0.12352,
      "grad_norm": 1.3053088708802922,
      "learning_rate": 0.003,
      "loss": 4.0181,
      "step": 12352
    },
    {
      "epoch": 0.12353,
      "grad_norm": 1.012117558257402,
      "learning_rate": 0.003,
      "loss": 4.0029,
      "step": 12353
    },
    {
      "epoch": 0.12354,
      "grad_norm": 1.4775072585954483,
      "learning_rate": 0.003,
      "loss": 4.0253,
      "step": 12354
    },
    {
      "epoch": 0.12355,
      "grad_norm": 0.9673416007518341,
      "learning_rate": 0.003,
      "loss": 4.0175,
      "step": 12355
    },
    {
      "epoch": 0.12356,
      "grad_norm": 1.1498611705148574,
      "learning_rate": 0.003,
      "loss": 4.0129,
      "step": 12356
    },
    {
      "epoch": 0.12357,
      "grad_norm": 0.9580734048464713,
      "learning_rate": 0.003,
      "loss": 4.0074,
      "step": 12357
    },
    {
      "epoch": 0.12358,
      "grad_norm": 1.3371631264382817,
      "learning_rate": 0.003,
      "loss": 4.0167,
      "step": 12358
    },
    {
      "epoch": 0.12359,
      "grad_norm": 1.139746107919948,
      "learning_rate": 0.003,
      "loss": 4.0058,
      "step": 12359
    },
    {
      "epoch": 0.1236,
      "grad_norm": 1.2004566124253264,
      "learning_rate": 0.003,
      "loss": 4.0098,
      "step": 12360
    },
    {
      "epoch": 0.12361,
      "grad_norm": 1.1116903585599232,
      "learning_rate": 0.003,
      "loss": 4.0159,
      "step": 12361
    },
    {
      "epoch": 0.12362,
      "grad_norm": 1.0685972050756347,
      "learning_rate": 0.003,
      "loss": 4.0361,
      "step": 12362
    },
    {
      "epoch": 0.12363,
      "grad_norm": 1.0483608668790687,
      "learning_rate": 0.003,
      "loss": 4.015,
      "step": 12363
    },
    {
      "epoch": 0.12364,
      "grad_norm": 1.4918723329058845,
      "learning_rate": 0.003,
      "loss": 4.0238,
      "step": 12364
    },
    {
      "epoch": 0.12365,
      "grad_norm": 1.0178238571752845,
      "learning_rate": 0.003,
      "loss": 4.0108,
      "step": 12365
    },
    {
      "epoch": 0.12366,
      "grad_norm": 1.3030151120524025,
      "learning_rate": 0.003,
      "loss": 4.0841,
      "step": 12366
    },
    {
      "epoch": 0.12367,
      "grad_norm": 1.141857602951121,
      "learning_rate": 0.003,
      "loss": 4.0581,
      "step": 12367
    },
    {
      "epoch": 0.12368,
      "grad_norm": 1.2565604375779478,
      "learning_rate": 0.003,
      "loss": 4.0365,
      "step": 12368
    },
    {
      "epoch": 0.12369,
      "grad_norm": 1.214027777384337,
      "learning_rate": 0.003,
      "loss": 4.0504,
      "step": 12369
    },
    {
      "epoch": 0.1237,
      "grad_norm": 1.0915755444135937,
      "learning_rate": 0.003,
      "loss": 4.0368,
      "step": 12370
    },
    {
      "epoch": 0.12371,
      "grad_norm": 1.1867414654709663,
      "learning_rate": 0.003,
      "loss": 4.0205,
      "step": 12371
    },
    {
      "epoch": 0.12372,
      "grad_norm": 1.0546431117703352,
      "learning_rate": 0.003,
      "loss": 4.0242,
      "step": 12372
    },
    {
      "epoch": 0.12373,
      "grad_norm": 1.4647953108218288,
      "learning_rate": 0.003,
      "loss": 4.0486,
      "step": 12373
    },
    {
      "epoch": 0.12374,
      "grad_norm": 1.04762106343006,
      "learning_rate": 0.003,
      "loss": 4.0313,
      "step": 12374
    },
    {
      "epoch": 0.12375,
      "grad_norm": 1.1963079339111795,
      "learning_rate": 0.003,
      "loss": 4.0175,
      "step": 12375
    },
    {
      "epoch": 0.12376,
      "grad_norm": 1.1499302069922561,
      "learning_rate": 0.003,
      "loss": 4.0487,
      "step": 12376
    },
    {
      "epoch": 0.12377,
      "grad_norm": 1.0968516310925092,
      "learning_rate": 0.003,
      "loss": 3.9928,
      "step": 12377
    },
    {
      "epoch": 0.12378,
      "grad_norm": 1.385819083799469,
      "learning_rate": 0.003,
      "loss": 4.0597,
      "step": 12378
    },
    {
      "epoch": 0.12379,
      "grad_norm": 1.1485024350100335,
      "learning_rate": 0.003,
      "loss": 4.0203,
      "step": 12379
    },
    {
      "epoch": 0.1238,
      "grad_norm": 1.0950675066447881,
      "learning_rate": 0.003,
      "loss": 4.0368,
      "step": 12380
    },
    {
      "epoch": 0.12381,
      "grad_norm": 1.1638382018028037,
      "learning_rate": 0.003,
      "loss": 4.031,
      "step": 12381
    },
    {
      "epoch": 0.12382,
      "grad_norm": 1.129457787007263,
      "learning_rate": 0.003,
      "loss": 4.0191,
      "step": 12382
    },
    {
      "epoch": 0.12383,
      "grad_norm": 1.1036814048890862,
      "learning_rate": 0.003,
      "loss": 4.004,
      "step": 12383
    },
    {
      "epoch": 0.12384,
      "grad_norm": 1.2376100778668477,
      "learning_rate": 0.003,
      "loss": 4.0321,
      "step": 12384
    },
    {
      "epoch": 0.12385,
      "grad_norm": 1.1084913141813695,
      "learning_rate": 0.003,
      "loss": 4.0068,
      "step": 12385
    },
    {
      "epoch": 0.12386,
      "grad_norm": 1.178199736592679,
      "learning_rate": 0.003,
      "loss": 4.0294,
      "step": 12386
    },
    {
      "epoch": 0.12387,
      "grad_norm": 1.1779043826583042,
      "learning_rate": 0.003,
      "loss": 4.0168,
      "step": 12387
    },
    {
      "epoch": 0.12388,
      "grad_norm": 1.2916996558932137,
      "learning_rate": 0.003,
      "loss": 4.0106,
      "step": 12388
    },
    {
      "epoch": 0.12389,
      "grad_norm": 1.0404310442375322,
      "learning_rate": 0.003,
      "loss": 4.0458,
      "step": 12389
    },
    {
      "epoch": 0.1239,
      "grad_norm": 1.2897160473158022,
      "learning_rate": 0.003,
      "loss": 4.0632,
      "step": 12390
    },
    {
      "epoch": 0.12391,
      "grad_norm": 1.1756647311322306,
      "learning_rate": 0.003,
      "loss": 3.9902,
      "step": 12391
    },
    {
      "epoch": 0.12392,
      "grad_norm": 1.1316623880446204,
      "learning_rate": 0.003,
      "loss": 4.0147,
      "step": 12392
    },
    {
      "epoch": 0.12393,
      "grad_norm": 1.238766697242648,
      "learning_rate": 0.003,
      "loss": 4.0383,
      "step": 12393
    },
    {
      "epoch": 0.12394,
      "grad_norm": 1.088911073223708,
      "learning_rate": 0.003,
      "loss": 4.0193,
      "step": 12394
    },
    {
      "epoch": 0.12395,
      "grad_norm": 1.1453594979436852,
      "learning_rate": 0.003,
      "loss": 3.9919,
      "step": 12395
    },
    {
      "epoch": 0.12396,
      "grad_norm": 1.2257856525238398,
      "learning_rate": 0.003,
      "loss": 4.0459,
      "step": 12396
    },
    {
      "epoch": 0.12397,
      "grad_norm": 1.1932329492927123,
      "learning_rate": 0.003,
      "loss": 4.0366,
      "step": 12397
    },
    {
      "epoch": 0.12398,
      "grad_norm": 1.263429821271588,
      "learning_rate": 0.003,
      "loss": 4.0542,
      "step": 12398
    },
    {
      "epoch": 0.12399,
      "grad_norm": 1.1410816377835378,
      "learning_rate": 0.003,
      "loss": 4.0549,
      "step": 12399
    },
    {
      "epoch": 0.124,
      "grad_norm": 1.2684277262143193,
      "learning_rate": 0.003,
      "loss": 4.0717,
      "step": 12400
    },
    {
      "epoch": 0.12401,
      "grad_norm": 1.3163035517059634,
      "learning_rate": 0.003,
      "loss": 4.0104,
      "step": 12401
    },
    {
      "epoch": 0.12402,
      "grad_norm": 1.0710840595995579,
      "learning_rate": 0.003,
      "loss": 4.0345,
      "step": 12402
    },
    {
      "epoch": 0.12403,
      "grad_norm": 1.3799174340236744,
      "learning_rate": 0.003,
      "loss": 4.0234,
      "step": 12403
    },
    {
      "epoch": 0.12404,
      "grad_norm": 0.9632650030245177,
      "learning_rate": 0.003,
      "loss": 4.0224,
      "step": 12404
    },
    {
      "epoch": 0.12405,
      "grad_norm": 1.5065254113151083,
      "learning_rate": 0.003,
      "loss": 4.0412,
      "step": 12405
    },
    {
      "epoch": 0.12406,
      "grad_norm": 1.2014138932290637,
      "learning_rate": 0.003,
      "loss": 4.0227,
      "step": 12406
    },
    {
      "epoch": 0.12407,
      "grad_norm": 1.3139679781057607,
      "learning_rate": 0.003,
      "loss": 4.018,
      "step": 12407
    },
    {
      "epoch": 0.12408,
      "grad_norm": 0.9751484488901144,
      "learning_rate": 0.003,
      "loss": 4.0384,
      "step": 12408
    },
    {
      "epoch": 0.12409,
      "grad_norm": 1.3002152359013281,
      "learning_rate": 0.003,
      "loss": 4.0094,
      "step": 12409
    },
    {
      "epoch": 0.1241,
      "grad_norm": 1.0966298279249171,
      "learning_rate": 0.003,
      "loss": 4.0436,
      "step": 12410
    },
    {
      "epoch": 0.12411,
      "grad_norm": 1.2426091821974081,
      "learning_rate": 0.003,
      "loss": 4.0145,
      "step": 12411
    },
    {
      "epoch": 0.12412,
      "grad_norm": 1.176010755554183,
      "learning_rate": 0.003,
      "loss": 4.0055,
      "step": 12412
    },
    {
      "epoch": 0.12413,
      "grad_norm": 1.286184335849363,
      "learning_rate": 0.003,
      "loss": 4.0533,
      "step": 12413
    },
    {
      "epoch": 0.12414,
      "grad_norm": 1.1895946884822244,
      "learning_rate": 0.003,
      "loss": 4.0131,
      "step": 12414
    },
    {
      "epoch": 0.12415,
      "grad_norm": 1.1707059442835113,
      "learning_rate": 0.003,
      "loss": 4.0266,
      "step": 12415
    },
    {
      "epoch": 0.12416,
      "grad_norm": 0.9363912878676401,
      "learning_rate": 0.003,
      "loss": 4.0436,
      "step": 12416
    },
    {
      "epoch": 0.12417,
      "grad_norm": 1.046953318675986,
      "learning_rate": 0.003,
      "loss": 4.04,
      "step": 12417
    },
    {
      "epoch": 0.12418,
      "grad_norm": 1.317720438395889,
      "learning_rate": 0.003,
      "loss": 4.0343,
      "step": 12418
    },
    {
      "epoch": 0.12419,
      "grad_norm": 1.0657548477063905,
      "learning_rate": 0.003,
      "loss": 4.0075,
      "step": 12419
    },
    {
      "epoch": 0.1242,
      "grad_norm": 1.1089272329972197,
      "learning_rate": 0.003,
      "loss": 3.9914,
      "step": 12420
    },
    {
      "epoch": 0.12421,
      "grad_norm": 1.3485884933715704,
      "learning_rate": 0.003,
      "loss": 4.0574,
      "step": 12421
    },
    {
      "epoch": 0.12422,
      "grad_norm": 1.222601076789822,
      "learning_rate": 0.003,
      "loss": 4.0268,
      "step": 12422
    },
    {
      "epoch": 0.12423,
      "grad_norm": 1.3597254489602004,
      "learning_rate": 0.003,
      "loss": 4.0397,
      "step": 12423
    },
    {
      "epoch": 0.12424,
      "grad_norm": 1.1677550597946742,
      "learning_rate": 0.003,
      "loss": 3.9886,
      "step": 12424
    },
    {
      "epoch": 0.12425,
      "grad_norm": 1.1477029620568557,
      "learning_rate": 0.003,
      "loss": 4.0149,
      "step": 12425
    },
    {
      "epoch": 0.12426,
      "grad_norm": 1.2385626889982522,
      "learning_rate": 0.003,
      "loss": 4.0116,
      "step": 12426
    },
    {
      "epoch": 0.12427,
      "grad_norm": 0.9655680600257277,
      "learning_rate": 0.003,
      "loss": 4.0192,
      "step": 12427
    },
    {
      "epoch": 0.12428,
      "grad_norm": 1.4020376789614961,
      "learning_rate": 0.003,
      "loss": 4.0466,
      "step": 12428
    },
    {
      "epoch": 0.12429,
      "grad_norm": 1.1747627795440823,
      "learning_rate": 0.003,
      "loss": 4.0054,
      "step": 12429
    },
    {
      "epoch": 0.1243,
      "grad_norm": 1.3579254765229938,
      "learning_rate": 0.003,
      "loss": 4.0403,
      "step": 12430
    },
    {
      "epoch": 0.12431,
      "grad_norm": 0.9294601734714742,
      "learning_rate": 0.003,
      "loss": 4.0115,
      "step": 12431
    },
    {
      "epoch": 0.12432,
      "grad_norm": 1.2390178128721387,
      "learning_rate": 0.003,
      "loss": 4.0376,
      "step": 12432
    },
    {
      "epoch": 0.12433,
      "grad_norm": 0.9563357941981651,
      "learning_rate": 0.003,
      "loss": 4.0353,
      "step": 12433
    },
    {
      "epoch": 0.12434,
      "grad_norm": 1.3376711308452687,
      "learning_rate": 0.003,
      "loss": 4.0613,
      "step": 12434
    },
    {
      "epoch": 0.12435,
      "grad_norm": 1.0536428112043779,
      "learning_rate": 0.003,
      "loss": 4.024,
      "step": 12435
    },
    {
      "epoch": 0.12436,
      "grad_norm": 1.30700540627271,
      "learning_rate": 0.003,
      "loss": 4.0013,
      "step": 12436
    },
    {
      "epoch": 0.12437,
      "grad_norm": 1.1830783159612694,
      "learning_rate": 0.003,
      "loss": 4.0056,
      "step": 12437
    },
    {
      "epoch": 0.12438,
      "grad_norm": 1.0559360395390067,
      "learning_rate": 0.003,
      "loss": 3.9997,
      "step": 12438
    },
    {
      "epoch": 0.12439,
      "grad_norm": 1.3789067519353553,
      "learning_rate": 0.003,
      "loss": 4.039,
      "step": 12439
    },
    {
      "epoch": 0.1244,
      "grad_norm": 1.0241235840308207,
      "learning_rate": 0.003,
      "loss": 4.014,
      "step": 12440
    },
    {
      "epoch": 0.12441,
      "grad_norm": 1.3409960385311457,
      "learning_rate": 0.003,
      "loss": 4.0049,
      "step": 12441
    },
    {
      "epoch": 0.12442,
      "grad_norm": 0.9732899619983509,
      "learning_rate": 0.003,
      "loss": 4.0267,
      "step": 12442
    },
    {
      "epoch": 0.12443,
      "grad_norm": 1.2486871041590437,
      "learning_rate": 0.003,
      "loss": 4.0342,
      "step": 12443
    },
    {
      "epoch": 0.12444,
      "grad_norm": 1.2134843237041595,
      "learning_rate": 0.003,
      "loss": 4.0217,
      "step": 12444
    },
    {
      "epoch": 0.12445,
      "grad_norm": 1.269281860860668,
      "learning_rate": 0.003,
      "loss": 4.0355,
      "step": 12445
    },
    {
      "epoch": 0.12446,
      "grad_norm": 1.3188318275217783,
      "learning_rate": 0.003,
      "loss": 4.0336,
      "step": 12446
    },
    {
      "epoch": 0.12447,
      "grad_norm": 1.1481372478074385,
      "learning_rate": 0.003,
      "loss": 4.0415,
      "step": 12447
    },
    {
      "epoch": 0.12448,
      "grad_norm": 1.2369333239119498,
      "learning_rate": 0.003,
      "loss": 4.0351,
      "step": 12448
    },
    {
      "epoch": 0.12449,
      "grad_norm": 1.2694235265392477,
      "learning_rate": 0.003,
      "loss": 4.0561,
      "step": 12449
    },
    {
      "epoch": 0.1245,
      "grad_norm": 1.1317858601417061,
      "learning_rate": 0.003,
      "loss": 4.0066,
      "step": 12450
    },
    {
      "epoch": 0.12451,
      "grad_norm": 1.074137700501863,
      "learning_rate": 0.003,
      "loss": 3.9826,
      "step": 12451
    },
    {
      "epoch": 0.12452,
      "grad_norm": 1.2885266855909137,
      "learning_rate": 0.003,
      "loss": 4.0377,
      "step": 12452
    },
    {
      "epoch": 0.12453,
      "grad_norm": 1.0396878994563437,
      "learning_rate": 0.003,
      "loss": 4.0396,
      "step": 12453
    },
    {
      "epoch": 0.12454,
      "grad_norm": 1.4662506221052487,
      "learning_rate": 0.003,
      "loss": 4.0021,
      "step": 12454
    },
    {
      "epoch": 0.12455,
      "grad_norm": 0.9532945933163989,
      "learning_rate": 0.003,
      "loss": 4.0267,
      "step": 12455
    },
    {
      "epoch": 0.12456,
      "grad_norm": 1.2584449951050858,
      "learning_rate": 0.003,
      "loss": 4.003,
      "step": 12456
    },
    {
      "epoch": 0.12457,
      "grad_norm": 1.1411104987476233,
      "learning_rate": 0.003,
      "loss": 4.0321,
      "step": 12457
    },
    {
      "epoch": 0.12458,
      "grad_norm": 1.1800010265166672,
      "learning_rate": 0.003,
      "loss": 4.0057,
      "step": 12458
    },
    {
      "epoch": 0.12459,
      "grad_norm": 1.2595512640292814,
      "learning_rate": 0.003,
      "loss": 4.0427,
      "step": 12459
    },
    {
      "epoch": 0.1246,
      "grad_norm": 1.1244644633947707,
      "learning_rate": 0.003,
      "loss": 4.0471,
      "step": 12460
    },
    {
      "epoch": 0.12461,
      "grad_norm": 1.4047730844954986,
      "learning_rate": 0.003,
      "loss": 4.0294,
      "step": 12461
    },
    {
      "epoch": 0.12462,
      "grad_norm": 1.065955962651246,
      "learning_rate": 0.003,
      "loss": 4.0183,
      "step": 12462
    },
    {
      "epoch": 0.12463,
      "grad_norm": 1.23712644506417,
      "learning_rate": 0.003,
      "loss": 4.0204,
      "step": 12463
    },
    {
      "epoch": 0.12464,
      "grad_norm": 1.048205712021745,
      "learning_rate": 0.003,
      "loss": 4.0327,
      "step": 12464
    },
    {
      "epoch": 0.12465,
      "grad_norm": 1.2119882374602933,
      "learning_rate": 0.003,
      "loss": 4.0046,
      "step": 12465
    },
    {
      "epoch": 0.12466,
      "grad_norm": 1.0136502432256684,
      "learning_rate": 0.003,
      "loss": 4.0448,
      "step": 12466
    },
    {
      "epoch": 0.12467,
      "grad_norm": 1.3500074196062515,
      "learning_rate": 0.003,
      "loss": 4.0398,
      "step": 12467
    },
    {
      "epoch": 0.12468,
      "grad_norm": 1.1123455140298126,
      "learning_rate": 0.003,
      "loss": 4.0484,
      "step": 12468
    },
    {
      "epoch": 0.12469,
      "grad_norm": 1.1752785623116864,
      "learning_rate": 0.003,
      "loss": 4.0068,
      "step": 12469
    },
    {
      "epoch": 0.1247,
      "grad_norm": 1.0239397030257342,
      "learning_rate": 0.003,
      "loss": 3.994,
      "step": 12470
    },
    {
      "epoch": 0.12471,
      "grad_norm": 1.3169454051060006,
      "learning_rate": 0.003,
      "loss": 4.0473,
      "step": 12471
    },
    {
      "epoch": 0.12472,
      "grad_norm": 1.076334168779626,
      "learning_rate": 0.003,
      "loss": 4.0437,
      "step": 12472
    },
    {
      "epoch": 0.12473,
      "grad_norm": 1.1950416553824108,
      "learning_rate": 0.003,
      "loss": 4.0141,
      "step": 12473
    },
    {
      "epoch": 0.12474,
      "grad_norm": 1.3179792713272818,
      "learning_rate": 0.003,
      "loss": 4.0439,
      "step": 12474
    },
    {
      "epoch": 0.12475,
      "grad_norm": 1.245756788047074,
      "learning_rate": 0.003,
      "loss": 4.023,
      "step": 12475
    },
    {
      "epoch": 0.12476,
      "grad_norm": 1.1900834831639637,
      "learning_rate": 0.003,
      "loss": 4.0526,
      "step": 12476
    },
    {
      "epoch": 0.12477,
      "grad_norm": 1.2777258402774527,
      "learning_rate": 0.003,
      "loss": 4.0034,
      "step": 12477
    },
    {
      "epoch": 0.12478,
      "grad_norm": 1.3540644784807185,
      "learning_rate": 0.003,
      "loss": 4.0028,
      "step": 12478
    },
    {
      "epoch": 0.12479,
      "grad_norm": 1.250859757659591,
      "learning_rate": 0.003,
      "loss": 4.034,
      "step": 12479
    },
    {
      "epoch": 0.1248,
      "grad_norm": 1.0011661308076665,
      "learning_rate": 0.003,
      "loss": 4.0181,
      "step": 12480
    },
    {
      "epoch": 0.12481,
      "grad_norm": 1.0944248906550968,
      "learning_rate": 0.003,
      "loss": 4.0035,
      "step": 12481
    },
    {
      "epoch": 0.12482,
      "grad_norm": 1.0766623803287114,
      "learning_rate": 0.003,
      "loss": 4.0338,
      "step": 12482
    },
    {
      "epoch": 0.12483,
      "grad_norm": 1.247377497569275,
      "learning_rate": 0.003,
      "loss": 4.0388,
      "step": 12483
    },
    {
      "epoch": 0.12484,
      "grad_norm": 1.2590475389724518,
      "learning_rate": 0.003,
      "loss": 3.985,
      "step": 12484
    },
    {
      "epoch": 0.12485,
      "grad_norm": 1.2563221922601575,
      "learning_rate": 0.003,
      "loss": 4.0373,
      "step": 12485
    },
    {
      "epoch": 0.12486,
      "grad_norm": 1.366666755852874,
      "learning_rate": 0.003,
      "loss": 4.0058,
      "step": 12486
    },
    {
      "epoch": 0.12487,
      "grad_norm": 1.0391130894161864,
      "learning_rate": 0.003,
      "loss": 3.9849,
      "step": 12487
    },
    {
      "epoch": 0.12488,
      "grad_norm": 1.252739650762261,
      "learning_rate": 0.003,
      "loss": 4.0275,
      "step": 12488
    },
    {
      "epoch": 0.12489,
      "grad_norm": 1.1169828804543638,
      "learning_rate": 0.003,
      "loss": 4.005,
      "step": 12489
    },
    {
      "epoch": 0.1249,
      "grad_norm": 1.1550846912653996,
      "learning_rate": 0.003,
      "loss": 4.0241,
      "step": 12490
    },
    {
      "epoch": 0.12491,
      "grad_norm": 1.2273757973022745,
      "learning_rate": 0.003,
      "loss": 4.0242,
      "step": 12491
    },
    {
      "epoch": 0.12492,
      "grad_norm": 1.138029638011022,
      "learning_rate": 0.003,
      "loss": 4.0392,
      "step": 12492
    },
    {
      "epoch": 0.12493,
      "grad_norm": 1.1633599584548278,
      "learning_rate": 0.003,
      "loss": 4.0343,
      "step": 12493
    },
    {
      "epoch": 0.12494,
      "grad_norm": 1.1071951401003601,
      "learning_rate": 0.003,
      "loss": 4.0048,
      "step": 12494
    },
    {
      "epoch": 0.12495,
      "grad_norm": 1.1728962633082556,
      "learning_rate": 0.003,
      "loss": 4.0251,
      "step": 12495
    },
    {
      "epoch": 0.12496,
      "grad_norm": 1.1412993656234558,
      "learning_rate": 0.003,
      "loss": 4.006,
      "step": 12496
    },
    {
      "epoch": 0.12497,
      "grad_norm": 1.1836701462957342,
      "learning_rate": 0.003,
      "loss": 4.023,
      "step": 12497
    },
    {
      "epoch": 0.12498,
      "grad_norm": 1.3444553399522738,
      "learning_rate": 0.003,
      "loss": 4.0239,
      "step": 12498
    },
    {
      "epoch": 0.12499,
      "grad_norm": 1.055299434095236,
      "learning_rate": 0.003,
      "loss": 4.0277,
      "step": 12499
    },
    {
      "epoch": 0.125,
      "grad_norm": 1.1681452892889812,
      "learning_rate": 0.003,
      "loss": 4.0656,
      "step": 12500
    },
    {
      "epoch": 0.12501,
      "grad_norm": 1.1566911568476128,
      "learning_rate": 0.003,
      "loss": 4.039,
      "step": 12501
    },
    {
      "epoch": 0.12502,
      "grad_norm": 1.2648808509254572,
      "learning_rate": 0.003,
      "loss": 4.0248,
      "step": 12502
    },
    {
      "epoch": 0.12503,
      "grad_norm": 1.1849013409331248,
      "learning_rate": 0.003,
      "loss": 4.029,
      "step": 12503
    },
    {
      "epoch": 0.12504,
      "grad_norm": 1.2885672439504576,
      "learning_rate": 0.003,
      "loss": 4.0338,
      "step": 12504
    },
    {
      "epoch": 0.12505,
      "grad_norm": 1.1165062827533234,
      "learning_rate": 0.003,
      "loss": 4.0416,
      "step": 12505
    },
    {
      "epoch": 0.12506,
      "grad_norm": 1.312246783818512,
      "learning_rate": 0.003,
      "loss": 4.0325,
      "step": 12506
    },
    {
      "epoch": 0.12507,
      "grad_norm": 1.3021217260244191,
      "learning_rate": 0.003,
      "loss": 4.0039,
      "step": 12507
    },
    {
      "epoch": 0.12508,
      "grad_norm": 1.080380448706609,
      "learning_rate": 0.003,
      "loss": 4.0206,
      "step": 12508
    },
    {
      "epoch": 0.12509,
      "grad_norm": 1.4439839528646126,
      "learning_rate": 0.003,
      "loss": 4.0333,
      "step": 12509
    },
    {
      "epoch": 0.1251,
      "grad_norm": 1.0678739097641072,
      "learning_rate": 0.003,
      "loss": 3.9946,
      "step": 12510
    },
    {
      "epoch": 0.12511,
      "grad_norm": 1.239905906705609,
      "learning_rate": 0.003,
      "loss": 4.0145,
      "step": 12511
    },
    {
      "epoch": 0.12512,
      "grad_norm": 1.0887393820962412,
      "learning_rate": 0.003,
      "loss": 4.0191,
      "step": 12512
    },
    {
      "epoch": 0.12513,
      "grad_norm": 1.1044326141571423,
      "learning_rate": 0.003,
      "loss": 4.0298,
      "step": 12513
    },
    {
      "epoch": 0.12514,
      "grad_norm": 1.296916204033871,
      "learning_rate": 0.003,
      "loss": 4.0268,
      "step": 12514
    },
    {
      "epoch": 0.12515,
      "grad_norm": 1.1111976158805856,
      "learning_rate": 0.003,
      "loss": 4.0184,
      "step": 12515
    },
    {
      "epoch": 0.12516,
      "grad_norm": 1.4345634992074552,
      "learning_rate": 0.003,
      "loss": 4.0381,
      "step": 12516
    },
    {
      "epoch": 0.12517,
      "grad_norm": 1.0038106730731644,
      "learning_rate": 0.003,
      "loss": 3.9899,
      "step": 12517
    },
    {
      "epoch": 0.12518,
      "grad_norm": 1.233815776710949,
      "learning_rate": 0.003,
      "loss": 4.0427,
      "step": 12518
    },
    {
      "epoch": 0.12519,
      "grad_norm": 1.0644914590833376,
      "learning_rate": 0.003,
      "loss": 4.028,
      "step": 12519
    },
    {
      "epoch": 0.1252,
      "grad_norm": 1.1510047747949075,
      "learning_rate": 0.003,
      "loss": 3.9852,
      "step": 12520
    },
    {
      "epoch": 0.12521,
      "grad_norm": 1.2458741278187768,
      "learning_rate": 0.003,
      "loss": 4.0306,
      "step": 12521
    },
    {
      "epoch": 0.12522,
      "grad_norm": 1.2658284858594022,
      "learning_rate": 0.003,
      "loss": 4.0664,
      "step": 12522
    },
    {
      "epoch": 0.12523,
      "grad_norm": 1.0684459848883463,
      "learning_rate": 0.003,
      "loss": 4.0114,
      "step": 12523
    },
    {
      "epoch": 0.12524,
      "grad_norm": 1.1841532183619132,
      "learning_rate": 0.003,
      "loss": 4.0068,
      "step": 12524
    },
    {
      "epoch": 0.12525,
      "grad_norm": 1.2159923676158797,
      "learning_rate": 0.003,
      "loss": 3.9991,
      "step": 12525
    },
    {
      "epoch": 0.12526,
      "grad_norm": 1.3025781911250225,
      "learning_rate": 0.003,
      "loss": 4.0562,
      "step": 12526
    },
    {
      "epoch": 0.12527,
      "grad_norm": 1.0656354531638161,
      "learning_rate": 0.003,
      "loss": 4.0469,
      "step": 12527
    },
    {
      "epoch": 0.12528,
      "grad_norm": 1.5808452423937671,
      "learning_rate": 0.003,
      "loss": 4.0189,
      "step": 12528
    },
    {
      "epoch": 0.12529,
      "grad_norm": 1.1645359316338186,
      "learning_rate": 0.003,
      "loss": 4.0121,
      "step": 12529
    },
    {
      "epoch": 0.1253,
      "grad_norm": 1.0732087825270942,
      "learning_rate": 0.003,
      "loss": 4.0041,
      "step": 12530
    },
    {
      "epoch": 0.12531,
      "grad_norm": 1.1743639789562694,
      "learning_rate": 0.003,
      "loss": 4.0233,
      "step": 12531
    },
    {
      "epoch": 0.12532,
      "grad_norm": 1.1258543261386147,
      "learning_rate": 0.003,
      "loss": 4.0121,
      "step": 12532
    },
    {
      "epoch": 0.12533,
      "grad_norm": 1.2074492118607754,
      "learning_rate": 0.003,
      "loss": 4.0238,
      "step": 12533
    },
    {
      "epoch": 0.12534,
      "grad_norm": 1.1335294582199398,
      "learning_rate": 0.003,
      "loss": 4.0168,
      "step": 12534
    },
    {
      "epoch": 0.12535,
      "grad_norm": 1.393436023522577,
      "learning_rate": 0.003,
      "loss": 4.0342,
      "step": 12535
    },
    {
      "epoch": 0.12536,
      "grad_norm": 0.8943166326444245,
      "learning_rate": 0.003,
      "loss": 3.9654,
      "step": 12536
    },
    {
      "epoch": 0.12537,
      "grad_norm": 1.0287110754049926,
      "learning_rate": 0.003,
      "loss": 4.0238,
      "step": 12537
    },
    {
      "epoch": 0.12538,
      "grad_norm": 1.255488963677731,
      "learning_rate": 0.003,
      "loss": 4.031,
      "step": 12538
    },
    {
      "epoch": 0.12539,
      "grad_norm": 1.0084876103414888,
      "learning_rate": 0.003,
      "loss": 4.0477,
      "step": 12539
    },
    {
      "epoch": 0.1254,
      "grad_norm": 1.4860849068921025,
      "learning_rate": 0.003,
      "loss": 4.03,
      "step": 12540
    },
    {
      "epoch": 0.12541,
      "grad_norm": 0.9581597732122125,
      "learning_rate": 0.003,
      "loss": 4.0013,
      "step": 12541
    },
    {
      "epoch": 0.12542,
      "grad_norm": 1.4035696364661694,
      "learning_rate": 0.003,
      "loss": 4.0419,
      "step": 12542
    },
    {
      "epoch": 0.12543,
      "grad_norm": 1.0541027313016913,
      "learning_rate": 0.003,
      "loss": 3.99,
      "step": 12543
    },
    {
      "epoch": 0.12544,
      "grad_norm": 1.1821544967346649,
      "learning_rate": 0.003,
      "loss": 4.0607,
      "step": 12544
    },
    {
      "epoch": 0.12545,
      "grad_norm": 1.2469621369638744,
      "learning_rate": 0.003,
      "loss": 4.0323,
      "step": 12545
    },
    {
      "epoch": 0.12546,
      "grad_norm": 1.4233370848984432,
      "learning_rate": 0.003,
      "loss": 4.0218,
      "step": 12546
    },
    {
      "epoch": 0.12547,
      "grad_norm": 1.0233959256284908,
      "learning_rate": 0.003,
      "loss": 4.053,
      "step": 12547
    },
    {
      "epoch": 0.12548,
      "grad_norm": 1.242054122735897,
      "learning_rate": 0.003,
      "loss": 4.0112,
      "step": 12548
    },
    {
      "epoch": 0.12549,
      "grad_norm": 1.1070600937614252,
      "learning_rate": 0.003,
      "loss": 4.0377,
      "step": 12549
    },
    {
      "epoch": 0.1255,
      "grad_norm": 1.1087147779990503,
      "learning_rate": 0.003,
      "loss": 4.0166,
      "step": 12550
    },
    {
      "epoch": 0.12551,
      "grad_norm": 1.1665980055570004,
      "learning_rate": 0.003,
      "loss": 4.0425,
      "step": 12551
    },
    {
      "epoch": 0.12552,
      "grad_norm": 1.1086804659085676,
      "learning_rate": 0.003,
      "loss": 3.9946,
      "step": 12552
    },
    {
      "epoch": 0.12553,
      "grad_norm": 1.3121353361094357,
      "learning_rate": 0.003,
      "loss": 4.0409,
      "step": 12553
    },
    {
      "epoch": 0.12554,
      "grad_norm": 1.0719336426573771,
      "learning_rate": 0.003,
      "loss": 4.0182,
      "step": 12554
    },
    {
      "epoch": 0.12555,
      "grad_norm": 1.1223239153580142,
      "learning_rate": 0.003,
      "loss": 4.0352,
      "step": 12555
    },
    {
      "epoch": 0.12556,
      "grad_norm": 1.3267498467952836,
      "learning_rate": 0.003,
      "loss": 4.0551,
      "step": 12556
    },
    {
      "epoch": 0.12557,
      "grad_norm": 1.3268383865328555,
      "learning_rate": 0.003,
      "loss": 4.0477,
      "step": 12557
    },
    {
      "epoch": 0.12558,
      "grad_norm": 1.4931899187677369,
      "learning_rate": 0.003,
      "loss": 4.037,
      "step": 12558
    },
    {
      "epoch": 0.12559,
      "grad_norm": 1.053612760407219,
      "learning_rate": 0.003,
      "loss": 4.0015,
      "step": 12559
    },
    {
      "epoch": 0.1256,
      "grad_norm": 1.3211965283864615,
      "learning_rate": 0.003,
      "loss": 4.0242,
      "step": 12560
    },
    {
      "epoch": 0.12561,
      "grad_norm": 1.2521128264959986,
      "learning_rate": 0.003,
      "loss": 3.9995,
      "step": 12561
    },
    {
      "epoch": 0.12562,
      "grad_norm": 1.3252559218711522,
      "learning_rate": 0.003,
      "loss": 4.0217,
      "step": 12562
    },
    {
      "epoch": 0.12563,
      "grad_norm": 1.1034688777336168,
      "learning_rate": 0.003,
      "loss": 4.0234,
      "step": 12563
    },
    {
      "epoch": 0.12564,
      "grad_norm": 1.245795847005279,
      "learning_rate": 0.003,
      "loss": 4.0302,
      "step": 12564
    },
    {
      "epoch": 0.12565,
      "grad_norm": 1.0799959151676444,
      "learning_rate": 0.003,
      "loss": 4.0322,
      "step": 12565
    },
    {
      "epoch": 0.12566,
      "grad_norm": 1.1399011458856696,
      "learning_rate": 0.003,
      "loss": 4.0341,
      "step": 12566
    },
    {
      "epoch": 0.12567,
      "grad_norm": 1.1637302807696905,
      "learning_rate": 0.003,
      "loss": 4.0628,
      "step": 12567
    },
    {
      "epoch": 0.12568,
      "grad_norm": 1.3881251582640086,
      "learning_rate": 0.003,
      "loss": 4.0224,
      "step": 12568
    },
    {
      "epoch": 0.12569,
      "grad_norm": 0.9148944974251467,
      "learning_rate": 0.003,
      "loss": 4.0127,
      "step": 12569
    },
    {
      "epoch": 0.1257,
      "grad_norm": 1.3720674020991435,
      "learning_rate": 0.003,
      "loss": 4.0716,
      "step": 12570
    },
    {
      "epoch": 0.12571,
      "grad_norm": 1.1589754593303436,
      "learning_rate": 0.003,
      "loss": 4.0372,
      "step": 12571
    },
    {
      "epoch": 0.12572,
      "grad_norm": 1.288935836145991,
      "learning_rate": 0.003,
      "loss": 4.0396,
      "step": 12572
    },
    {
      "epoch": 0.12573,
      "grad_norm": 1.3761421534077674,
      "learning_rate": 0.003,
      "loss": 4.0139,
      "step": 12573
    },
    {
      "epoch": 0.12574,
      "grad_norm": 1.0527095557807116,
      "learning_rate": 0.003,
      "loss": 4.0348,
      "step": 12574
    },
    {
      "epoch": 0.12575,
      "grad_norm": 1.2462154382753858,
      "learning_rate": 0.003,
      "loss": 4.0664,
      "step": 12575
    },
    {
      "epoch": 0.12576,
      "grad_norm": 1.1767019385647244,
      "learning_rate": 0.003,
      "loss": 4.0304,
      "step": 12576
    },
    {
      "epoch": 0.12577,
      "grad_norm": 1.2310642908036986,
      "learning_rate": 0.003,
      "loss": 4.011,
      "step": 12577
    },
    {
      "epoch": 0.12578,
      "grad_norm": 1.0681347288973733,
      "learning_rate": 0.003,
      "loss": 4.018,
      "step": 12578
    },
    {
      "epoch": 0.12579,
      "grad_norm": 1.200121848817965,
      "learning_rate": 0.003,
      "loss": 4.0357,
      "step": 12579
    },
    {
      "epoch": 0.1258,
      "grad_norm": 1.0547714047591237,
      "learning_rate": 0.003,
      "loss": 4.0314,
      "step": 12580
    },
    {
      "epoch": 0.12581,
      "grad_norm": 1.399432745523962,
      "learning_rate": 0.003,
      "loss": 4.0004,
      "step": 12581
    },
    {
      "epoch": 0.12582,
      "grad_norm": 0.9891791775910421,
      "learning_rate": 0.003,
      "loss": 3.9918,
      "step": 12582
    },
    {
      "epoch": 0.12583,
      "grad_norm": 1.2865457649563121,
      "learning_rate": 0.003,
      "loss": 4.0447,
      "step": 12583
    },
    {
      "epoch": 0.12584,
      "grad_norm": 1.0318108268908344,
      "learning_rate": 0.003,
      "loss": 4.0066,
      "step": 12584
    },
    {
      "epoch": 0.12585,
      "grad_norm": 1.2833427957706554,
      "learning_rate": 0.003,
      "loss": 4.0588,
      "step": 12585
    },
    {
      "epoch": 0.12586,
      "grad_norm": 1.093722855738362,
      "learning_rate": 0.003,
      "loss": 4.031,
      "step": 12586
    },
    {
      "epoch": 0.12587,
      "grad_norm": 1.1534754913393301,
      "learning_rate": 0.003,
      "loss": 4.0502,
      "step": 12587
    },
    {
      "epoch": 0.12588,
      "grad_norm": 1.3439763061886305,
      "learning_rate": 0.003,
      "loss": 4.0178,
      "step": 12588
    },
    {
      "epoch": 0.12589,
      "grad_norm": 1.2091801981891621,
      "learning_rate": 0.003,
      "loss": 4.0314,
      "step": 12589
    },
    {
      "epoch": 0.1259,
      "grad_norm": 1.1011477287499383,
      "learning_rate": 0.003,
      "loss": 4.021,
      "step": 12590
    },
    {
      "epoch": 0.12591,
      "grad_norm": 1.1312006138545256,
      "learning_rate": 0.003,
      "loss": 4.016,
      "step": 12591
    },
    {
      "epoch": 0.12592,
      "grad_norm": 1.1523264528504664,
      "learning_rate": 0.003,
      "loss": 4.0254,
      "step": 12592
    },
    {
      "epoch": 0.12593,
      "grad_norm": 1.255533301220495,
      "learning_rate": 0.003,
      "loss": 4.0344,
      "step": 12593
    },
    {
      "epoch": 0.12594,
      "grad_norm": 1.2058516957260115,
      "learning_rate": 0.003,
      "loss": 4.0353,
      "step": 12594
    },
    {
      "epoch": 0.12595,
      "grad_norm": 0.9821636958326797,
      "learning_rate": 0.003,
      "loss": 4.0505,
      "step": 12595
    },
    {
      "epoch": 0.12596,
      "grad_norm": 1.2542787438402092,
      "learning_rate": 0.003,
      "loss": 3.9961,
      "step": 12596
    },
    {
      "epoch": 0.12597,
      "grad_norm": 1.2121687973888942,
      "learning_rate": 0.003,
      "loss": 4.0381,
      "step": 12597
    },
    {
      "epoch": 0.12598,
      "grad_norm": 1.3235298233111423,
      "learning_rate": 0.003,
      "loss": 4.0475,
      "step": 12598
    },
    {
      "epoch": 0.12599,
      "grad_norm": 1.000802003255851,
      "learning_rate": 0.003,
      "loss": 4.0484,
      "step": 12599
    },
    {
      "epoch": 0.126,
      "grad_norm": 1.2650303129189735,
      "learning_rate": 0.003,
      "loss": 4.0483,
      "step": 12600
    },
    {
      "epoch": 0.12601,
      "grad_norm": 1.2518624621845913,
      "learning_rate": 0.003,
      "loss": 4.0661,
      "step": 12601
    },
    {
      "epoch": 0.12602,
      "grad_norm": 1.4759094497892915,
      "learning_rate": 0.003,
      "loss": 4.0443,
      "step": 12602
    },
    {
      "epoch": 0.12603,
      "grad_norm": 1.2729425663847465,
      "learning_rate": 0.003,
      "loss": 3.9825,
      "step": 12603
    },
    {
      "epoch": 0.12604,
      "grad_norm": 1.191215597044836,
      "learning_rate": 0.003,
      "loss": 4.0515,
      "step": 12604
    },
    {
      "epoch": 0.12605,
      "grad_norm": 1.2346027461305622,
      "learning_rate": 0.003,
      "loss": 4.0244,
      "step": 12605
    },
    {
      "epoch": 0.12606,
      "grad_norm": 1.0890178656815974,
      "learning_rate": 0.003,
      "loss": 4.0372,
      "step": 12606
    },
    {
      "epoch": 0.12607,
      "grad_norm": 1.2208264878432329,
      "learning_rate": 0.003,
      "loss": 4.0168,
      "step": 12607
    },
    {
      "epoch": 0.12608,
      "grad_norm": 1.0725139583720165,
      "learning_rate": 0.003,
      "loss": 4.0146,
      "step": 12608
    },
    {
      "epoch": 0.12609,
      "grad_norm": 1.0438403055998695,
      "learning_rate": 0.003,
      "loss": 4.0177,
      "step": 12609
    },
    {
      "epoch": 0.1261,
      "grad_norm": 1.423012084634035,
      "learning_rate": 0.003,
      "loss": 4.0335,
      "step": 12610
    },
    {
      "epoch": 0.12611,
      "grad_norm": 0.9883128164819032,
      "learning_rate": 0.003,
      "loss": 4.0364,
      "step": 12611
    },
    {
      "epoch": 0.12612,
      "grad_norm": 1.4515971588087238,
      "learning_rate": 0.003,
      "loss": 4.0182,
      "step": 12612
    },
    {
      "epoch": 0.12613,
      "grad_norm": 0.9323036378252141,
      "learning_rate": 0.003,
      "loss": 3.9899,
      "step": 12613
    },
    {
      "epoch": 0.12614,
      "grad_norm": 1.3119744482601774,
      "learning_rate": 0.003,
      "loss": 4.0406,
      "step": 12614
    },
    {
      "epoch": 0.12615,
      "grad_norm": 1.3243048548778869,
      "learning_rate": 0.003,
      "loss": 4.0532,
      "step": 12615
    },
    {
      "epoch": 0.12616,
      "grad_norm": 1.0687568535775176,
      "learning_rate": 0.003,
      "loss": 3.9991,
      "step": 12616
    },
    {
      "epoch": 0.12617,
      "grad_norm": 1.2329522629931724,
      "learning_rate": 0.003,
      "loss": 4.0132,
      "step": 12617
    },
    {
      "epoch": 0.12618,
      "grad_norm": 1.2447679099075022,
      "learning_rate": 0.003,
      "loss": 4.0106,
      "step": 12618
    },
    {
      "epoch": 0.12619,
      "grad_norm": 1.1574700071462647,
      "learning_rate": 0.003,
      "loss": 4.0156,
      "step": 12619
    },
    {
      "epoch": 0.1262,
      "grad_norm": 1.127223199736362,
      "learning_rate": 0.003,
      "loss": 4.0369,
      "step": 12620
    },
    {
      "epoch": 0.12621,
      "grad_norm": 1.2178554897602358,
      "learning_rate": 0.003,
      "loss": 4.0227,
      "step": 12621
    },
    {
      "epoch": 0.12622,
      "grad_norm": 1.5252586533504058,
      "learning_rate": 0.003,
      "loss": 4.0356,
      "step": 12622
    },
    {
      "epoch": 0.12623,
      "grad_norm": 0.9953370520103634,
      "learning_rate": 0.003,
      "loss": 4.0275,
      "step": 12623
    },
    {
      "epoch": 0.12624,
      "grad_norm": 1.2288885934582412,
      "learning_rate": 0.003,
      "loss": 4.0194,
      "step": 12624
    },
    {
      "epoch": 0.12625,
      "grad_norm": 1.2039239879403167,
      "learning_rate": 0.003,
      "loss": 4.0293,
      "step": 12625
    },
    {
      "epoch": 0.12626,
      "grad_norm": 1.0854130128519532,
      "learning_rate": 0.003,
      "loss": 4.0066,
      "step": 12626
    },
    {
      "epoch": 0.12627,
      "grad_norm": 1.1745590891614175,
      "learning_rate": 0.003,
      "loss": 4.021,
      "step": 12627
    },
    {
      "epoch": 0.12628,
      "grad_norm": 1.1566678215804154,
      "learning_rate": 0.003,
      "loss": 4.0004,
      "step": 12628
    },
    {
      "epoch": 0.12629,
      "grad_norm": 1.233599095010633,
      "learning_rate": 0.003,
      "loss": 4.0505,
      "step": 12629
    },
    {
      "epoch": 0.1263,
      "grad_norm": 1.270380200195146,
      "learning_rate": 0.003,
      "loss": 4.0089,
      "step": 12630
    },
    {
      "epoch": 0.12631,
      "grad_norm": 1.1062440334986967,
      "learning_rate": 0.003,
      "loss": 4.0168,
      "step": 12631
    },
    {
      "epoch": 0.12632,
      "grad_norm": 1.3055152614095051,
      "learning_rate": 0.003,
      "loss": 4.026,
      "step": 12632
    },
    {
      "epoch": 0.12633,
      "grad_norm": 1.0713326524662674,
      "learning_rate": 0.003,
      "loss": 4.0092,
      "step": 12633
    },
    {
      "epoch": 0.12634,
      "grad_norm": 1.328625658674953,
      "learning_rate": 0.003,
      "loss": 4.0331,
      "step": 12634
    },
    {
      "epoch": 0.12635,
      "grad_norm": 0.9992586114619201,
      "learning_rate": 0.003,
      "loss": 4.0587,
      "step": 12635
    },
    {
      "epoch": 0.12636,
      "grad_norm": 1.4006756734928287,
      "learning_rate": 0.003,
      "loss": 4.0302,
      "step": 12636
    },
    {
      "epoch": 0.12637,
      "grad_norm": 1.0884890054982994,
      "learning_rate": 0.003,
      "loss": 4.0303,
      "step": 12637
    },
    {
      "epoch": 0.12638,
      "grad_norm": 1.2337201777333826,
      "learning_rate": 0.003,
      "loss": 4.0306,
      "step": 12638
    },
    {
      "epoch": 0.12639,
      "grad_norm": 1.1474779657673895,
      "learning_rate": 0.003,
      "loss": 4.039,
      "step": 12639
    },
    {
      "epoch": 0.1264,
      "grad_norm": 1.2550079297749897,
      "learning_rate": 0.003,
      "loss": 4.0242,
      "step": 12640
    },
    {
      "epoch": 0.12641,
      "grad_norm": 1.2570253479523554,
      "learning_rate": 0.003,
      "loss": 4.0296,
      "step": 12641
    },
    {
      "epoch": 0.12642,
      "grad_norm": 1.2545340832026206,
      "learning_rate": 0.003,
      "loss": 4.0189,
      "step": 12642
    },
    {
      "epoch": 0.12643,
      "grad_norm": 1.1479312545047873,
      "learning_rate": 0.003,
      "loss": 4.0411,
      "step": 12643
    },
    {
      "epoch": 0.12644,
      "grad_norm": 1.3820539223152442,
      "learning_rate": 0.003,
      "loss": 4.019,
      "step": 12644
    },
    {
      "epoch": 0.12645,
      "grad_norm": 1.0105513446878904,
      "learning_rate": 0.003,
      "loss": 4.0286,
      "step": 12645
    },
    {
      "epoch": 0.12646,
      "grad_norm": 1.2048119372599568,
      "learning_rate": 0.003,
      "loss": 4.0109,
      "step": 12646
    },
    {
      "epoch": 0.12647,
      "grad_norm": 1.010959049838853,
      "learning_rate": 0.003,
      "loss": 4.0031,
      "step": 12647
    },
    {
      "epoch": 0.12648,
      "grad_norm": 1.2821540434209036,
      "learning_rate": 0.003,
      "loss": 4.0595,
      "step": 12648
    },
    {
      "epoch": 0.12649,
      "grad_norm": 1.0810220087640319,
      "learning_rate": 0.003,
      "loss": 3.9971,
      "step": 12649
    },
    {
      "epoch": 0.1265,
      "grad_norm": 1.204458455749565,
      "learning_rate": 0.003,
      "loss": 4.0531,
      "step": 12650
    },
    {
      "epoch": 0.12651,
      "grad_norm": 0.9792762762076124,
      "learning_rate": 0.003,
      "loss": 4.0279,
      "step": 12651
    },
    {
      "epoch": 0.12652,
      "grad_norm": 1.5682234906370067,
      "learning_rate": 0.003,
      "loss": 4.0256,
      "step": 12652
    },
    {
      "epoch": 0.12653,
      "grad_norm": 0.8164817667847991,
      "learning_rate": 0.003,
      "loss": 4.0222,
      "step": 12653
    },
    {
      "epoch": 0.12654,
      "grad_norm": 1.2008138076892603,
      "learning_rate": 0.003,
      "loss": 4.018,
      "step": 12654
    },
    {
      "epoch": 0.12655,
      "grad_norm": 1.2201295724632517,
      "learning_rate": 0.003,
      "loss": 4.0358,
      "step": 12655
    },
    {
      "epoch": 0.12656,
      "grad_norm": 1.3418859859435595,
      "learning_rate": 0.003,
      "loss": 4.0574,
      "step": 12656
    },
    {
      "epoch": 0.12657,
      "grad_norm": 1.2531819255827479,
      "learning_rate": 0.003,
      "loss": 4.0245,
      "step": 12657
    },
    {
      "epoch": 0.12658,
      "grad_norm": 1.2676038809684318,
      "learning_rate": 0.003,
      "loss": 4.0528,
      "step": 12658
    },
    {
      "epoch": 0.12659,
      "grad_norm": 1.0091691228634534,
      "learning_rate": 0.003,
      "loss": 3.9969,
      "step": 12659
    },
    {
      "epoch": 0.1266,
      "grad_norm": 1.2238608274423066,
      "learning_rate": 0.003,
      "loss": 4.0139,
      "step": 12660
    },
    {
      "epoch": 0.12661,
      "grad_norm": 0.9105821714736707,
      "learning_rate": 0.003,
      "loss": 4.0199,
      "step": 12661
    },
    {
      "epoch": 0.12662,
      "grad_norm": 1.2446462516408656,
      "learning_rate": 0.003,
      "loss": 4.02,
      "step": 12662
    },
    {
      "epoch": 0.12663,
      "grad_norm": 1.3444189852230655,
      "learning_rate": 0.003,
      "loss": 4.0144,
      "step": 12663
    },
    {
      "epoch": 0.12664,
      "grad_norm": 1.215579372076641,
      "learning_rate": 0.003,
      "loss": 4.0215,
      "step": 12664
    },
    {
      "epoch": 0.12665,
      "grad_norm": 1.1894756167487517,
      "learning_rate": 0.003,
      "loss": 4.0358,
      "step": 12665
    },
    {
      "epoch": 0.12666,
      "grad_norm": 1.0796402434304548,
      "learning_rate": 0.003,
      "loss": 4.0159,
      "step": 12666
    },
    {
      "epoch": 0.12667,
      "grad_norm": 1.2729192473905775,
      "learning_rate": 0.003,
      "loss": 4.0016,
      "step": 12667
    },
    {
      "epoch": 0.12668,
      "grad_norm": 1.148234082366681,
      "learning_rate": 0.003,
      "loss": 4.0132,
      "step": 12668
    },
    {
      "epoch": 0.12669,
      "grad_norm": 1.1418967246731544,
      "learning_rate": 0.003,
      "loss": 4.0478,
      "step": 12669
    },
    {
      "epoch": 0.1267,
      "grad_norm": 1.2700756456120366,
      "learning_rate": 0.003,
      "loss": 3.9987,
      "step": 12670
    },
    {
      "epoch": 0.12671,
      "grad_norm": 1.0764358520395991,
      "learning_rate": 0.003,
      "loss": 4.0328,
      "step": 12671
    },
    {
      "epoch": 0.12672,
      "grad_norm": 1.409785311657584,
      "learning_rate": 0.003,
      "loss": 4.0203,
      "step": 12672
    },
    {
      "epoch": 0.12673,
      "grad_norm": 1.1401916417528815,
      "learning_rate": 0.003,
      "loss": 4.0343,
      "step": 12673
    },
    {
      "epoch": 0.12674,
      "grad_norm": 1.1669071703585918,
      "learning_rate": 0.003,
      "loss": 4.0199,
      "step": 12674
    },
    {
      "epoch": 0.12675,
      "grad_norm": 1.1980384256902983,
      "learning_rate": 0.003,
      "loss": 4.0437,
      "step": 12675
    },
    {
      "epoch": 0.12676,
      "grad_norm": 1.2130331742254143,
      "learning_rate": 0.003,
      "loss": 4.0091,
      "step": 12676
    },
    {
      "epoch": 0.12677,
      "grad_norm": 1.1165493248220288,
      "learning_rate": 0.003,
      "loss": 3.9962,
      "step": 12677
    },
    {
      "epoch": 0.12678,
      "grad_norm": 1.3433766034805201,
      "learning_rate": 0.003,
      "loss": 4.0217,
      "step": 12678
    },
    {
      "epoch": 0.12679,
      "grad_norm": 1.1252445488214073,
      "learning_rate": 0.003,
      "loss": 4.0269,
      "step": 12679
    },
    {
      "epoch": 0.1268,
      "grad_norm": 1.0754874878584735,
      "learning_rate": 0.003,
      "loss": 4.0236,
      "step": 12680
    },
    {
      "epoch": 0.12681,
      "grad_norm": 1.1485296840321575,
      "learning_rate": 0.003,
      "loss": 4.0356,
      "step": 12681
    },
    {
      "epoch": 0.12682,
      "grad_norm": 1.1760583571845962,
      "learning_rate": 0.003,
      "loss": 4.0226,
      "step": 12682
    },
    {
      "epoch": 0.12683,
      "grad_norm": 1.0880121539775605,
      "learning_rate": 0.003,
      "loss": 4.0381,
      "step": 12683
    },
    {
      "epoch": 0.12684,
      "grad_norm": 1.3040924045912328,
      "learning_rate": 0.003,
      "loss": 4.0419,
      "step": 12684
    },
    {
      "epoch": 0.12685,
      "grad_norm": 1.1876837315600164,
      "learning_rate": 0.003,
      "loss": 4.0271,
      "step": 12685
    },
    {
      "epoch": 0.12686,
      "grad_norm": 1.3387931565528755,
      "learning_rate": 0.003,
      "loss": 4.0417,
      "step": 12686
    },
    {
      "epoch": 0.12687,
      "grad_norm": 1.0300693277032922,
      "learning_rate": 0.003,
      "loss": 3.9966,
      "step": 12687
    },
    {
      "epoch": 0.12688,
      "grad_norm": 1.297604962549656,
      "learning_rate": 0.003,
      "loss": 4.0228,
      "step": 12688
    },
    {
      "epoch": 0.12689,
      "grad_norm": 1.100287623472564,
      "learning_rate": 0.003,
      "loss": 4.0073,
      "step": 12689
    },
    {
      "epoch": 0.1269,
      "grad_norm": 1.2595777690018868,
      "learning_rate": 0.003,
      "loss": 4.0159,
      "step": 12690
    },
    {
      "epoch": 0.12691,
      "grad_norm": 1.1988396498532445,
      "learning_rate": 0.003,
      "loss": 4.0043,
      "step": 12691
    },
    {
      "epoch": 0.12692,
      "grad_norm": 1.130646827975255,
      "learning_rate": 0.003,
      "loss": 4.0357,
      "step": 12692
    },
    {
      "epoch": 0.12693,
      "grad_norm": 1.1694215564141754,
      "learning_rate": 0.003,
      "loss": 4.0092,
      "step": 12693
    },
    {
      "epoch": 0.12694,
      "grad_norm": 1.2250507543257125,
      "learning_rate": 0.003,
      "loss": 3.9935,
      "step": 12694
    },
    {
      "epoch": 0.12695,
      "grad_norm": 1.205915660828806,
      "learning_rate": 0.003,
      "loss": 4.0315,
      "step": 12695
    },
    {
      "epoch": 0.12696,
      "grad_norm": 1.2956504501050932,
      "learning_rate": 0.003,
      "loss": 4.0108,
      "step": 12696
    },
    {
      "epoch": 0.12697,
      "grad_norm": 1.0210621754158544,
      "learning_rate": 0.003,
      "loss": 3.9882,
      "step": 12697
    },
    {
      "epoch": 0.12698,
      "grad_norm": 1.2599085606308953,
      "learning_rate": 0.003,
      "loss": 4.0007,
      "step": 12698
    },
    {
      "epoch": 0.12699,
      "grad_norm": 1.2736017966213373,
      "learning_rate": 0.003,
      "loss": 3.9961,
      "step": 12699
    },
    {
      "epoch": 0.127,
      "grad_norm": 1.069125521043092,
      "learning_rate": 0.003,
      "loss": 4.0059,
      "step": 12700
    },
    {
      "epoch": 0.12701,
      "grad_norm": 1.1512390118258113,
      "learning_rate": 0.003,
      "loss": 4.0054,
      "step": 12701
    },
    {
      "epoch": 0.12702,
      "grad_norm": 1.2300830756231789,
      "learning_rate": 0.003,
      "loss": 4.024,
      "step": 12702
    },
    {
      "epoch": 0.12703,
      "grad_norm": 1.3934301726307217,
      "learning_rate": 0.003,
      "loss": 4.0279,
      "step": 12703
    },
    {
      "epoch": 0.12704,
      "grad_norm": 1.0242474018081544,
      "learning_rate": 0.003,
      "loss": 4.0293,
      "step": 12704
    },
    {
      "epoch": 0.12705,
      "grad_norm": 1.331495197064204,
      "learning_rate": 0.003,
      "loss": 4.0299,
      "step": 12705
    },
    {
      "epoch": 0.12706,
      "grad_norm": 1.1882006045530609,
      "learning_rate": 0.003,
      "loss": 4.0135,
      "step": 12706
    },
    {
      "epoch": 0.12707,
      "grad_norm": 1.2945057105662945,
      "learning_rate": 0.003,
      "loss": 4.0349,
      "step": 12707
    },
    {
      "epoch": 0.12708,
      "grad_norm": 1.1382027832201964,
      "learning_rate": 0.003,
      "loss": 4.0406,
      "step": 12708
    },
    {
      "epoch": 0.12709,
      "grad_norm": 1.4619644889251888,
      "learning_rate": 0.003,
      "loss": 4.0346,
      "step": 12709
    },
    {
      "epoch": 0.1271,
      "grad_norm": 0.8937466524403883,
      "learning_rate": 0.003,
      "loss": 4.0218,
      "step": 12710
    },
    {
      "epoch": 0.12711,
      "grad_norm": 1.0864349938474558,
      "learning_rate": 0.003,
      "loss": 4.012,
      "step": 12711
    },
    {
      "epoch": 0.12712,
      "grad_norm": 1.5497917839594966,
      "learning_rate": 0.003,
      "loss": 4.0233,
      "step": 12712
    },
    {
      "epoch": 0.12713,
      "grad_norm": 1.183683442872549,
      "learning_rate": 0.003,
      "loss": 4.0256,
      "step": 12713
    },
    {
      "epoch": 0.12714,
      "grad_norm": 1.3295079655733,
      "learning_rate": 0.003,
      "loss": 4.0469,
      "step": 12714
    },
    {
      "epoch": 0.12715,
      "grad_norm": 0.9693033253470243,
      "learning_rate": 0.003,
      "loss": 3.9939,
      "step": 12715
    },
    {
      "epoch": 0.12716,
      "grad_norm": 1.3432193381885342,
      "learning_rate": 0.003,
      "loss": 4.0289,
      "step": 12716
    },
    {
      "epoch": 0.12717,
      "grad_norm": 1.128924336563658,
      "learning_rate": 0.003,
      "loss": 4.0355,
      "step": 12717
    },
    {
      "epoch": 0.12718,
      "grad_norm": 1.2072489511705236,
      "learning_rate": 0.003,
      "loss": 4.0201,
      "step": 12718
    },
    {
      "epoch": 0.12719,
      "grad_norm": 1.2789377715316639,
      "learning_rate": 0.003,
      "loss": 4.0427,
      "step": 12719
    },
    {
      "epoch": 0.1272,
      "grad_norm": 1.1420326588382799,
      "learning_rate": 0.003,
      "loss": 3.9891,
      "step": 12720
    },
    {
      "epoch": 0.12721,
      "grad_norm": 1.1654815751718461,
      "learning_rate": 0.003,
      "loss": 4.0368,
      "step": 12721
    },
    {
      "epoch": 0.12722,
      "grad_norm": 1.4278123071241624,
      "learning_rate": 0.003,
      "loss": 4.0388,
      "step": 12722
    },
    {
      "epoch": 0.12723,
      "grad_norm": 1.0738923650659709,
      "learning_rate": 0.003,
      "loss": 4.0338,
      "step": 12723
    },
    {
      "epoch": 0.12724,
      "grad_norm": 1.3422297872267799,
      "learning_rate": 0.003,
      "loss": 4.027,
      "step": 12724
    },
    {
      "epoch": 0.12725,
      "grad_norm": 0.9996258275269408,
      "learning_rate": 0.003,
      "loss": 4.0289,
      "step": 12725
    },
    {
      "epoch": 0.12726,
      "grad_norm": 1.310472670505521,
      "learning_rate": 0.003,
      "loss": 4.0171,
      "step": 12726
    },
    {
      "epoch": 0.12727,
      "grad_norm": 0.9766609124996919,
      "learning_rate": 0.003,
      "loss": 4.0155,
      "step": 12727
    },
    {
      "epoch": 0.12728,
      "grad_norm": 1.3007863463493645,
      "learning_rate": 0.003,
      "loss": 4.0355,
      "step": 12728
    },
    {
      "epoch": 0.12729,
      "grad_norm": 1.249819730679953,
      "learning_rate": 0.003,
      "loss": 4.0354,
      "step": 12729
    },
    {
      "epoch": 0.1273,
      "grad_norm": 1.0103597361587262,
      "learning_rate": 0.003,
      "loss": 4.0192,
      "step": 12730
    },
    {
      "epoch": 0.12731,
      "grad_norm": 1.3138143045183632,
      "learning_rate": 0.003,
      "loss": 4.003,
      "step": 12731
    },
    {
      "epoch": 0.12732,
      "grad_norm": 1.1308795357386041,
      "learning_rate": 0.003,
      "loss": 4.0022,
      "step": 12732
    },
    {
      "epoch": 0.12733,
      "grad_norm": 1.008945679356882,
      "learning_rate": 0.003,
      "loss": 4.0354,
      "step": 12733
    },
    {
      "epoch": 0.12734,
      "grad_norm": 1.3302092880616652,
      "learning_rate": 0.003,
      "loss": 4.0256,
      "step": 12734
    },
    {
      "epoch": 0.12735,
      "grad_norm": 1.264541944097203,
      "learning_rate": 0.003,
      "loss": 4.0278,
      "step": 12735
    },
    {
      "epoch": 0.12736,
      "grad_norm": 1.1314818993430544,
      "learning_rate": 0.003,
      "loss": 4.0384,
      "step": 12736
    },
    {
      "epoch": 0.12737,
      "grad_norm": 1.2516386048861714,
      "learning_rate": 0.003,
      "loss": 4.0413,
      "step": 12737
    },
    {
      "epoch": 0.12738,
      "grad_norm": 1.131131341631946,
      "learning_rate": 0.003,
      "loss": 4.0355,
      "step": 12738
    },
    {
      "epoch": 0.12739,
      "grad_norm": 1.156530685800842,
      "learning_rate": 0.003,
      "loss": 4.0323,
      "step": 12739
    },
    {
      "epoch": 0.1274,
      "grad_norm": 1.2694955448613015,
      "learning_rate": 0.003,
      "loss": 4.0157,
      "step": 12740
    },
    {
      "epoch": 0.12741,
      "grad_norm": 1.1550920436159788,
      "learning_rate": 0.003,
      "loss": 4.0314,
      "step": 12741
    },
    {
      "epoch": 0.12742,
      "grad_norm": 1.3406628844910542,
      "learning_rate": 0.003,
      "loss": 4.0097,
      "step": 12742
    },
    {
      "epoch": 0.12743,
      "grad_norm": 1.0580149423492489,
      "learning_rate": 0.003,
      "loss": 4.0085,
      "step": 12743
    },
    {
      "epoch": 0.12744,
      "grad_norm": 1.5671618568755727,
      "learning_rate": 0.003,
      "loss": 4.0229,
      "step": 12744
    },
    {
      "epoch": 0.12745,
      "grad_norm": 0.9666293075162874,
      "learning_rate": 0.003,
      "loss": 4.0378,
      "step": 12745
    },
    {
      "epoch": 0.12746,
      "grad_norm": 1.2172568887775441,
      "learning_rate": 0.003,
      "loss": 4.0487,
      "step": 12746
    },
    {
      "epoch": 0.12747,
      "grad_norm": 0.9342543557121035,
      "learning_rate": 0.003,
      "loss": 4.0367,
      "step": 12747
    },
    {
      "epoch": 0.12748,
      "grad_norm": 1.3005333446109317,
      "learning_rate": 0.003,
      "loss": 4.0434,
      "step": 12748
    },
    {
      "epoch": 0.12749,
      "grad_norm": 1.2021938544731907,
      "learning_rate": 0.003,
      "loss": 4.0318,
      "step": 12749
    },
    {
      "epoch": 0.1275,
      "grad_norm": 1.2152856263128113,
      "learning_rate": 0.003,
      "loss": 4.0387,
      "step": 12750
    },
    {
      "epoch": 0.12751,
      "grad_norm": 1.0638444261787958,
      "learning_rate": 0.003,
      "loss": 4.0458,
      "step": 12751
    },
    {
      "epoch": 0.12752,
      "grad_norm": 1.3389818000295493,
      "learning_rate": 0.003,
      "loss": 4.0343,
      "step": 12752
    },
    {
      "epoch": 0.12753,
      "grad_norm": 1.270707387228433,
      "learning_rate": 0.003,
      "loss": 4.0224,
      "step": 12753
    },
    {
      "epoch": 0.12754,
      "grad_norm": 1.13608709343689,
      "learning_rate": 0.003,
      "loss": 4.0492,
      "step": 12754
    },
    {
      "epoch": 0.12755,
      "grad_norm": 1.3147802356213878,
      "learning_rate": 0.003,
      "loss": 4.0194,
      "step": 12755
    },
    {
      "epoch": 0.12756,
      "grad_norm": 1.2108384992780414,
      "learning_rate": 0.003,
      "loss": 4.037,
      "step": 12756
    },
    {
      "epoch": 0.12757,
      "grad_norm": 1.0753953018228892,
      "learning_rate": 0.003,
      "loss": 4.0234,
      "step": 12757
    },
    {
      "epoch": 0.12758,
      "grad_norm": 1.7678007404208398,
      "learning_rate": 0.003,
      "loss": 4.0301,
      "step": 12758
    },
    {
      "epoch": 0.12759,
      "grad_norm": 0.8688622010901259,
      "learning_rate": 0.003,
      "loss": 4.0172,
      "step": 12759
    },
    {
      "epoch": 0.1276,
      "grad_norm": 1.2116409016348908,
      "learning_rate": 0.003,
      "loss": 4.0395,
      "step": 12760
    },
    {
      "epoch": 0.12761,
      "grad_norm": 1.254787253319999,
      "learning_rate": 0.003,
      "loss": 4.0295,
      "step": 12761
    },
    {
      "epoch": 0.12762,
      "grad_norm": 1.1585454805799762,
      "learning_rate": 0.003,
      "loss": 3.994,
      "step": 12762
    },
    {
      "epoch": 0.12763,
      "grad_norm": 1.513644571729462,
      "learning_rate": 0.003,
      "loss": 3.9971,
      "step": 12763
    },
    {
      "epoch": 0.12764,
      "grad_norm": 0.9027207563494971,
      "learning_rate": 0.003,
      "loss": 4.0467,
      "step": 12764
    },
    {
      "epoch": 0.12765,
      "grad_norm": 1.317176094864067,
      "learning_rate": 0.003,
      "loss": 4.0385,
      "step": 12765
    },
    {
      "epoch": 0.12766,
      "grad_norm": 1.117017075660199,
      "learning_rate": 0.003,
      "loss": 4.038,
      "step": 12766
    },
    {
      "epoch": 0.12767,
      "grad_norm": 1.1414308429206412,
      "learning_rate": 0.003,
      "loss": 4.0258,
      "step": 12767
    },
    {
      "epoch": 0.12768,
      "grad_norm": 1.2669417304006838,
      "learning_rate": 0.003,
      "loss": 4.0487,
      "step": 12768
    },
    {
      "epoch": 0.12769,
      "grad_norm": 0.9801818112259263,
      "learning_rate": 0.003,
      "loss": 4.0247,
      "step": 12769
    },
    {
      "epoch": 0.1277,
      "grad_norm": 1.2263495100618897,
      "learning_rate": 0.003,
      "loss": 3.993,
      "step": 12770
    },
    {
      "epoch": 0.12771,
      "grad_norm": 1.1259193359926396,
      "learning_rate": 0.003,
      "loss": 4.0239,
      "step": 12771
    },
    {
      "epoch": 0.12772,
      "grad_norm": 1.2606850167055357,
      "learning_rate": 0.003,
      "loss": 4.0226,
      "step": 12772
    },
    {
      "epoch": 0.12773,
      "grad_norm": 1.1914645637275918,
      "learning_rate": 0.003,
      "loss": 4.0066,
      "step": 12773
    },
    {
      "epoch": 0.12774,
      "grad_norm": 1.110448610605845,
      "learning_rate": 0.003,
      "loss": 3.978,
      "step": 12774
    },
    {
      "epoch": 0.12775,
      "grad_norm": 1.4574492731231232,
      "learning_rate": 0.003,
      "loss": 4.0071,
      "step": 12775
    },
    {
      "epoch": 0.12776,
      "grad_norm": 1.2136472323541958,
      "learning_rate": 0.003,
      "loss": 4.0186,
      "step": 12776
    },
    {
      "epoch": 0.12777,
      "grad_norm": 1.1822253563491354,
      "learning_rate": 0.003,
      "loss": 4.0206,
      "step": 12777
    },
    {
      "epoch": 0.12778,
      "grad_norm": 1.0520996981570394,
      "learning_rate": 0.003,
      "loss": 4.02,
      "step": 12778
    },
    {
      "epoch": 0.12779,
      "grad_norm": 1.2831106263336869,
      "learning_rate": 0.003,
      "loss": 4.0436,
      "step": 12779
    },
    {
      "epoch": 0.1278,
      "grad_norm": 1.0560172383499893,
      "learning_rate": 0.003,
      "loss": 4.0195,
      "step": 12780
    },
    {
      "epoch": 0.12781,
      "grad_norm": 1.258889909849877,
      "learning_rate": 0.003,
      "loss": 4.006,
      "step": 12781
    },
    {
      "epoch": 0.12782,
      "grad_norm": 1.2827369883028994,
      "learning_rate": 0.003,
      "loss": 4.0381,
      "step": 12782
    },
    {
      "epoch": 0.12783,
      "grad_norm": 1.2007328133659945,
      "learning_rate": 0.003,
      "loss": 4.0291,
      "step": 12783
    },
    {
      "epoch": 0.12784,
      "grad_norm": 1.2077859948776573,
      "learning_rate": 0.003,
      "loss": 4.0215,
      "step": 12784
    },
    {
      "epoch": 0.12785,
      "grad_norm": 1.1480481310658968,
      "learning_rate": 0.003,
      "loss": 3.9951,
      "step": 12785
    },
    {
      "epoch": 0.12786,
      "grad_norm": 1.2382022064067508,
      "learning_rate": 0.003,
      "loss": 4.026,
      "step": 12786
    },
    {
      "epoch": 0.12787,
      "grad_norm": 0.9824667158119392,
      "learning_rate": 0.003,
      "loss": 4.0213,
      "step": 12787
    },
    {
      "epoch": 0.12788,
      "grad_norm": 1.1508760952959747,
      "learning_rate": 0.003,
      "loss": 4.0195,
      "step": 12788
    },
    {
      "epoch": 0.12789,
      "grad_norm": 1.1160820774172333,
      "learning_rate": 0.003,
      "loss": 4.0122,
      "step": 12789
    },
    {
      "epoch": 0.1279,
      "grad_norm": 1.2883683988031678,
      "learning_rate": 0.003,
      "loss": 4.047,
      "step": 12790
    },
    {
      "epoch": 0.12791,
      "grad_norm": 1.333631695602577,
      "learning_rate": 0.003,
      "loss": 4.0311,
      "step": 12791
    },
    {
      "epoch": 0.12792,
      "grad_norm": 1.069325969160836,
      "learning_rate": 0.003,
      "loss": 4.0264,
      "step": 12792
    },
    {
      "epoch": 0.12793,
      "grad_norm": 1.3669166797853736,
      "learning_rate": 0.003,
      "loss": 4.0325,
      "step": 12793
    },
    {
      "epoch": 0.12794,
      "grad_norm": 1.1649933110212536,
      "learning_rate": 0.003,
      "loss": 4.0234,
      "step": 12794
    },
    {
      "epoch": 0.12795,
      "grad_norm": 1.330830664464028,
      "learning_rate": 0.003,
      "loss": 4.0249,
      "step": 12795
    },
    {
      "epoch": 0.12796,
      "grad_norm": 1.1422954086481265,
      "learning_rate": 0.003,
      "loss": 3.9971,
      "step": 12796
    },
    {
      "epoch": 0.12797,
      "grad_norm": 1.5008484587331534,
      "learning_rate": 0.003,
      "loss": 4.0095,
      "step": 12797
    },
    {
      "epoch": 0.12798,
      "grad_norm": 1.0499892932988404,
      "learning_rate": 0.003,
      "loss": 3.9983,
      "step": 12798
    },
    {
      "epoch": 0.12799,
      "grad_norm": 1.1323962859092773,
      "learning_rate": 0.003,
      "loss": 4.0331,
      "step": 12799
    },
    {
      "epoch": 0.128,
      "grad_norm": 1.0851630911927252,
      "learning_rate": 0.003,
      "loss": 4.0063,
      "step": 12800
    },
    {
      "epoch": 0.12801,
      "grad_norm": 1.5752077380787768,
      "learning_rate": 0.003,
      "loss": 4.0331,
      "step": 12801
    },
    {
      "epoch": 0.12802,
      "grad_norm": 1.1017740945533272,
      "learning_rate": 0.003,
      "loss": 4.0666,
      "step": 12802
    },
    {
      "epoch": 0.12803,
      "grad_norm": 1.1667952025678203,
      "learning_rate": 0.003,
      "loss": 4.0067,
      "step": 12803
    },
    {
      "epoch": 0.12804,
      "grad_norm": 1.0195581846750794,
      "learning_rate": 0.003,
      "loss": 4.0214,
      "step": 12804
    },
    {
      "epoch": 0.12805,
      "grad_norm": 1.2081431236473223,
      "learning_rate": 0.003,
      "loss": 4.0325,
      "step": 12805
    },
    {
      "epoch": 0.12806,
      "grad_norm": 0.9719602406726462,
      "learning_rate": 0.003,
      "loss": 4.0208,
      "step": 12806
    },
    {
      "epoch": 0.12807,
      "grad_norm": 1.299116883426473,
      "learning_rate": 0.003,
      "loss": 4.0398,
      "step": 12807
    },
    {
      "epoch": 0.12808,
      "grad_norm": 0.9210572627915543,
      "learning_rate": 0.003,
      "loss": 4.0238,
      "step": 12808
    },
    {
      "epoch": 0.12809,
      "grad_norm": 1.0176288806302762,
      "learning_rate": 0.003,
      "loss": 4.0097,
      "step": 12809
    },
    {
      "epoch": 0.1281,
      "grad_norm": 1.179455454339618,
      "learning_rate": 0.003,
      "loss": 4.0256,
      "step": 12810
    },
    {
      "epoch": 0.12811,
      "grad_norm": 1.125011783055268,
      "learning_rate": 0.003,
      "loss": 4.0261,
      "step": 12811
    },
    {
      "epoch": 0.12812,
      "grad_norm": 1.2823431624618908,
      "learning_rate": 0.003,
      "loss": 4.0334,
      "step": 12812
    },
    {
      "epoch": 0.12813,
      "grad_norm": 1.0853685286518326,
      "learning_rate": 0.003,
      "loss": 4.036,
      "step": 12813
    },
    {
      "epoch": 0.12814,
      "grad_norm": 1.3098030352639953,
      "learning_rate": 0.003,
      "loss": 4.0447,
      "step": 12814
    },
    {
      "epoch": 0.12815,
      "grad_norm": 1.4009011882707822,
      "learning_rate": 0.003,
      "loss": 4.0068,
      "step": 12815
    },
    {
      "epoch": 0.12816,
      "grad_norm": 1.0624113021821955,
      "learning_rate": 0.003,
      "loss": 4.0191,
      "step": 12816
    },
    {
      "epoch": 0.12817,
      "grad_norm": 1.5273471712819569,
      "learning_rate": 0.003,
      "loss": 4.0298,
      "step": 12817
    },
    {
      "epoch": 0.12818,
      "grad_norm": 1.1622392449946617,
      "learning_rate": 0.003,
      "loss": 4.0127,
      "step": 12818
    },
    {
      "epoch": 0.12819,
      "grad_norm": 1.0737180829548525,
      "learning_rate": 0.003,
      "loss": 4.0338,
      "step": 12819
    },
    {
      "epoch": 0.1282,
      "grad_norm": 1.5028275338727308,
      "learning_rate": 0.003,
      "loss": 4.0077,
      "step": 12820
    },
    {
      "epoch": 0.12821,
      "grad_norm": 0.9820781601781966,
      "learning_rate": 0.003,
      "loss": 4.0227,
      "step": 12821
    },
    {
      "epoch": 0.12822,
      "grad_norm": 1.189653346367063,
      "learning_rate": 0.003,
      "loss": 4.0519,
      "step": 12822
    },
    {
      "epoch": 0.12823,
      "grad_norm": 1.305750372775048,
      "learning_rate": 0.003,
      "loss": 4.0459,
      "step": 12823
    },
    {
      "epoch": 0.12824,
      "grad_norm": 1.1085853553524894,
      "learning_rate": 0.003,
      "loss": 4.0309,
      "step": 12824
    },
    {
      "epoch": 0.12825,
      "grad_norm": 1.3521052760748702,
      "learning_rate": 0.003,
      "loss": 4.0332,
      "step": 12825
    },
    {
      "epoch": 0.12826,
      "grad_norm": 1.1339361194151034,
      "learning_rate": 0.003,
      "loss": 4.0349,
      "step": 12826
    },
    {
      "epoch": 0.12827,
      "grad_norm": 1.2919587450515737,
      "learning_rate": 0.003,
      "loss": 4.0309,
      "step": 12827
    },
    {
      "epoch": 0.12828,
      "grad_norm": 1.1162216951237776,
      "learning_rate": 0.003,
      "loss": 4.0531,
      "step": 12828
    },
    {
      "epoch": 0.12829,
      "grad_norm": 1.1035553365879642,
      "learning_rate": 0.003,
      "loss": 4.0063,
      "step": 12829
    },
    {
      "epoch": 0.1283,
      "grad_norm": 1.418334221315941,
      "learning_rate": 0.003,
      "loss": 4.0361,
      "step": 12830
    },
    {
      "epoch": 0.12831,
      "grad_norm": 0.827587958149206,
      "learning_rate": 0.003,
      "loss": 4.0078,
      "step": 12831
    },
    {
      "epoch": 0.12832,
      "grad_norm": 0.9941792427885942,
      "learning_rate": 0.003,
      "loss": 4.0199,
      "step": 12832
    },
    {
      "epoch": 0.12833,
      "grad_norm": 1.3415460248468394,
      "learning_rate": 0.003,
      "loss": 4.0737,
      "step": 12833
    },
    {
      "epoch": 0.12834,
      "grad_norm": 1.5454290749754083,
      "learning_rate": 0.003,
      "loss": 4.0281,
      "step": 12834
    },
    {
      "epoch": 0.12835,
      "grad_norm": 0.966935009632508,
      "learning_rate": 0.003,
      "loss": 4.0187,
      "step": 12835
    },
    {
      "epoch": 0.12836,
      "grad_norm": 1.3383965177659312,
      "learning_rate": 0.003,
      "loss": 4.0295,
      "step": 12836
    },
    {
      "epoch": 0.12837,
      "grad_norm": 1.214439584671967,
      "learning_rate": 0.003,
      "loss": 4.0577,
      "step": 12837
    },
    {
      "epoch": 0.12838,
      "grad_norm": 1.2522112846423572,
      "learning_rate": 0.003,
      "loss": 4.0294,
      "step": 12838
    },
    {
      "epoch": 0.12839,
      "grad_norm": 1.114227336414836,
      "learning_rate": 0.003,
      "loss": 4.0148,
      "step": 12839
    },
    {
      "epoch": 0.1284,
      "grad_norm": 1.3933317053817007,
      "learning_rate": 0.003,
      "loss": 4.0185,
      "step": 12840
    },
    {
      "epoch": 0.12841,
      "grad_norm": 0.9084725468732007,
      "learning_rate": 0.003,
      "loss": 3.9874,
      "step": 12841
    },
    {
      "epoch": 0.12842,
      "grad_norm": 1.3593545948818664,
      "learning_rate": 0.003,
      "loss": 4.0289,
      "step": 12842
    },
    {
      "epoch": 0.12843,
      "grad_norm": 1.05750875955245,
      "learning_rate": 0.003,
      "loss": 4.0273,
      "step": 12843
    },
    {
      "epoch": 0.12844,
      "grad_norm": 1.2623165134390104,
      "learning_rate": 0.003,
      "loss": 4.0229,
      "step": 12844
    },
    {
      "epoch": 0.12845,
      "grad_norm": 1.215934308443997,
      "learning_rate": 0.003,
      "loss": 4.0122,
      "step": 12845
    },
    {
      "epoch": 0.12846,
      "grad_norm": 1.2534991068204633,
      "learning_rate": 0.003,
      "loss": 4.0233,
      "step": 12846
    },
    {
      "epoch": 0.12847,
      "grad_norm": 1.2460794845808603,
      "learning_rate": 0.003,
      "loss": 4.0396,
      "step": 12847
    },
    {
      "epoch": 0.12848,
      "grad_norm": 1.1933751931734735,
      "learning_rate": 0.003,
      "loss": 4.0368,
      "step": 12848
    },
    {
      "epoch": 0.12849,
      "grad_norm": 1.166245408826738,
      "learning_rate": 0.003,
      "loss": 3.9932,
      "step": 12849
    },
    {
      "epoch": 0.1285,
      "grad_norm": 1.1464404870864988,
      "learning_rate": 0.003,
      "loss": 4.003,
      "step": 12850
    },
    {
      "epoch": 0.12851,
      "grad_norm": 1.043965368615515,
      "learning_rate": 0.003,
      "loss": 4.0137,
      "step": 12851
    },
    {
      "epoch": 0.12852,
      "grad_norm": 1.6275648542340913,
      "learning_rate": 0.003,
      "loss": 4.0339,
      "step": 12852
    },
    {
      "epoch": 0.12853,
      "grad_norm": 0.9973713390567429,
      "learning_rate": 0.003,
      "loss": 4.0275,
      "step": 12853
    },
    {
      "epoch": 0.12854,
      "grad_norm": 1.3145129655582115,
      "learning_rate": 0.003,
      "loss": 4.0469,
      "step": 12854
    },
    {
      "epoch": 0.12855,
      "grad_norm": 0.9255561273968774,
      "learning_rate": 0.003,
      "loss": 3.9876,
      "step": 12855
    },
    {
      "epoch": 0.12856,
      "grad_norm": 1.123206468742272,
      "learning_rate": 0.003,
      "loss": 4.0082,
      "step": 12856
    },
    {
      "epoch": 0.12857,
      "grad_norm": 1.165459338103236,
      "learning_rate": 0.003,
      "loss": 4.0018,
      "step": 12857
    },
    {
      "epoch": 0.12858,
      "grad_norm": 1.1367968392625207,
      "learning_rate": 0.003,
      "loss": 4.0375,
      "step": 12858
    },
    {
      "epoch": 0.12859,
      "grad_norm": 0.9960241599180506,
      "learning_rate": 0.003,
      "loss": 4.0013,
      "step": 12859
    },
    {
      "epoch": 0.1286,
      "grad_norm": 1.3943574392367513,
      "learning_rate": 0.003,
      "loss": 4.0267,
      "step": 12860
    },
    {
      "epoch": 0.12861,
      "grad_norm": 1.0839718347871652,
      "learning_rate": 0.003,
      "loss": 4.029,
      "step": 12861
    },
    {
      "epoch": 0.12862,
      "grad_norm": 1.450534960996423,
      "learning_rate": 0.003,
      "loss": 4.0196,
      "step": 12862
    },
    {
      "epoch": 0.12863,
      "grad_norm": 1.2124543848819562,
      "learning_rate": 0.003,
      "loss": 4.039,
      "step": 12863
    },
    {
      "epoch": 0.12864,
      "grad_norm": 1.0730704444607297,
      "learning_rate": 0.003,
      "loss": 4.0279,
      "step": 12864
    },
    {
      "epoch": 0.12865,
      "grad_norm": 1.416501422991841,
      "learning_rate": 0.003,
      "loss": 4.0178,
      "step": 12865
    },
    {
      "epoch": 0.12866,
      "grad_norm": 1.2864051589898484,
      "learning_rate": 0.003,
      "loss": 4.0359,
      "step": 12866
    },
    {
      "epoch": 0.12867,
      "grad_norm": 1.3012900075342926,
      "learning_rate": 0.003,
      "loss": 4.007,
      "step": 12867
    },
    {
      "epoch": 0.12868,
      "grad_norm": 1.2316271420711697,
      "learning_rate": 0.003,
      "loss": 4.0222,
      "step": 12868
    },
    {
      "epoch": 0.12869,
      "grad_norm": 1.0559148720019054,
      "learning_rate": 0.003,
      "loss": 3.9927,
      "step": 12869
    },
    {
      "epoch": 0.1287,
      "grad_norm": 1.2076505185344402,
      "learning_rate": 0.003,
      "loss": 4.0274,
      "step": 12870
    },
    {
      "epoch": 0.12871,
      "grad_norm": 1.0351257024278024,
      "learning_rate": 0.003,
      "loss": 4.0555,
      "step": 12871
    },
    {
      "epoch": 0.12872,
      "grad_norm": 1.4946297034159481,
      "learning_rate": 0.003,
      "loss": 4.0131,
      "step": 12872
    },
    {
      "epoch": 0.12873,
      "grad_norm": 1.0444761583654851,
      "learning_rate": 0.003,
      "loss": 4.0116,
      "step": 12873
    },
    {
      "epoch": 0.12874,
      "grad_norm": 1.4111365525307598,
      "learning_rate": 0.003,
      "loss": 4.0379,
      "step": 12874
    },
    {
      "epoch": 0.12875,
      "grad_norm": 0.92305991229768,
      "learning_rate": 0.003,
      "loss": 4.0383,
      "step": 12875
    },
    {
      "epoch": 0.12876,
      "grad_norm": 1.181940558244761,
      "learning_rate": 0.003,
      "loss": 4.0214,
      "step": 12876
    },
    {
      "epoch": 0.12877,
      "grad_norm": 1.3588258265079936,
      "learning_rate": 0.003,
      "loss": 4.0183,
      "step": 12877
    },
    {
      "epoch": 0.12878,
      "grad_norm": 1.1435193997564612,
      "learning_rate": 0.003,
      "loss": 4.0322,
      "step": 12878
    },
    {
      "epoch": 0.12879,
      "grad_norm": 1.1931087094596353,
      "learning_rate": 0.003,
      "loss": 3.9982,
      "step": 12879
    },
    {
      "epoch": 0.1288,
      "grad_norm": 1.0300788873313722,
      "learning_rate": 0.003,
      "loss": 4.0035,
      "step": 12880
    },
    {
      "epoch": 0.12881,
      "grad_norm": 1.2086016131275303,
      "learning_rate": 0.003,
      "loss": 4.0306,
      "step": 12881
    },
    {
      "epoch": 0.12882,
      "grad_norm": 1.0888737394809258,
      "learning_rate": 0.003,
      "loss": 4.0138,
      "step": 12882
    },
    {
      "epoch": 0.12883,
      "grad_norm": 1.2692961787043513,
      "learning_rate": 0.003,
      "loss": 4.0299,
      "step": 12883
    },
    {
      "epoch": 0.12884,
      "grad_norm": 0.906168836593753,
      "learning_rate": 0.003,
      "loss": 4.0259,
      "step": 12884
    },
    {
      "epoch": 0.12885,
      "grad_norm": 1.0714132168907917,
      "learning_rate": 0.003,
      "loss": 4.0012,
      "step": 12885
    },
    {
      "epoch": 0.12886,
      "grad_norm": 1.384801008569906,
      "learning_rate": 0.003,
      "loss": 4.0257,
      "step": 12886
    },
    {
      "epoch": 0.12887,
      "grad_norm": 1.0782291140817764,
      "learning_rate": 0.003,
      "loss": 4.0427,
      "step": 12887
    },
    {
      "epoch": 0.12888,
      "grad_norm": 1.2316065046066518,
      "learning_rate": 0.003,
      "loss": 4.0152,
      "step": 12888
    },
    {
      "epoch": 0.12889,
      "grad_norm": 1.2766952978758708,
      "learning_rate": 0.003,
      "loss": 4.0465,
      "step": 12889
    },
    {
      "epoch": 0.1289,
      "grad_norm": 1.14895904230218,
      "learning_rate": 0.003,
      "loss": 4.0254,
      "step": 12890
    },
    {
      "epoch": 0.12891,
      "grad_norm": 1.1935543230517358,
      "learning_rate": 0.003,
      "loss": 4.008,
      "step": 12891
    },
    {
      "epoch": 0.12892,
      "grad_norm": 1.0722825731155168,
      "learning_rate": 0.003,
      "loss": 4.0152,
      "step": 12892
    },
    {
      "epoch": 0.12893,
      "grad_norm": 1.1006109032398521,
      "learning_rate": 0.003,
      "loss": 4.066,
      "step": 12893
    },
    {
      "epoch": 0.12894,
      "grad_norm": 1.1575330038873581,
      "learning_rate": 0.003,
      "loss": 4.0277,
      "step": 12894
    },
    {
      "epoch": 0.12895,
      "grad_norm": 1.4045037649238294,
      "learning_rate": 0.003,
      "loss": 4.0416,
      "step": 12895
    },
    {
      "epoch": 0.12896,
      "grad_norm": 1.2226745359152975,
      "learning_rate": 0.003,
      "loss": 4.0167,
      "step": 12896
    },
    {
      "epoch": 0.12897,
      "grad_norm": 0.9810855290951486,
      "learning_rate": 0.003,
      "loss": 3.9741,
      "step": 12897
    },
    {
      "epoch": 0.12898,
      "grad_norm": 1.4321113882827925,
      "learning_rate": 0.003,
      "loss": 4.0228,
      "step": 12898
    },
    {
      "epoch": 0.12899,
      "grad_norm": 0.9279563463843422,
      "learning_rate": 0.003,
      "loss": 4.0446,
      "step": 12899
    },
    {
      "epoch": 0.129,
      "grad_norm": 1.0112843463751415,
      "learning_rate": 0.003,
      "loss": 4.0145,
      "step": 12900
    },
    {
      "epoch": 0.12901,
      "grad_norm": 1.258312102527465,
      "learning_rate": 0.003,
      "loss": 4.0304,
      "step": 12901
    },
    {
      "epoch": 0.12902,
      "grad_norm": 1.2599438181290643,
      "learning_rate": 0.003,
      "loss": 4.0285,
      "step": 12902
    },
    {
      "epoch": 0.12903,
      "grad_norm": 1.3168883187604847,
      "learning_rate": 0.003,
      "loss": 4.0511,
      "step": 12903
    },
    {
      "epoch": 0.12904,
      "grad_norm": 1.158850205061437,
      "learning_rate": 0.003,
      "loss": 3.99,
      "step": 12904
    },
    {
      "epoch": 0.12905,
      "grad_norm": 1.2534304093157755,
      "learning_rate": 0.003,
      "loss": 4.0135,
      "step": 12905
    },
    {
      "epoch": 0.12906,
      "grad_norm": 1.2279071281854934,
      "learning_rate": 0.003,
      "loss": 4.0161,
      "step": 12906
    },
    {
      "epoch": 0.12907,
      "grad_norm": 1.3135077430669198,
      "learning_rate": 0.003,
      "loss": 4.0312,
      "step": 12907
    },
    {
      "epoch": 0.12908,
      "grad_norm": 1.1340062936234097,
      "learning_rate": 0.003,
      "loss": 3.9859,
      "step": 12908
    },
    {
      "epoch": 0.12909,
      "grad_norm": 1.2105010880042733,
      "learning_rate": 0.003,
      "loss": 4.0292,
      "step": 12909
    },
    {
      "epoch": 0.1291,
      "grad_norm": 1.035585642600207,
      "learning_rate": 0.003,
      "loss": 4.0137,
      "step": 12910
    },
    {
      "epoch": 0.12911,
      "grad_norm": 1.2677876907556864,
      "learning_rate": 0.003,
      "loss": 4.0239,
      "step": 12911
    },
    {
      "epoch": 0.12912,
      "grad_norm": 1.1098026814924011,
      "learning_rate": 0.003,
      "loss": 3.9937,
      "step": 12912
    },
    {
      "epoch": 0.12913,
      "grad_norm": 1.3247567294192373,
      "learning_rate": 0.003,
      "loss": 4.0299,
      "step": 12913
    },
    {
      "epoch": 0.12914,
      "grad_norm": 1.087353870991555,
      "learning_rate": 0.003,
      "loss": 4.0313,
      "step": 12914
    },
    {
      "epoch": 0.12915,
      "grad_norm": 1.3235097791685535,
      "learning_rate": 0.003,
      "loss": 3.9984,
      "step": 12915
    },
    {
      "epoch": 0.12916,
      "grad_norm": 1.0418896283143477,
      "learning_rate": 0.003,
      "loss": 4.0398,
      "step": 12916
    },
    {
      "epoch": 0.12917,
      "grad_norm": 1.430243661406629,
      "learning_rate": 0.003,
      "loss": 4.0376,
      "step": 12917
    },
    {
      "epoch": 0.12918,
      "grad_norm": 1.244820526694616,
      "learning_rate": 0.003,
      "loss": 4.0539,
      "step": 12918
    },
    {
      "epoch": 0.12919,
      "grad_norm": 1.0491507724949716,
      "learning_rate": 0.003,
      "loss": 4.0227,
      "step": 12919
    },
    {
      "epoch": 0.1292,
      "grad_norm": 1.7564519294617458,
      "learning_rate": 0.003,
      "loss": 4.0342,
      "step": 12920
    },
    {
      "epoch": 0.12921,
      "grad_norm": 0.9365160750153148,
      "learning_rate": 0.003,
      "loss": 4.0348,
      "step": 12921
    },
    {
      "epoch": 0.12922,
      "grad_norm": 1.2570769167876839,
      "learning_rate": 0.003,
      "loss": 4.035,
      "step": 12922
    },
    {
      "epoch": 0.12923,
      "grad_norm": 1.1340761136316975,
      "learning_rate": 0.003,
      "loss": 3.9872,
      "step": 12923
    },
    {
      "epoch": 0.12924,
      "grad_norm": 1.1839216294414823,
      "learning_rate": 0.003,
      "loss": 4.0409,
      "step": 12924
    },
    {
      "epoch": 0.12925,
      "grad_norm": 1.2177068177432462,
      "learning_rate": 0.003,
      "loss": 4.0271,
      "step": 12925
    },
    {
      "epoch": 0.12926,
      "grad_norm": 1.3158018270836227,
      "learning_rate": 0.003,
      "loss": 4.0311,
      "step": 12926
    },
    {
      "epoch": 0.12927,
      "grad_norm": 1.0917985819220148,
      "learning_rate": 0.003,
      "loss": 4.0207,
      "step": 12927
    },
    {
      "epoch": 0.12928,
      "grad_norm": 1.2406607657891981,
      "learning_rate": 0.003,
      "loss": 4.0335,
      "step": 12928
    },
    {
      "epoch": 0.12929,
      "grad_norm": 1.2452029167820236,
      "learning_rate": 0.003,
      "loss": 4.0092,
      "step": 12929
    },
    {
      "epoch": 0.1293,
      "grad_norm": 1.0704826750375576,
      "learning_rate": 0.003,
      "loss": 4.0279,
      "step": 12930
    },
    {
      "epoch": 0.12931,
      "grad_norm": 1.1059105445383641,
      "learning_rate": 0.003,
      "loss": 4.0223,
      "step": 12931
    },
    {
      "epoch": 0.12932,
      "grad_norm": 1.2032685945299755,
      "learning_rate": 0.003,
      "loss": 4.0209,
      "step": 12932
    },
    {
      "epoch": 0.12933,
      "grad_norm": 1.139197150088746,
      "learning_rate": 0.003,
      "loss": 4.0335,
      "step": 12933
    },
    {
      "epoch": 0.12934,
      "grad_norm": 1.1274527096083895,
      "learning_rate": 0.003,
      "loss": 4.03,
      "step": 12934
    },
    {
      "epoch": 0.12935,
      "grad_norm": 1.507321804546948,
      "learning_rate": 0.003,
      "loss": 4.0295,
      "step": 12935
    },
    {
      "epoch": 0.12936,
      "grad_norm": 0.975323122122746,
      "learning_rate": 0.003,
      "loss": 4.0329,
      "step": 12936
    },
    {
      "epoch": 0.12937,
      "grad_norm": 1.3623939109449617,
      "learning_rate": 0.003,
      "loss": 4.0061,
      "step": 12937
    },
    {
      "epoch": 0.12938,
      "grad_norm": 1.057288137364664,
      "learning_rate": 0.003,
      "loss": 4.04,
      "step": 12938
    },
    {
      "epoch": 0.12939,
      "grad_norm": 1.3259203102570047,
      "learning_rate": 0.003,
      "loss": 4.0089,
      "step": 12939
    },
    {
      "epoch": 0.1294,
      "grad_norm": 1.110701965074797,
      "learning_rate": 0.003,
      "loss": 4.0251,
      "step": 12940
    },
    {
      "epoch": 0.12941,
      "grad_norm": 1.287210392735098,
      "learning_rate": 0.003,
      "loss": 4.0264,
      "step": 12941
    },
    {
      "epoch": 0.12942,
      "grad_norm": 1.1518747016361184,
      "learning_rate": 0.003,
      "loss": 4.0146,
      "step": 12942
    },
    {
      "epoch": 0.12943,
      "grad_norm": 1.1900719699662106,
      "learning_rate": 0.003,
      "loss": 4.0176,
      "step": 12943
    },
    {
      "epoch": 0.12944,
      "grad_norm": 1.154771928089284,
      "learning_rate": 0.003,
      "loss": 4.0259,
      "step": 12944
    },
    {
      "epoch": 0.12945,
      "grad_norm": 1.1482591004768885,
      "learning_rate": 0.003,
      "loss": 4.0358,
      "step": 12945
    },
    {
      "epoch": 0.12946,
      "grad_norm": 1.0932439918983072,
      "learning_rate": 0.003,
      "loss": 4.0436,
      "step": 12946
    },
    {
      "epoch": 0.12947,
      "grad_norm": 1.2097249296318278,
      "learning_rate": 0.003,
      "loss": 4.0155,
      "step": 12947
    },
    {
      "epoch": 0.12948,
      "grad_norm": 1.2986622397477905,
      "learning_rate": 0.003,
      "loss": 4.0083,
      "step": 12948
    },
    {
      "epoch": 0.12949,
      "grad_norm": 1.3825525145439725,
      "learning_rate": 0.003,
      "loss": 4.022,
      "step": 12949
    },
    {
      "epoch": 0.1295,
      "grad_norm": 1.1758601196064937,
      "learning_rate": 0.003,
      "loss": 4.0188,
      "step": 12950
    },
    {
      "epoch": 0.12951,
      "grad_norm": 1.3591353151221124,
      "learning_rate": 0.003,
      "loss": 4.0242,
      "step": 12951
    },
    {
      "epoch": 0.12952,
      "grad_norm": 0.9131650280317642,
      "learning_rate": 0.003,
      "loss": 4.0097,
      "step": 12952
    },
    {
      "epoch": 0.12953,
      "grad_norm": 1.1014974217693525,
      "learning_rate": 0.003,
      "loss": 4.0339,
      "step": 12953
    },
    {
      "epoch": 0.12954,
      "grad_norm": 1.3026270117165117,
      "learning_rate": 0.003,
      "loss": 4.0045,
      "step": 12954
    },
    {
      "epoch": 0.12955,
      "grad_norm": 1.2136899689336957,
      "learning_rate": 0.003,
      "loss": 4.0387,
      "step": 12955
    },
    {
      "epoch": 0.12956,
      "grad_norm": 1.1081746289222236,
      "learning_rate": 0.003,
      "loss": 4.0093,
      "step": 12956
    },
    {
      "epoch": 0.12957,
      "grad_norm": 1.3558413742243949,
      "learning_rate": 0.003,
      "loss": 4.0148,
      "step": 12957
    },
    {
      "epoch": 0.12958,
      "grad_norm": 1.2985873728581943,
      "learning_rate": 0.003,
      "loss": 4.0221,
      "step": 12958
    },
    {
      "epoch": 0.12959,
      "grad_norm": 1.0915415838285472,
      "learning_rate": 0.003,
      "loss": 4.0429,
      "step": 12959
    },
    {
      "epoch": 0.1296,
      "grad_norm": 1.2271488870813643,
      "learning_rate": 0.003,
      "loss": 4.0195,
      "step": 12960
    },
    {
      "epoch": 0.12961,
      "grad_norm": 1.0660449140643198,
      "learning_rate": 0.003,
      "loss": 4.0498,
      "step": 12961
    },
    {
      "epoch": 0.12962,
      "grad_norm": 1.2713070418720303,
      "learning_rate": 0.003,
      "loss": 4.0366,
      "step": 12962
    },
    {
      "epoch": 0.12963,
      "grad_norm": 1.1140722977923936,
      "learning_rate": 0.003,
      "loss": 4.0267,
      "step": 12963
    },
    {
      "epoch": 0.12964,
      "grad_norm": 1.4334928816984343,
      "learning_rate": 0.003,
      "loss": 4.0224,
      "step": 12964
    },
    {
      "epoch": 0.12965,
      "grad_norm": 1.0798644543572762,
      "learning_rate": 0.003,
      "loss": 4.0005,
      "step": 12965
    },
    {
      "epoch": 0.12966,
      "grad_norm": 1.3576702711558502,
      "learning_rate": 0.003,
      "loss": 4.0198,
      "step": 12966
    },
    {
      "epoch": 0.12967,
      "grad_norm": 0.9565988586229004,
      "learning_rate": 0.003,
      "loss": 3.9703,
      "step": 12967
    },
    {
      "epoch": 0.12968,
      "grad_norm": 1.356676054202947,
      "learning_rate": 0.003,
      "loss": 4.0367,
      "step": 12968
    },
    {
      "epoch": 0.12969,
      "grad_norm": 1.1836635707869203,
      "learning_rate": 0.003,
      "loss": 4.0159,
      "step": 12969
    },
    {
      "epoch": 0.1297,
      "grad_norm": 1.1187549800265795,
      "learning_rate": 0.003,
      "loss": 3.9948,
      "step": 12970
    },
    {
      "epoch": 0.12971,
      "grad_norm": 1.1920675764967605,
      "learning_rate": 0.003,
      "loss": 4.0179,
      "step": 12971
    },
    {
      "epoch": 0.12972,
      "grad_norm": 1.1969678957485745,
      "learning_rate": 0.003,
      "loss": 4.0236,
      "step": 12972
    },
    {
      "epoch": 0.12973,
      "grad_norm": 1.1830367876527692,
      "learning_rate": 0.003,
      "loss": 4.0571,
      "step": 12973
    },
    {
      "epoch": 0.12974,
      "grad_norm": 1.094989770680786,
      "learning_rate": 0.003,
      "loss": 4.0179,
      "step": 12974
    },
    {
      "epoch": 0.12975,
      "grad_norm": 1.2852262825752716,
      "learning_rate": 0.003,
      "loss": 4.0183,
      "step": 12975
    },
    {
      "epoch": 0.12976,
      "grad_norm": 1.2513488888113358,
      "learning_rate": 0.003,
      "loss": 4.0376,
      "step": 12976
    },
    {
      "epoch": 0.12977,
      "grad_norm": 1.160455662683476,
      "learning_rate": 0.003,
      "loss": 4.0386,
      "step": 12977
    },
    {
      "epoch": 0.12978,
      "grad_norm": 1.1863743123144554,
      "learning_rate": 0.003,
      "loss": 4.0034,
      "step": 12978
    },
    {
      "epoch": 0.12979,
      "grad_norm": 0.9523514728095331,
      "learning_rate": 0.003,
      "loss": 3.9928,
      "step": 12979
    },
    {
      "epoch": 0.1298,
      "grad_norm": 1.2746725183316685,
      "learning_rate": 0.003,
      "loss": 3.9987,
      "step": 12980
    },
    {
      "epoch": 0.12981,
      "grad_norm": 1.2113630031469498,
      "learning_rate": 0.003,
      "loss": 4.0215,
      "step": 12981
    },
    {
      "epoch": 0.12982,
      "grad_norm": 1.4401568049490077,
      "learning_rate": 0.003,
      "loss": 4.0547,
      "step": 12982
    },
    {
      "epoch": 0.12983,
      "grad_norm": 1.0589139916757784,
      "learning_rate": 0.003,
      "loss": 3.9969,
      "step": 12983
    },
    {
      "epoch": 0.12984,
      "grad_norm": 1.3111421740836104,
      "learning_rate": 0.003,
      "loss": 4.0294,
      "step": 12984
    },
    {
      "epoch": 0.12985,
      "grad_norm": 1.0916263279984209,
      "learning_rate": 0.003,
      "loss": 4.0249,
      "step": 12985
    },
    {
      "epoch": 0.12986,
      "grad_norm": 1.3150320490843068,
      "learning_rate": 0.003,
      "loss": 4.0103,
      "step": 12986
    },
    {
      "epoch": 0.12987,
      "grad_norm": 1.1622979768305781,
      "learning_rate": 0.003,
      "loss": 4.029,
      "step": 12987
    },
    {
      "epoch": 0.12988,
      "grad_norm": 1.1795490087421945,
      "learning_rate": 0.003,
      "loss": 4.0261,
      "step": 12988
    },
    {
      "epoch": 0.12989,
      "grad_norm": 1.2023289153396604,
      "learning_rate": 0.003,
      "loss": 4.0204,
      "step": 12989
    },
    {
      "epoch": 0.1299,
      "grad_norm": 1.2904015849325297,
      "learning_rate": 0.003,
      "loss": 4.0321,
      "step": 12990
    },
    {
      "epoch": 0.12991,
      "grad_norm": 1.1788631395030478,
      "learning_rate": 0.003,
      "loss": 4.0092,
      "step": 12991
    },
    {
      "epoch": 0.12992,
      "grad_norm": 1.1325099422571487,
      "learning_rate": 0.003,
      "loss": 4.0346,
      "step": 12992
    },
    {
      "epoch": 0.12993,
      "grad_norm": 1.2285007962804115,
      "learning_rate": 0.003,
      "loss": 4.0035,
      "step": 12993
    },
    {
      "epoch": 0.12994,
      "grad_norm": 1.1110998831328704,
      "learning_rate": 0.003,
      "loss": 4.0071,
      "step": 12994
    },
    {
      "epoch": 0.12995,
      "grad_norm": 1.345079656718258,
      "learning_rate": 0.003,
      "loss": 4.0001,
      "step": 12995
    },
    {
      "epoch": 0.12996,
      "grad_norm": 1.3307329461460866,
      "learning_rate": 0.003,
      "loss": 3.995,
      "step": 12996
    },
    {
      "epoch": 0.12997,
      "grad_norm": 1.1226943235816225,
      "learning_rate": 0.003,
      "loss": 4.027,
      "step": 12997
    },
    {
      "epoch": 0.12998,
      "grad_norm": 1.3592512691421177,
      "learning_rate": 0.003,
      "loss": 4.0185,
      "step": 12998
    },
    {
      "epoch": 0.12999,
      "grad_norm": 1.0450028186646942,
      "learning_rate": 0.003,
      "loss": 3.9929,
      "step": 12999
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.1867332569892648,
      "learning_rate": 0.003,
      "loss": 4.0127,
      "step": 13000
    },
    {
      "epoch": 0.13001,
      "grad_norm": 1.23639273204252,
      "learning_rate": 0.003,
      "loss": 4.0223,
      "step": 13001
    },
    {
      "epoch": 0.13002,
      "grad_norm": 1.1831666531849891,
      "learning_rate": 0.003,
      "loss": 3.9997,
      "step": 13002
    },
    {
      "epoch": 0.13003,
      "grad_norm": 1.2823332562222987,
      "learning_rate": 0.003,
      "loss": 4.0217,
      "step": 13003
    },
    {
      "epoch": 0.13004,
      "grad_norm": 1.191483777431203,
      "learning_rate": 0.003,
      "loss": 4.0108,
      "step": 13004
    },
    {
      "epoch": 0.13005,
      "grad_norm": 1.1804479129189784,
      "learning_rate": 0.003,
      "loss": 4.0109,
      "step": 13005
    },
    {
      "epoch": 0.13006,
      "grad_norm": 1.0660348758813278,
      "learning_rate": 0.003,
      "loss": 3.9914,
      "step": 13006
    },
    {
      "epoch": 0.13007,
      "grad_norm": 1.1998590835283771,
      "learning_rate": 0.003,
      "loss": 4.0391,
      "step": 13007
    },
    {
      "epoch": 0.13008,
      "grad_norm": 1.211992067571615,
      "learning_rate": 0.003,
      "loss": 4.0412,
      "step": 13008
    },
    {
      "epoch": 0.13009,
      "grad_norm": 1.0715092102241917,
      "learning_rate": 0.003,
      "loss": 4.0229,
      "step": 13009
    },
    {
      "epoch": 0.1301,
      "grad_norm": 1.2311289310569815,
      "learning_rate": 0.003,
      "loss": 4.0276,
      "step": 13010
    },
    {
      "epoch": 0.13011,
      "grad_norm": 1.1482130599826008,
      "learning_rate": 0.003,
      "loss": 4.0312,
      "step": 13011
    },
    {
      "epoch": 0.13012,
      "grad_norm": 1.419970438601161,
      "learning_rate": 0.003,
      "loss": 4.0515,
      "step": 13012
    },
    {
      "epoch": 0.13013,
      "grad_norm": 1.289807647099006,
      "learning_rate": 0.003,
      "loss": 4.0521,
      "step": 13013
    },
    {
      "epoch": 0.13014,
      "grad_norm": 1.3265352693337469,
      "learning_rate": 0.003,
      "loss": 4.0271,
      "step": 13014
    },
    {
      "epoch": 0.13015,
      "grad_norm": 1.3345475639012292,
      "learning_rate": 0.003,
      "loss": 4.0378,
      "step": 13015
    },
    {
      "epoch": 0.13016,
      "grad_norm": 1.0368363723912866,
      "learning_rate": 0.003,
      "loss": 4.0047,
      "step": 13016
    },
    {
      "epoch": 0.13017,
      "grad_norm": 1.2877451811752048,
      "learning_rate": 0.003,
      "loss": 4.0402,
      "step": 13017
    },
    {
      "epoch": 0.13018,
      "grad_norm": 1.012908939390002,
      "learning_rate": 0.003,
      "loss": 4.0053,
      "step": 13018
    },
    {
      "epoch": 0.13019,
      "grad_norm": 1.3703226825238353,
      "learning_rate": 0.003,
      "loss": 4.043,
      "step": 13019
    },
    {
      "epoch": 0.1302,
      "grad_norm": 1.176604438017887,
      "learning_rate": 0.003,
      "loss": 4.0187,
      "step": 13020
    },
    {
      "epoch": 0.13021,
      "grad_norm": 1.1122871690777643,
      "learning_rate": 0.003,
      "loss": 4.0214,
      "step": 13021
    },
    {
      "epoch": 0.13022,
      "grad_norm": 1.2974145454477173,
      "learning_rate": 0.003,
      "loss": 4.041,
      "step": 13022
    },
    {
      "epoch": 0.13023,
      "grad_norm": 1.2352521460749524,
      "learning_rate": 0.003,
      "loss": 4.0361,
      "step": 13023
    },
    {
      "epoch": 0.13024,
      "grad_norm": 1.168606954054629,
      "learning_rate": 0.003,
      "loss": 3.9779,
      "step": 13024
    },
    {
      "epoch": 0.13025,
      "grad_norm": 1.2499960934819396,
      "learning_rate": 0.003,
      "loss": 4.0228,
      "step": 13025
    },
    {
      "epoch": 0.13026,
      "grad_norm": 1.3152707080427926,
      "learning_rate": 0.003,
      "loss": 4.039,
      "step": 13026
    },
    {
      "epoch": 0.13027,
      "grad_norm": 1.1778120535765901,
      "learning_rate": 0.003,
      "loss": 4.002,
      "step": 13027
    },
    {
      "epoch": 0.13028,
      "grad_norm": 1.2079691050764194,
      "learning_rate": 0.003,
      "loss": 4.0475,
      "step": 13028
    },
    {
      "epoch": 0.13029,
      "grad_norm": 1.1071644576188255,
      "learning_rate": 0.003,
      "loss": 4.0015,
      "step": 13029
    },
    {
      "epoch": 0.1303,
      "grad_norm": 1.3864312407201933,
      "learning_rate": 0.003,
      "loss": 4.0497,
      "step": 13030
    },
    {
      "epoch": 0.13031,
      "grad_norm": 1.1125770889548465,
      "learning_rate": 0.003,
      "loss": 4.0085,
      "step": 13031
    },
    {
      "epoch": 0.13032,
      "grad_norm": 1.3413211527215434,
      "learning_rate": 0.003,
      "loss": 4.0192,
      "step": 13032
    },
    {
      "epoch": 0.13033,
      "grad_norm": 1.06880186736233,
      "learning_rate": 0.003,
      "loss": 4.0118,
      "step": 13033
    },
    {
      "epoch": 0.13034,
      "grad_norm": 1.2728892725094454,
      "learning_rate": 0.003,
      "loss": 4.0502,
      "step": 13034
    },
    {
      "epoch": 0.13035,
      "grad_norm": 1.0651150503240214,
      "learning_rate": 0.003,
      "loss": 4.038,
      "step": 13035
    },
    {
      "epoch": 0.13036,
      "grad_norm": 1.1830519127271366,
      "learning_rate": 0.003,
      "loss": 4.0203,
      "step": 13036
    },
    {
      "epoch": 0.13037,
      "grad_norm": 1.1141922529152608,
      "learning_rate": 0.003,
      "loss": 4.0172,
      "step": 13037
    },
    {
      "epoch": 0.13038,
      "grad_norm": 1.3343469496900424,
      "learning_rate": 0.003,
      "loss": 4.0483,
      "step": 13038
    },
    {
      "epoch": 0.13039,
      "grad_norm": 1.134356082858109,
      "learning_rate": 0.003,
      "loss": 4.0446,
      "step": 13039
    },
    {
      "epoch": 0.1304,
      "grad_norm": 1.0985262524667785,
      "learning_rate": 0.003,
      "loss": 4.0089,
      "step": 13040
    },
    {
      "epoch": 0.13041,
      "grad_norm": 1.4877606344783099,
      "learning_rate": 0.003,
      "loss": 4.0347,
      "step": 13041
    },
    {
      "epoch": 0.13042,
      "grad_norm": 1.1653508801524972,
      "learning_rate": 0.003,
      "loss": 4.0341,
      "step": 13042
    },
    {
      "epoch": 0.13043,
      "grad_norm": 1.1754164423686313,
      "learning_rate": 0.003,
      "loss": 4.029,
      "step": 13043
    },
    {
      "epoch": 0.13044,
      "grad_norm": 1.3302602473205494,
      "learning_rate": 0.003,
      "loss": 4.0015,
      "step": 13044
    },
    {
      "epoch": 0.13045,
      "grad_norm": 1.0904545751149235,
      "learning_rate": 0.003,
      "loss": 4.0118,
      "step": 13045
    },
    {
      "epoch": 0.13046,
      "grad_norm": 1.3172464854441723,
      "learning_rate": 0.003,
      "loss": 4.0267,
      "step": 13046
    },
    {
      "epoch": 0.13047,
      "grad_norm": 1.00886913655674,
      "learning_rate": 0.003,
      "loss": 4.0238,
      "step": 13047
    },
    {
      "epoch": 0.13048,
      "grad_norm": 1.244453787403923,
      "learning_rate": 0.003,
      "loss": 3.9889,
      "step": 13048
    },
    {
      "epoch": 0.13049,
      "grad_norm": 0.9503210626593559,
      "learning_rate": 0.003,
      "loss": 3.9859,
      "step": 13049
    },
    {
      "epoch": 0.1305,
      "grad_norm": 1.1082209982210707,
      "learning_rate": 0.003,
      "loss": 3.9874,
      "step": 13050
    },
    {
      "epoch": 0.13051,
      "grad_norm": 1.3825316783754058,
      "learning_rate": 0.003,
      "loss": 4.0469,
      "step": 13051
    },
    {
      "epoch": 0.13052,
      "grad_norm": 1.1065415073757985,
      "learning_rate": 0.003,
      "loss": 4.0081,
      "step": 13052
    },
    {
      "epoch": 0.13053,
      "grad_norm": 1.2282454715331608,
      "learning_rate": 0.003,
      "loss": 4.0016,
      "step": 13053
    },
    {
      "epoch": 0.13054,
      "grad_norm": 1.2066879253376723,
      "learning_rate": 0.003,
      "loss": 4.0313,
      "step": 13054
    },
    {
      "epoch": 0.13055,
      "grad_norm": 0.9873407819705948,
      "learning_rate": 0.003,
      "loss": 4.0019,
      "step": 13055
    },
    {
      "epoch": 0.13056,
      "grad_norm": 1.3135267445481453,
      "learning_rate": 0.003,
      "loss": 4.0065,
      "step": 13056
    },
    {
      "epoch": 0.13057,
      "grad_norm": 1.0202575371173688,
      "learning_rate": 0.003,
      "loss": 4.0253,
      "step": 13057
    },
    {
      "epoch": 0.13058,
      "grad_norm": 1.331106710918081,
      "learning_rate": 0.003,
      "loss": 4.0318,
      "step": 13058
    },
    {
      "epoch": 0.13059,
      "grad_norm": 1.1490975344521086,
      "learning_rate": 0.003,
      "loss": 4.0537,
      "step": 13059
    },
    {
      "epoch": 0.1306,
      "grad_norm": 1.3554628358227565,
      "learning_rate": 0.003,
      "loss": 4.0347,
      "step": 13060
    },
    {
      "epoch": 0.13061,
      "grad_norm": 1.0613734741931096,
      "learning_rate": 0.003,
      "loss": 4.0005,
      "step": 13061
    },
    {
      "epoch": 0.13062,
      "grad_norm": 1.1786998069131467,
      "learning_rate": 0.003,
      "loss": 4.0191,
      "step": 13062
    },
    {
      "epoch": 0.13063,
      "grad_norm": 1.122364934303279,
      "learning_rate": 0.003,
      "loss": 3.9898,
      "step": 13063
    },
    {
      "epoch": 0.13064,
      "grad_norm": 1.245717571216681,
      "learning_rate": 0.003,
      "loss": 4.0561,
      "step": 13064
    },
    {
      "epoch": 0.13065,
      "grad_norm": 1.2300096300862102,
      "learning_rate": 0.003,
      "loss": 4.014,
      "step": 13065
    },
    {
      "epoch": 0.13066,
      "grad_norm": 1.177243977511725,
      "learning_rate": 0.003,
      "loss": 4.021,
      "step": 13066
    },
    {
      "epoch": 0.13067,
      "grad_norm": 1.121267875399439,
      "learning_rate": 0.003,
      "loss": 4.0695,
      "step": 13067
    },
    {
      "epoch": 0.13068,
      "grad_norm": 1.4458148439390073,
      "learning_rate": 0.003,
      "loss": 4.0067,
      "step": 13068
    },
    {
      "epoch": 0.13069,
      "grad_norm": 1.0655288352193661,
      "learning_rate": 0.003,
      "loss": 4.0216,
      "step": 13069
    },
    {
      "epoch": 0.1307,
      "grad_norm": 1.4673778899232879,
      "learning_rate": 0.003,
      "loss": 4.0365,
      "step": 13070
    },
    {
      "epoch": 0.13071,
      "grad_norm": 1.0077870922762957,
      "learning_rate": 0.003,
      "loss": 4.0097,
      "step": 13071
    },
    {
      "epoch": 0.13072,
      "grad_norm": 1.2477720049192509,
      "learning_rate": 0.003,
      "loss": 4.016,
      "step": 13072
    },
    {
      "epoch": 0.13073,
      "grad_norm": 1.2295747164214397,
      "learning_rate": 0.003,
      "loss": 4.0159,
      "step": 13073
    },
    {
      "epoch": 0.13074,
      "grad_norm": 1.2677829946765515,
      "learning_rate": 0.003,
      "loss": 4.0063,
      "step": 13074
    },
    {
      "epoch": 0.13075,
      "grad_norm": 1.3998043440354033,
      "learning_rate": 0.003,
      "loss": 4.0349,
      "step": 13075
    },
    {
      "epoch": 0.13076,
      "grad_norm": 1.0540236803053258,
      "learning_rate": 0.003,
      "loss": 4.0309,
      "step": 13076
    },
    {
      "epoch": 0.13077,
      "grad_norm": 1.2607216048499503,
      "learning_rate": 0.003,
      "loss": 4.0213,
      "step": 13077
    },
    {
      "epoch": 0.13078,
      "grad_norm": 1.0932345623866864,
      "learning_rate": 0.003,
      "loss": 4.0071,
      "step": 13078
    },
    {
      "epoch": 0.13079,
      "grad_norm": 1.2051592865391518,
      "learning_rate": 0.003,
      "loss": 3.991,
      "step": 13079
    },
    {
      "epoch": 0.1308,
      "grad_norm": 1.0491942575755948,
      "learning_rate": 0.003,
      "loss": 4.0124,
      "step": 13080
    },
    {
      "epoch": 0.13081,
      "grad_norm": 1.3062916574774783,
      "learning_rate": 0.003,
      "loss": 4.0312,
      "step": 13081
    },
    {
      "epoch": 0.13082,
      "grad_norm": 1.1321098357352237,
      "learning_rate": 0.003,
      "loss": 4.0038,
      "step": 13082
    },
    {
      "epoch": 0.13083,
      "grad_norm": 1.3665990418676934,
      "learning_rate": 0.003,
      "loss": 4.0178,
      "step": 13083
    },
    {
      "epoch": 0.13084,
      "grad_norm": 0.9978809201908994,
      "learning_rate": 0.003,
      "loss": 4.0252,
      "step": 13084
    },
    {
      "epoch": 0.13085,
      "grad_norm": 1.373453319634236,
      "learning_rate": 0.003,
      "loss": 4.037,
      "step": 13085
    },
    {
      "epoch": 0.13086,
      "grad_norm": 0.8948839986805347,
      "learning_rate": 0.003,
      "loss": 4.0017,
      "step": 13086
    },
    {
      "epoch": 0.13087,
      "grad_norm": 1.1553678542467876,
      "learning_rate": 0.003,
      "loss": 4.0125,
      "step": 13087
    },
    {
      "epoch": 0.13088,
      "grad_norm": 1.228440641943863,
      "learning_rate": 0.003,
      "loss": 4.0341,
      "step": 13088
    },
    {
      "epoch": 0.13089,
      "grad_norm": 1.3675739443552888,
      "learning_rate": 0.003,
      "loss": 4.0282,
      "step": 13089
    },
    {
      "epoch": 0.1309,
      "grad_norm": 1.0849909250003047,
      "learning_rate": 0.003,
      "loss": 4.0012,
      "step": 13090
    },
    {
      "epoch": 0.13091,
      "grad_norm": 1.2724323504464439,
      "learning_rate": 0.003,
      "loss": 4.0249,
      "step": 13091
    },
    {
      "epoch": 0.13092,
      "grad_norm": 1.1154728415628237,
      "learning_rate": 0.003,
      "loss": 4.026,
      "step": 13092
    },
    {
      "epoch": 0.13093,
      "grad_norm": 1.3266592579188943,
      "learning_rate": 0.003,
      "loss": 4.0327,
      "step": 13093
    },
    {
      "epoch": 0.13094,
      "grad_norm": 1.0431979252943413,
      "learning_rate": 0.003,
      "loss": 4.0284,
      "step": 13094
    },
    {
      "epoch": 0.13095,
      "grad_norm": 1.2587443347677894,
      "learning_rate": 0.003,
      "loss": 4.0204,
      "step": 13095
    },
    {
      "epoch": 0.13096,
      "grad_norm": 1.0518579515346653,
      "learning_rate": 0.003,
      "loss": 4.021,
      "step": 13096
    },
    {
      "epoch": 0.13097,
      "grad_norm": 1.182379232046843,
      "learning_rate": 0.003,
      "loss": 4.046,
      "step": 13097
    },
    {
      "epoch": 0.13098,
      "grad_norm": 1.2441723313218485,
      "learning_rate": 0.003,
      "loss": 4.0307,
      "step": 13098
    },
    {
      "epoch": 0.13099,
      "grad_norm": 1.1745868481759494,
      "learning_rate": 0.003,
      "loss": 4.031,
      "step": 13099
    },
    {
      "epoch": 0.131,
      "grad_norm": 1.2466331476420034,
      "learning_rate": 0.003,
      "loss": 4.0306,
      "step": 13100
    },
    {
      "epoch": 0.13101,
      "grad_norm": 1.111399149137611,
      "learning_rate": 0.003,
      "loss": 4.023,
      "step": 13101
    },
    {
      "epoch": 0.13102,
      "grad_norm": 1.3801332479480273,
      "learning_rate": 0.003,
      "loss": 4.0194,
      "step": 13102
    },
    {
      "epoch": 0.13103,
      "grad_norm": 1.0319688794090096,
      "learning_rate": 0.003,
      "loss": 4.0385,
      "step": 13103
    },
    {
      "epoch": 0.13104,
      "grad_norm": 1.352104538176443,
      "learning_rate": 0.003,
      "loss": 4.0308,
      "step": 13104
    },
    {
      "epoch": 0.13105,
      "grad_norm": 0.9840982994262835,
      "learning_rate": 0.003,
      "loss": 4.012,
      "step": 13105
    },
    {
      "epoch": 0.13106,
      "grad_norm": 1.2720508957564811,
      "learning_rate": 0.003,
      "loss": 4.0048,
      "step": 13106
    },
    {
      "epoch": 0.13107,
      "grad_norm": 1.3582344819108931,
      "learning_rate": 0.003,
      "loss": 4.0181,
      "step": 13107
    },
    {
      "epoch": 0.13108,
      "grad_norm": 1.263872749869849,
      "learning_rate": 0.003,
      "loss": 4.0074,
      "step": 13108
    },
    {
      "epoch": 0.13109,
      "grad_norm": 1.3146328515584427,
      "learning_rate": 0.003,
      "loss": 4.0211,
      "step": 13109
    },
    {
      "epoch": 0.1311,
      "grad_norm": 1.1142588293129503,
      "learning_rate": 0.003,
      "loss": 4.0256,
      "step": 13110
    },
    {
      "epoch": 0.13111,
      "grad_norm": 1.2948103531876165,
      "learning_rate": 0.003,
      "loss": 4.0077,
      "step": 13111
    },
    {
      "epoch": 0.13112,
      "grad_norm": 1.2131004643968781,
      "learning_rate": 0.003,
      "loss": 4.0192,
      "step": 13112
    },
    {
      "epoch": 0.13113,
      "grad_norm": 1.10642883210753,
      "learning_rate": 0.003,
      "loss": 4.0505,
      "step": 13113
    },
    {
      "epoch": 0.13114,
      "grad_norm": 1.2028397006048634,
      "learning_rate": 0.003,
      "loss": 3.9916,
      "step": 13114
    },
    {
      "epoch": 0.13115,
      "grad_norm": 0.9699969253696146,
      "learning_rate": 0.003,
      "loss": 4.0262,
      "step": 13115
    },
    {
      "epoch": 0.13116,
      "grad_norm": 1.244963431503124,
      "learning_rate": 0.003,
      "loss": 4.0221,
      "step": 13116
    },
    {
      "epoch": 0.13117,
      "grad_norm": 1.0532682402073905,
      "learning_rate": 0.003,
      "loss": 4.0124,
      "step": 13117
    },
    {
      "epoch": 0.13118,
      "grad_norm": 1.1743842407713163,
      "learning_rate": 0.003,
      "loss": 4.0439,
      "step": 13118
    },
    {
      "epoch": 0.13119,
      "grad_norm": 1.1241278412152182,
      "learning_rate": 0.003,
      "loss": 4.0053,
      "step": 13119
    },
    {
      "epoch": 0.1312,
      "grad_norm": 1.2176277731495335,
      "learning_rate": 0.003,
      "loss": 4.0363,
      "step": 13120
    },
    {
      "epoch": 0.13121,
      "grad_norm": 1.2384334922856706,
      "learning_rate": 0.003,
      "loss": 4.0172,
      "step": 13121
    },
    {
      "epoch": 0.13122,
      "grad_norm": 1.3559900193532226,
      "learning_rate": 0.003,
      "loss": 4.0217,
      "step": 13122
    },
    {
      "epoch": 0.13123,
      "grad_norm": 1.0763654633297999,
      "learning_rate": 0.003,
      "loss": 4.013,
      "step": 13123
    },
    {
      "epoch": 0.13124,
      "grad_norm": 1.1916683197228783,
      "learning_rate": 0.003,
      "loss": 4.0127,
      "step": 13124
    },
    {
      "epoch": 0.13125,
      "grad_norm": 1.227531132739174,
      "learning_rate": 0.003,
      "loss": 4.0213,
      "step": 13125
    },
    {
      "epoch": 0.13126,
      "grad_norm": 1.0960208761266186,
      "learning_rate": 0.003,
      "loss": 4.0241,
      "step": 13126
    },
    {
      "epoch": 0.13127,
      "grad_norm": 1.2337908088108465,
      "learning_rate": 0.003,
      "loss": 4.0044,
      "step": 13127
    },
    {
      "epoch": 0.13128,
      "grad_norm": 1.1628289901082414,
      "learning_rate": 0.003,
      "loss": 3.9923,
      "step": 13128
    },
    {
      "epoch": 0.13129,
      "grad_norm": 1.6903240617545117,
      "learning_rate": 0.003,
      "loss": 4.0324,
      "step": 13129
    },
    {
      "epoch": 0.1313,
      "grad_norm": 1.0718491893109336,
      "learning_rate": 0.003,
      "loss": 4.0342,
      "step": 13130
    },
    {
      "epoch": 0.13131,
      "grad_norm": 1.389295634459969,
      "learning_rate": 0.003,
      "loss": 4.0378,
      "step": 13131
    },
    {
      "epoch": 0.13132,
      "grad_norm": 0.9675178513023274,
      "learning_rate": 0.003,
      "loss": 4.0347,
      "step": 13132
    },
    {
      "epoch": 0.13133,
      "grad_norm": 1.2463845794263217,
      "learning_rate": 0.003,
      "loss": 4.0547,
      "step": 13133
    },
    {
      "epoch": 0.13134,
      "grad_norm": 1.303025274762317,
      "learning_rate": 0.003,
      "loss": 4.0142,
      "step": 13134
    },
    {
      "epoch": 0.13135,
      "grad_norm": 1.1598289958201813,
      "learning_rate": 0.003,
      "loss": 4.02,
      "step": 13135
    },
    {
      "epoch": 0.13136,
      "grad_norm": 1.3105925553909337,
      "learning_rate": 0.003,
      "loss": 4.0305,
      "step": 13136
    },
    {
      "epoch": 0.13137,
      "grad_norm": 1.1781013394534319,
      "learning_rate": 0.003,
      "loss": 4.0074,
      "step": 13137
    },
    {
      "epoch": 0.13138,
      "grad_norm": 1.1794371671889319,
      "learning_rate": 0.003,
      "loss": 4.0035,
      "step": 13138
    },
    {
      "epoch": 0.13139,
      "grad_norm": 1.2986139091265898,
      "learning_rate": 0.003,
      "loss": 4.0357,
      "step": 13139
    },
    {
      "epoch": 0.1314,
      "grad_norm": 0.989722674097084,
      "learning_rate": 0.003,
      "loss": 4.0324,
      "step": 13140
    },
    {
      "epoch": 0.13141,
      "grad_norm": 1.3343673770114424,
      "learning_rate": 0.003,
      "loss": 4.0348,
      "step": 13141
    },
    {
      "epoch": 0.13142,
      "grad_norm": 1.1470785700967723,
      "learning_rate": 0.003,
      "loss": 4.0561,
      "step": 13142
    },
    {
      "epoch": 0.13143,
      "grad_norm": 1.1589060396306303,
      "learning_rate": 0.003,
      "loss": 4.0161,
      "step": 13143
    },
    {
      "epoch": 0.13144,
      "grad_norm": 1.0582465386323356,
      "learning_rate": 0.003,
      "loss": 4.0263,
      "step": 13144
    },
    {
      "epoch": 0.13145,
      "grad_norm": 1.3672232216600506,
      "learning_rate": 0.003,
      "loss": 4.0046,
      "step": 13145
    },
    {
      "epoch": 0.13146,
      "grad_norm": 1.1879858947806368,
      "learning_rate": 0.003,
      "loss": 4.0107,
      "step": 13146
    },
    {
      "epoch": 0.13147,
      "grad_norm": 1.20843180027465,
      "learning_rate": 0.003,
      "loss": 4.0341,
      "step": 13147
    },
    {
      "epoch": 0.13148,
      "grad_norm": 1.3432584402500107,
      "learning_rate": 0.003,
      "loss": 4.0494,
      "step": 13148
    },
    {
      "epoch": 0.13149,
      "grad_norm": 0.9095709723571204,
      "learning_rate": 0.003,
      "loss": 4.0152,
      "step": 13149
    },
    {
      "epoch": 0.1315,
      "grad_norm": 1.057646553778702,
      "learning_rate": 0.003,
      "loss": 4.0181,
      "step": 13150
    },
    {
      "epoch": 0.13151,
      "grad_norm": 1.3610467106606845,
      "learning_rate": 0.003,
      "loss": 4.0239,
      "step": 13151
    },
    {
      "epoch": 0.13152,
      "grad_norm": 1.0777210898069285,
      "learning_rate": 0.003,
      "loss": 4.0226,
      "step": 13152
    },
    {
      "epoch": 0.13153,
      "grad_norm": 1.3859619564262946,
      "learning_rate": 0.003,
      "loss": 4.0108,
      "step": 13153
    },
    {
      "epoch": 0.13154,
      "grad_norm": 1.0844304505512883,
      "learning_rate": 0.003,
      "loss": 3.9906,
      "step": 13154
    },
    {
      "epoch": 0.13155,
      "grad_norm": 1.2359660173968072,
      "learning_rate": 0.003,
      "loss": 4.0185,
      "step": 13155
    },
    {
      "epoch": 0.13156,
      "grad_norm": 1.3677030719934207,
      "learning_rate": 0.003,
      "loss": 4.0221,
      "step": 13156
    },
    {
      "epoch": 0.13157,
      "grad_norm": 0.9833401342037275,
      "learning_rate": 0.003,
      "loss": 4.0597,
      "step": 13157
    },
    {
      "epoch": 0.13158,
      "grad_norm": 1.1062735484981285,
      "learning_rate": 0.003,
      "loss": 4.0153,
      "step": 13158
    },
    {
      "epoch": 0.13159,
      "grad_norm": 1.1254248542885985,
      "learning_rate": 0.003,
      "loss": 4.0134,
      "step": 13159
    },
    {
      "epoch": 0.1316,
      "grad_norm": 1.1120092920802103,
      "learning_rate": 0.003,
      "loss": 3.9813,
      "step": 13160
    },
    {
      "epoch": 0.13161,
      "grad_norm": 1.1989205083321814,
      "learning_rate": 0.003,
      "loss": 4.0147,
      "step": 13161
    },
    {
      "epoch": 0.13162,
      "grad_norm": 1.1928171487306694,
      "learning_rate": 0.003,
      "loss": 4.029,
      "step": 13162
    },
    {
      "epoch": 0.13163,
      "grad_norm": 1.4081902393551464,
      "learning_rate": 0.003,
      "loss": 4.0678,
      "step": 13163
    },
    {
      "epoch": 0.13164,
      "grad_norm": 1.1583999173018455,
      "learning_rate": 0.003,
      "loss": 4.0034,
      "step": 13164
    },
    {
      "epoch": 0.13165,
      "grad_norm": 1.3293599987195368,
      "learning_rate": 0.003,
      "loss": 4.0325,
      "step": 13165
    },
    {
      "epoch": 0.13166,
      "grad_norm": 1.042849775568771,
      "learning_rate": 0.003,
      "loss": 4.0165,
      "step": 13166
    },
    {
      "epoch": 0.13167,
      "grad_norm": 1.179654456448683,
      "learning_rate": 0.003,
      "loss": 4.0351,
      "step": 13167
    },
    {
      "epoch": 0.13168,
      "grad_norm": 1.0543994115678994,
      "learning_rate": 0.003,
      "loss": 4.0376,
      "step": 13168
    },
    {
      "epoch": 0.13169,
      "grad_norm": 1.3091653230611913,
      "learning_rate": 0.003,
      "loss": 3.9994,
      "step": 13169
    },
    {
      "epoch": 0.1317,
      "grad_norm": 1.1789551231881399,
      "learning_rate": 0.003,
      "loss": 4.0317,
      "step": 13170
    },
    {
      "epoch": 0.13171,
      "grad_norm": 1.2679235757864307,
      "learning_rate": 0.003,
      "loss": 4.0541,
      "step": 13171
    },
    {
      "epoch": 0.13172,
      "grad_norm": 1.0121238818707832,
      "learning_rate": 0.003,
      "loss": 4.0097,
      "step": 13172
    },
    {
      "epoch": 0.13173,
      "grad_norm": 1.3674161299963243,
      "learning_rate": 0.003,
      "loss": 3.9998,
      "step": 13173
    },
    {
      "epoch": 0.13174,
      "grad_norm": 1.1333333512368313,
      "learning_rate": 0.003,
      "loss": 4.0503,
      "step": 13174
    },
    {
      "epoch": 0.13175,
      "grad_norm": 1.263735157123079,
      "learning_rate": 0.003,
      "loss": 3.9857,
      "step": 13175
    },
    {
      "epoch": 0.13176,
      "grad_norm": 1.4329249743192631,
      "learning_rate": 0.003,
      "loss": 4.022,
      "step": 13176
    },
    {
      "epoch": 0.13177,
      "grad_norm": 1.113757810778901,
      "learning_rate": 0.003,
      "loss": 4.0321,
      "step": 13177
    },
    {
      "epoch": 0.13178,
      "grad_norm": 1.2033659807687154,
      "learning_rate": 0.003,
      "loss": 4.0169,
      "step": 13178
    },
    {
      "epoch": 0.13179,
      "grad_norm": 1.1629366378785435,
      "learning_rate": 0.003,
      "loss": 4.0241,
      "step": 13179
    },
    {
      "epoch": 0.1318,
      "grad_norm": 1.3602821104802627,
      "learning_rate": 0.003,
      "loss": 4.0142,
      "step": 13180
    },
    {
      "epoch": 0.13181,
      "grad_norm": 1.2445246941668198,
      "learning_rate": 0.003,
      "loss": 4.0018,
      "step": 13181
    },
    {
      "epoch": 0.13182,
      "grad_norm": 1.337209314159866,
      "learning_rate": 0.003,
      "loss": 4.0067,
      "step": 13182
    },
    {
      "epoch": 0.13183,
      "grad_norm": 0.9878572274233712,
      "learning_rate": 0.003,
      "loss": 4.0208,
      "step": 13183
    },
    {
      "epoch": 0.13184,
      "grad_norm": 1.0960981251343198,
      "learning_rate": 0.003,
      "loss": 4.0242,
      "step": 13184
    },
    {
      "epoch": 0.13185,
      "grad_norm": 1.3102949978898657,
      "learning_rate": 0.003,
      "loss": 4.0429,
      "step": 13185
    },
    {
      "epoch": 0.13186,
      "grad_norm": 1.205314079023528,
      "learning_rate": 0.003,
      "loss": 4.0378,
      "step": 13186
    },
    {
      "epoch": 0.13187,
      "grad_norm": 1.0255720665353423,
      "learning_rate": 0.003,
      "loss": 3.9929,
      "step": 13187
    },
    {
      "epoch": 0.13188,
      "grad_norm": 1.3262167257541027,
      "learning_rate": 0.003,
      "loss": 4.0222,
      "step": 13188
    },
    {
      "epoch": 0.13189,
      "grad_norm": 1.2168641109639486,
      "learning_rate": 0.003,
      "loss": 3.9894,
      "step": 13189
    },
    {
      "epoch": 0.1319,
      "grad_norm": 1.1889265011218615,
      "learning_rate": 0.003,
      "loss": 4.0534,
      "step": 13190
    },
    {
      "epoch": 0.13191,
      "grad_norm": 1.3805581899749395,
      "learning_rate": 0.003,
      "loss": 4.0491,
      "step": 13191
    },
    {
      "epoch": 0.13192,
      "grad_norm": 1.0413867655514306,
      "learning_rate": 0.003,
      "loss": 3.9898,
      "step": 13192
    },
    {
      "epoch": 0.13193,
      "grad_norm": 1.2492107648050148,
      "learning_rate": 0.003,
      "loss": 3.9758,
      "step": 13193
    },
    {
      "epoch": 0.13194,
      "grad_norm": 1.0030085267089808,
      "learning_rate": 0.003,
      "loss": 4.0215,
      "step": 13194
    },
    {
      "epoch": 0.13195,
      "grad_norm": 1.3532728033233967,
      "learning_rate": 0.003,
      "loss": 4.0152,
      "step": 13195
    },
    {
      "epoch": 0.13196,
      "grad_norm": 1.3442955676133201,
      "learning_rate": 0.003,
      "loss": 3.9847,
      "step": 13196
    },
    {
      "epoch": 0.13197,
      "grad_norm": 1.1839299552626144,
      "learning_rate": 0.003,
      "loss": 4.0329,
      "step": 13197
    },
    {
      "epoch": 0.13198,
      "grad_norm": 1.0302625638882434,
      "learning_rate": 0.003,
      "loss": 4.0148,
      "step": 13198
    },
    {
      "epoch": 0.13199,
      "grad_norm": 1.2400658727503961,
      "learning_rate": 0.003,
      "loss": 4.0281,
      "step": 13199
    },
    {
      "epoch": 0.132,
      "grad_norm": 1.1317920385131712,
      "learning_rate": 0.003,
      "loss": 4.032,
      "step": 13200
    },
    {
      "epoch": 0.13201,
      "grad_norm": 1.3070532346117103,
      "learning_rate": 0.003,
      "loss": 3.9995,
      "step": 13201
    },
    {
      "epoch": 0.13202,
      "grad_norm": 1.290994390815391,
      "learning_rate": 0.003,
      "loss": 4.0345,
      "step": 13202
    },
    {
      "epoch": 0.13203,
      "grad_norm": 1.3259193241549494,
      "learning_rate": 0.003,
      "loss": 4.0341,
      "step": 13203
    },
    {
      "epoch": 0.13204,
      "grad_norm": 1.1783424720493545,
      "learning_rate": 0.003,
      "loss": 4.0165,
      "step": 13204
    },
    {
      "epoch": 0.13205,
      "grad_norm": 1.170009083173674,
      "learning_rate": 0.003,
      "loss": 4.0298,
      "step": 13205
    },
    {
      "epoch": 0.13206,
      "grad_norm": 1.423622540381494,
      "learning_rate": 0.003,
      "loss": 4.0126,
      "step": 13206
    },
    {
      "epoch": 0.13207,
      "grad_norm": 1.0283146612527605,
      "learning_rate": 0.003,
      "loss": 4.0193,
      "step": 13207
    },
    {
      "epoch": 0.13208,
      "grad_norm": 1.2998154192550682,
      "learning_rate": 0.003,
      "loss": 3.9946,
      "step": 13208
    },
    {
      "epoch": 0.13209,
      "grad_norm": 0.873572889486228,
      "learning_rate": 0.003,
      "loss": 4.0188,
      "step": 13209
    },
    {
      "epoch": 0.1321,
      "grad_norm": 1.2389240668521508,
      "learning_rate": 0.003,
      "loss": 4.0583,
      "step": 13210
    },
    {
      "epoch": 0.13211,
      "grad_norm": 1.5130364765500839,
      "learning_rate": 0.003,
      "loss": 4.0453,
      "step": 13211
    },
    {
      "epoch": 0.13212,
      "grad_norm": 1.3577792356335452,
      "learning_rate": 0.003,
      "loss": 4.025,
      "step": 13212
    },
    {
      "epoch": 0.13213,
      "grad_norm": 0.8591220566534142,
      "learning_rate": 0.003,
      "loss": 4.0346,
      "step": 13213
    },
    {
      "epoch": 0.13214,
      "grad_norm": 0.9734006922172769,
      "learning_rate": 0.003,
      "loss": 4.0078,
      "step": 13214
    },
    {
      "epoch": 0.13215,
      "grad_norm": 1.3037427579729166,
      "learning_rate": 0.003,
      "loss": 4.0296,
      "step": 13215
    },
    {
      "epoch": 0.13216,
      "grad_norm": 1.0660492545368943,
      "learning_rate": 0.003,
      "loss": 4.0211,
      "step": 13216
    },
    {
      "epoch": 0.13217,
      "grad_norm": 1.3276671787509555,
      "learning_rate": 0.003,
      "loss": 4.0194,
      "step": 13217
    },
    {
      "epoch": 0.13218,
      "grad_norm": 0.9912861724784381,
      "learning_rate": 0.003,
      "loss": 4.0264,
      "step": 13218
    },
    {
      "epoch": 0.13219,
      "grad_norm": 1.1947328423567711,
      "learning_rate": 0.003,
      "loss": 4.0734,
      "step": 13219
    },
    {
      "epoch": 0.1322,
      "grad_norm": 1.1324627330357286,
      "learning_rate": 0.003,
      "loss": 4.0582,
      "step": 13220
    },
    {
      "epoch": 0.13221,
      "grad_norm": 1.2736609006663895,
      "learning_rate": 0.003,
      "loss": 4.0284,
      "step": 13221
    },
    {
      "epoch": 0.13222,
      "grad_norm": 1.2523335140850878,
      "learning_rate": 0.003,
      "loss": 4.0266,
      "step": 13222
    },
    {
      "epoch": 0.13223,
      "grad_norm": 1.125126287078147,
      "learning_rate": 0.003,
      "loss": 4.0178,
      "step": 13223
    },
    {
      "epoch": 0.13224,
      "grad_norm": 1.3849418026736036,
      "learning_rate": 0.003,
      "loss": 4.0021,
      "step": 13224
    },
    {
      "epoch": 0.13225,
      "grad_norm": 1.1461342791536524,
      "learning_rate": 0.003,
      "loss": 3.9898,
      "step": 13225
    },
    {
      "epoch": 0.13226,
      "grad_norm": 1.4180290204833317,
      "learning_rate": 0.003,
      "loss": 4.0231,
      "step": 13226
    },
    {
      "epoch": 0.13227,
      "grad_norm": 1.014686375750144,
      "learning_rate": 0.003,
      "loss": 4.0327,
      "step": 13227
    },
    {
      "epoch": 0.13228,
      "grad_norm": 1.5037739153801797,
      "learning_rate": 0.003,
      "loss": 4.025,
      "step": 13228
    },
    {
      "epoch": 0.13229,
      "grad_norm": 1.0446456028567737,
      "learning_rate": 0.003,
      "loss": 4.0318,
      "step": 13229
    },
    {
      "epoch": 0.1323,
      "grad_norm": 1.2431434834546604,
      "learning_rate": 0.003,
      "loss": 4.0093,
      "step": 13230
    },
    {
      "epoch": 0.13231,
      "grad_norm": 1.1595233786574235,
      "learning_rate": 0.003,
      "loss": 4.0037,
      "step": 13231
    },
    {
      "epoch": 0.13232,
      "grad_norm": 1.23700285851049,
      "learning_rate": 0.003,
      "loss": 4.0237,
      "step": 13232
    },
    {
      "epoch": 0.13233,
      "grad_norm": 1.2414313005494253,
      "learning_rate": 0.003,
      "loss": 3.9987,
      "step": 13233
    },
    {
      "epoch": 0.13234,
      "grad_norm": 1.4284272611351443,
      "learning_rate": 0.003,
      "loss": 4.012,
      "step": 13234
    },
    {
      "epoch": 0.13235,
      "grad_norm": 1.1272287828969014,
      "learning_rate": 0.003,
      "loss": 4.0189,
      "step": 13235
    },
    {
      "epoch": 0.13236,
      "grad_norm": 1.0883267700976904,
      "learning_rate": 0.003,
      "loss": 3.9873,
      "step": 13236
    },
    {
      "epoch": 0.13237,
      "grad_norm": 1.3095687487861305,
      "learning_rate": 0.003,
      "loss": 4.0261,
      "step": 13237
    },
    {
      "epoch": 0.13238,
      "grad_norm": 1.178701415006733,
      "learning_rate": 0.003,
      "loss": 4.0159,
      "step": 13238
    },
    {
      "epoch": 0.13239,
      "grad_norm": 1.2027726294396186,
      "learning_rate": 0.003,
      "loss": 4.0167,
      "step": 13239
    },
    {
      "epoch": 0.1324,
      "grad_norm": 1.2377893040922567,
      "learning_rate": 0.003,
      "loss": 4.0222,
      "step": 13240
    },
    {
      "epoch": 0.13241,
      "grad_norm": 1.0267119343524598,
      "learning_rate": 0.003,
      "loss": 4.0092,
      "step": 13241
    },
    {
      "epoch": 0.13242,
      "grad_norm": 1.3012800497905883,
      "learning_rate": 0.003,
      "loss": 4.0477,
      "step": 13242
    },
    {
      "epoch": 0.13243,
      "grad_norm": 1.1614741672299007,
      "learning_rate": 0.003,
      "loss": 4.0181,
      "step": 13243
    },
    {
      "epoch": 0.13244,
      "grad_norm": 1.2023819031296115,
      "learning_rate": 0.003,
      "loss": 3.9977,
      "step": 13244
    },
    {
      "epoch": 0.13245,
      "grad_norm": 1.1969761782597237,
      "learning_rate": 0.003,
      "loss": 3.9801,
      "step": 13245
    },
    {
      "epoch": 0.13246,
      "grad_norm": 1.1736874650380753,
      "learning_rate": 0.003,
      "loss": 4.0016,
      "step": 13246
    },
    {
      "epoch": 0.13247,
      "grad_norm": 1.2217478030413764,
      "learning_rate": 0.003,
      "loss": 4.0415,
      "step": 13247
    },
    {
      "epoch": 0.13248,
      "grad_norm": 1.195487292425333,
      "learning_rate": 0.003,
      "loss": 4.0424,
      "step": 13248
    },
    {
      "epoch": 0.13249,
      "grad_norm": 1.3531113906627674,
      "learning_rate": 0.003,
      "loss": 4.004,
      "step": 13249
    },
    {
      "epoch": 0.1325,
      "grad_norm": 1.1331994653407587,
      "learning_rate": 0.003,
      "loss": 4.018,
      "step": 13250
    },
    {
      "epoch": 0.13251,
      "grad_norm": 1.3568915508653279,
      "learning_rate": 0.003,
      "loss": 4.0109,
      "step": 13251
    },
    {
      "epoch": 0.13252,
      "grad_norm": 1.1022967952260512,
      "learning_rate": 0.003,
      "loss": 4.0277,
      "step": 13252
    },
    {
      "epoch": 0.13253,
      "grad_norm": 1.4730317933527035,
      "learning_rate": 0.003,
      "loss": 3.9895,
      "step": 13253
    },
    {
      "epoch": 0.13254,
      "grad_norm": 1.1698133513210476,
      "learning_rate": 0.003,
      "loss": 4.0422,
      "step": 13254
    },
    {
      "epoch": 0.13255,
      "grad_norm": 1.3006938019160277,
      "learning_rate": 0.003,
      "loss": 4.0322,
      "step": 13255
    },
    {
      "epoch": 0.13256,
      "grad_norm": 1.1089839963119312,
      "learning_rate": 0.003,
      "loss": 4.0322,
      "step": 13256
    },
    {
      "epoch": 0.13257,
      "grad_norm": 1.2055086984398902,
      "learning_rate": 0.003,
      "loss": 4.0248,
      "step": 13257
    },
    {
      "epoch": 0.13258,
      "grad_norm": 1.0517577625863193,
      "learning_rate": 0.003,
      "loss": 4.0132,
      "step": 13258
    },
    {
      "epoch": 0.13259,
      "grad_norm": 1.0260203890418644,
      "learning_rate": 0.003,
      "loss": 4.0371,
      "step": 13259
    },
    {
      "epoch": 0.1326,
      "grad_norm": 1.1451620545525771,
      "learning_rate": 0.003,
      "loss": 4.0247,
      "step": 13260
    },
    {
      "epoch": 0.13261,
      "grad_norm": 1.3573920654937885,
      "learning_rate": 0.003,
      "loss": 4.0267,
      "step": 13261
    },
    {
      "epoch": 0.13262,
      "grad_norm": 1.2997613200686104,
      "learning_rate": 0.003,
      "loss": 4.011,
      "step": 13262
    },
    {
      "epoch": 0.13263,
      "grad_norm": 1.162070533819352,
      "learning_rate": 0.003,
      "loss": 4.0068,
      "step": 13263
    },
    {
      "epoch": 0.13264,
      "grad_norm": 1.1860611604132822,
      "learning_rate": 0.003,
      "loss": 4.0203,
      "step": 13264
    },
    {
      "epoch": 0.13265,
      "grad_norm": 1.155878268074166,
      "learning_rate": 0.003,
      "loss": 4.0146,
      "step": 13265
    },
    {
      "epoch": 0.13266,
      "grad_norm": 1.162897362742571,
      "learning_rate": 0.003,
      "loss": 4.0218,
      "step": 13266
    },
    {
      "epoch": 0.13267,
      "grad_norm": 1.2566274122158887,
      "learning_rate": 0.003,
      "loss": 4.0236,
      "step": 13267
    },
    {
      "epoch": 0.13268,
      "grad_norm": 1.0624053962002076,
      "learning_rate": 0.003,
      "loss": 4.0187,
      "step": 13268
    },
    {
      "epoch": 0.13269,
      "grad_norm": 1.146155331042088,
      "learning_rate": 0.003,
      "loss": 3.9927,
      "step": 13269
    },
    {
      "epoch": 0.1327,
      "grad_norm": 1.1523816823944344,
      "learning_rate": 0.003,
      "loss": 4.009,
      "step": 13270
    },
    {
      "epoch": 0.13271,
      "grad_norm": 1.2822840784270244,
      "learning_rate": 0.003,
      "loss": 4.0309,
      "step": 13271
    },
    {
      "epoch": 0.13272,
      "grad_norm": 1.0786807154247118,
      "learning_rate": 0.003,
      "loss": 3.9956,
      "step": 13272
    },
    {
      "epoch": 0.13273,
      "grad_norm": 1.218673056995643,
      "learning_rate": 0.003,
      "loss": 4.0104,
      "step": 13273
    },
    {
      "epoch": 0.13274,
      "grad_norm": 1.3254738935758086,
      "learning_rate": 0.003,
      "loss": 4.0461,
      "step": 13274
    },
    {
      "epoch": 0.13275,
      "grad_norm": 1.2485461958782031,
      "learning_rate": 0.003,
      "loss": 4.0354,
      "step": 13275
    },
    {
      "epoch": 0.13276,
      "grad_norm": 1.0339646828439346,
      "learning_rate": 0.003,
      "loss": 4.0037,
      "step": 13276
    },
    {
      "epoch": 0.13277,
      "grad_norm": 1.2296936563599417,
      "learning_rate": 0.003,
      "loss": 4.0284,
      "step": 13277
    },
    {
      "epoch": 0.13278,
      "grad_norm": 1.1719359498015458,
      "learning_rate": 0.003,
      "loss": 4.0133,
      "step": 13278
    },
    {
      "epoch": 0.13279,
      "grad_norm": 1.2906893737489893,
      "learning_rate": 0.003,
      "loss": 4.0382,
      "step": 13279
    },
    {
      "epoch": 0.1328,
      "grad_norm": 0.9747260590672138,
      "learning_rate": 0.003,
      "loss": 3.9934,
      "step": 13280
    },
    {
      "epoch": 0.13281,
      "grad_norm": 1.3295126084209,
      "learning_rate": 0.003,
      "loss": 4.0249,
      "step": 13281
    },
    {
      "epoch": 0.13282,
      "grad_norm": 1.1543837502411551,
      "learning_rate": 0.003,
      "loss": 4.0469,
      "step": 13282
    },
    {
      "epoch": 0.13283,
      "grad_norm": 1.3388418810355132,
      "learning_rate": 0.003,
      "loss": 4.0333,
      "step": 13283
    },
    {
      "epoch": 0.13284,
      "grad_norm": 0.9952350561662376,
      "learning_rate": 0.003,
      "loss": 4.0204,
      "step": 13284
    },
    {
      "epoch": 0.13285,
      "grad_norm": 1.4499501567801372,
      "learning_rate": 0.003,
      "loss": 4.014,
      "step": 13285
    },
    {
      "epoch": 0.13286,
      "grad_norm": 1.066590029594129,
      "learning_rate": 0.003,
      "loss": 4.0555,
      "step": 13286
    },
    {
      "epoch": 0.13287,
      "grad_norm": 1.388542523326155,
      "learning_rate": 0.003,
      "loss": 4.0341,
      "step": 13287
    },
    {
      "epoch": 0.13288,
      "grad_norm": 1.242952241814662,
      "learning_rate": 0.003,
      "loss": 4.029,
      "step": 13288
    },
    {
      "epoch": 0.13289,
      "grad_norm": 1.2844622720087564,
      "learning_rate": 0.003,
      "loss": 4.0503,
      "step": 13289
    },
    {
      "epoch": 0.1329,
      "grad_norm": 1.1296034429914248,
      "learning_rate": 0.003,
      "loss": 4.0495,
      "step": 13290
    },
    {
      "epoch": 0.13291,
      "grad_norm": 1.1817733386973799,
      "learning_rate": 0.003,
      "loss": 4.0266,
      "step": 13291
    },
    {
      "epoch": 0.13292,
      "grad_norm": 1.227418218840078,
      "learning_rate": 0.003,
      "loss": 4.0304,
      "step": 13292
    },
    {
      "epoch": 0.13293,
      "grad_norm": 1.2934766559092665,
      "learning_rate": 0.003,
      "loss": 4.0158,
      "step": 13293
    },
    {
      "epoch": 0.13294,
      "grad_norm": 1.2324210240838256,
      "learning_rate": 0.003,
      "loss": 4.0302,
      "step": 13294
    },
    {
      "epoch": 0.13295,
      "grad_norm": 1.1190504319413743,
      "learning_rate": 0.003,
      "loss": 4.0289,
      "step": 13295
    },
    {
      "epoch": 0.13296,
      "grad_norm": 1.3831470071383796,
      "learning_rate": 0.003,
      "loss": 4.0655,
      "step": 13296
    },
    {
      "epoch": 0.13297,
      "grad_norm": 0.9024796111158896,
      "learning_rate": 0.003,
      "loss": 4.0175,
      "step": 13297
    },
    {
      "epoch": 0.13298,
      "grad_norm": 1.3130099662344958,
      "learning_rate": 0.003,
      "loss": 4.0156,
      "step": 13298
    },
    {
      "epoch": 0.13299,
      "grad_norm": 1.1678197485533626,
      "learning_rate": 0.003,
      "loss": 4.0711,
      "step": 13299
    },
    {
      "epoch": 0.133,
      "grad_norm": 1.2927678212090512,
      "learning_rate": 0.003,
      "loss": 4.0329,
      "step": 13300
    },
    {
      "epoch": 0.13301,
      "grad_norm": 1.1463633834307947,
      "learning_rate": 0.003,
      "loss": 4.0154,
      "step": 13301
    },
    {
      "epoch": 0.13302,
      "grad_norm": 1.3051173709107187,
      "learning_rate": 0.003,
      "loss": 3.9841,
      "step": 13302
    },
    {
      "epoch": 0.13303,
      "grad_norm": 1.136023830327464,
      "learning_rate": 0.003,
      "loss": 4.0354,
      "step": 13303
    },
    {
      "epoch": 0.13304,
      "grad_norm": 1.231542541977393,
      "learning_rate": 0.003,
      "loss": 4.0294,
      "step": 13304
    },
    {
      "epoch": 0.13305,
      "grad_norm": 1.0420078614674835,
      "learning_rate": 0.003,
      "loss": 4.0462,
      "step": 13305
    },
    {
      "epoch": 0.13306,
      "grad_norm": 1.4456939860953055,
      "learning_rate": 0.003,
      "loss": 4.0305,
      "step": 13306
    },
    {
      "epoch": 0.13307,
      "grad_norm": 0.9463696467956895,
      "learning_rate": 0.003,
      "loss": 4.0033,
      "step": 13307
    },
    {
      "epoch": 0.13308,
      "grad_norm": 1.372066712252764,
      "learning_rate": 0.003,
      "loss": 4.0227,
      "step": 13308
    },
    {
      "epoch": 0.13309,
      "grad_norm": 1.2982308263401467,
      "learning_rate": 0.003,
      "loss": 4.0082,
      "step": 13309
    },
    {
      "epoch": 0.1331,
      "grad_norm": 1.3021473169835807,
      "learning_rate": 0.003,
      "loss": 4.0171,
      "step": 13310
    },
    {
      "epoch": 0.13311,
      "grad_norm": 1.1181520647256893,
      "learning_rate": 0.003,
      "loss": 4.005,
      "step": 13311
    },
    {
      "epoch": 0.13312,
      "grad_norm": 1.1323773951622496,
      "learning_rate": 0.003,
      "loss": 4.03,
      "step": 13312
    },
    {
      "epoch": 0.13313,
      "grad_norm": 1.2994672754854095,
      "learning_rate": 0.003,
      "loss": 4.0088,
      "step": 13313
    },
    {
      "epoch": 0.13314,
      "grad_norm": 1.1123813052260192,
      "learning_rate": 0.003,
      "loss": 4.0171,
      "step": 13314
    },
    {
      "epoch": 0.13315,
      "grad_norm": 1.3356269811242296,
      "learning_rate": 0.003,
      "loss": 4.039,
      "step": 13315
    },
    {
      "epoch": 0.13316,
      "grad_norm": 1.1307571245786385,
      "learning_rate": 0.003,
      "loss": 4.0078,
      "step": 13316
    },
    {
      "epoch": 0.13317,
      "grad_norm": 1.1289097193017215,
      "learning_rate": 0.003,
      "loss": 4.029,
      "step": 13317
    },
    {
      "epoch": 0.13318,
      "grad_norm": 1.2008083845027793,
      "learning_rate": 0.003,
      "loss": 4.0224,
      "step": 13318
    },
    {
      "epoch": 0.13319,
      "grad_norm": 1.1306337491064893,
      "learning_rate": 0.003,
      "loss": 4.0421,
      "step": 13319
    },
    {
      "epoch": 0.1332,
      "grad_norm": 1.3679305192275055,
      "learning_rate": 0.003,
      "loss": 4.0218,
      "step": 13320
    },
    {
      "epoch": 0.13321,
      "grad_norm": 1.1711826264907252,
      "learning_rate": 0.003,
      "loss": 4.0317,
      "step": 13321
    },
    {
      "epoch": 0.13322,
      "grad_norm": 1.0966041565646945,
      "learning_rate": 0.003,
      "loss": 4.0193,
      "step": 13322
    },
    {
      "epoch": 0.13323,
      "grad_norm": 1.1551860383609442,
      "learning_rate": 0.003,
      "loss": 4.0335,
      "step": 13323
    },
    {
      "epoch": 0.13324,
      "grad_norm": 1.0761459065829688,
      "learning_rate": 0.003,
      "loss": 4.0375,
      "step": 13324
    },
    {
      "epoch": 0.13325,
      "grad_norm": 1.1855031922305135,
      "learning_rate": 0.003,
      "loss": 4.0205,
      "step": 13325
    },
    {
      "epoch": 0.13326,
      "grad_norm": 1.358087467790536,
      "learning_rate": 0.003,
      "loss": 4.0557,
      "step": 13326
    },
    {
      "epoch": 0.13327,
      "grad_norm": 1.5789407226797987,
      "learning_rate": 0.003,
      "loss": 4.0227,
      "step": 13327
    },
    {
      "epoch": 0.13328,
      "grad_norm": 1.0846902206425766,
      "learning_rate": 0.003,
      "loss": 4.0348,
      "step": 13328
    },
    {
      "epoch": 0.13329,
      "grad_norm": 1.166482834679772,
      "learning_rate": 0.003,
      "loss": 4.0465,
      "step": 13329
    },
    {
      "epoch": 0.1333,
      "grad_norm": 1.1676348442033728,
      "learning_rate": 0.003,
      "loss": 4.0442,
      "step": 13330
    },
    {
      "epoch": 0.13331,
      "grad_norm": 1.2387302200322514,
      "learning_rate": 0.003,
      "loss": 4.0303,
      "step": 13331
    },
    {
      "epoch": 0.13332,
      "grad_norm": 1.090022503966062,
      "learning_rate": 0.003,
      "loss": 4.016,
      "step": 13332
    },
    {
      "epoch": 0.13333,
      "grad_norm": 1.1993557738278178,
      "learning_rate": 0.003,
      "loss": 4.0246,
      "step": 13333
    },
    {
      "epoch": 0.13334,
      "grad_norm": 1.149779889121198,
      "learning_rate": 0.003,
      "loss": 4.0329,
      "step": 13334
    },
    {
      "epoch": 0.13335,
      "grad_norm": 1.2404908878452907,
      "learning_rate": 0.003,
      "loss": 3.9969,
      "step": 13335
    },
    {
      "epoch": 0.13336,
      "grad_norm": 1.1097718936919116,
      "learning_rate": 0.003,
      "loss": 4.017,
      "step": 13336
    },
    {
      "epoch": 0.13337,
      "grad_norm": 1.3923059951933716,
      "learning_rate": 0.003,
      "loss": 4.0197,
      "step": 13337
    },
    {
      "epoch": 0.13338,
      "grad_norm": 1.2188422973936779,
      "learning_rate": 0.003,
      "loss": 4.0581,
      "step": 13338
    },
    {
      "epoch": 0.13339,
      "grad_norm": 1.4947926128477245,
      "learning_rate": 0.003,
      "loss": 4.0339,
      "step": 13339
    },
    {
      "epoch": 0.1334,
      "grad_norm": 1.135167360467982,
      "learning_rate": 0.003,
      "loss": 4.0066,
      "step": 13340
    },
    {
      "epoch": 0.13341,
      "grad_norm": 1.2957735877895509,
      "learning_rate": 0.003,
      "loss": 4.0615,
      "step": 13341
    },
    {
      "epoch": 0.13342,
      "grad_norm": 1.0937534826464805,
      "learning_rate": 0.003,
      "loss": 4.0341,
      "step": 13342
    },
    {
      "epoch": 0.13343,
      "grad_norm": 1.146491272433081,
      "learning_rate": 0.003,
      "loss": 4.0115,
      "step": 13343
    },
    {
      "epoch": 0.13344,
      "grad_norm": 1.4468045189337417,
      "learning_rate": 0.003,
      "loss": 4.062,
      "step": 13344
    },
    {
      "epoch": 0.13345,
      "grad_norm": 1.2592429566456962,
      "learning_rate": 0.003,
      "loss": 4.0622,
      "step": 13345
    },
    {
      "epoch": 0.13346,
      "grad_norm": 1.2143728986915379,
      "learning_rate": 0.003,
      "loss": 4.0245,
      "step": 13346
    },
    {
      "epoch": 0.13347,
      "grad_norm": 1.1905439376198783,
      "learning_rate": 0.003,
      "loss": 4.0067,
      "step": 13347
    },
    {
      "epoch": 0.13348,
      "grad_norm": 1.1443391482742789,
      "learning_rate": 0.003,
      "loss": 4.0373,
      "step": 13348
    },
    {
      "epoch": 0.13349,
      "grad_norm": 1.246186743392689,
      "learning_rate": 0.003,
      "loss": 4.0263,
      "step": 13349
    },
    {
      "epoch": 0.1335,
      "grad_norm": 1.0806035133689187,
      "learning_rate": 0.003,
      "loss": 4.0079,
      "step": 13350
    },
    {
      "epoch": 0.13351,
      "grad_norm": 1.123461988084758,
      "learning_rate": 0.003,
      "loss": 4.0308,
      "step": 13351
    },
    {
      "epoch": 0.13352,
      "grad_norm": 1.1992586433931847,
      "learning_rate": 0.003,
      "loss": 4.0035,
      "step": 13352
    },
    {
      "epoch": 0.13353,
      "grad_norm": 1.2287700219343816,
      "learning_rate": 0.003,
      "loss": 4.0322,
      "step": 13353
    },
    {
      "epoch": 0.13354,
      "grad_norm": 1.145713010144192,
      "learning_rate": 0.003,
      "loss": 4.0269,
      "step": 13354
    },
    {
      "epoch": 0.13355,
      "grad_norm": 1.3129522428667089,
      "learning_rate": 0.003,
      "loss": 4.0223,
      "step": 13355
    },
    {
      "epoch": 0.13356,
      "grad_norm": 1.1217225093906096,
      "learning_rate": 0.003,
      "loss": 4.0322,
      "step": 13356
    },
    {
      "epoch": 0.13357,
      "grad_norm": 1.3610017363079503,
      "learning_rate": 0.003,
      "loss": 4.0236,
      "step": 13357
    },
    {
      "epoch": 0.13358,
      "grad_norm": 1.0659946872902204,
      "learning_rate": 0.003,
      "loss": 4.0064,
      "step": 13358
    },
    {
      "epoch": 0.13359,
      "grad_norm": 1.4579500688790328,
      "learning_rate": 0.003,
      "loss": 4.0454,
      "step": 13359
    },
    {
      "epoch": 0.1336,
      "grad_norm": 1.003934981079245,
      "learning_rate": 0.003,
      "loss": 4.0264,
      "step": 13360
    },
    {
      "epoch": 0.13361,
      "grad_norm": 1.1638155922468505,
      "learning_rate": 0.003,
      "loss": 4.0082,
      "step": 13361
    },
    {
      "epoch": 0.13362,
      "grad_norm": 1.0889598114731738,
      "learning_rate": 0.003,
      "loss": 4.0107,
      "step": 13362
    },
    {
      "epoch": 0.13363,
      "grad_norm": 1.3693945445048112,
      "learning_rate": 0.003,
      "loss": 4.0221,
      "step": 13363
    },
    {
      "epoch": 0.13364,
      "grad_norm": 1.0988775373011543,
      "learning_rate": 0.003,
      "loss": 4.0232,
      "step": 13364
    },
    {
      "epoch": 0.13365,
      "grad_norm": 1.4802228295419162,
      "learning_rate": 0.003,
      "loss": 4.0238,
      "step": 13365
    },
    {
      "epoch": 0.13366,
      "grad_norm": 1.1350414757360303,
      "learning_rate": 0.003,
      "loss": 4.0322,
      "step": 13366
    },
    {
      "epoch": 0.13367,
      "grad_norm": 1.0970300311081125,
      "learning_rate": 0.003,
      "loss": 4.0027,
      "step": 13367
    },
    {
      "epoch": 0.13368,
      "grad_norm": 1.2364580439070338,
      "learning_rate": 0.003,
      "loss": 4.0431,
      "step": 13368
    },
    {
      "epoch": 0.13369,
      "grad_norm": 1.1393845065668011,
      "learning_rate": 0.003,
      "loss": 4.0276,
      "step": 13369
    },
    {
      "epoch": 0.1337,
      "grad_norm": 1.1533636621662187,
      "learning_rate": 0.003,
      "loss": 4.0229,
      "step": 13370
    },
    {
      "epoch": 0.13371,
      "grad_norm": 1.0816471655884832,
      "learning_rate": 0.003,
      "loss": 4.0233,
      "step": 13371
    },
    {
      "epoch": 0.13372,
      "grad_norm": 1.193792088943506,
      "learning_rate": 0.003,
      "loss": 4.0225,
      "step": 13372
    },
    {
      "epoch": 0.13373,
      "grad_norm": 1.1094174710891824,
      "learning_rate": 0.003,
      "loss": 4.0118,
      "step": 13373
    },
    {
      "epoch": 0.13374,
      "grad_norm": 1.2879292981648436,
      "learning_rate": 0.003,
      "loss": 4.0504,
      "step": 13374
    },
    {
      "epoch": 0.13375,
      "grad_norm": 1.1453786363334113,
      "learning_rate": 0.003,
      "loss": 3.9989,
      "step": 13375
    },
    {
      "epoch": 0.13376,
      "grad_norm": 1.3055465273095053,
      "learning_rate": 0.003,
      "loss": 4.0403,
      "step": 13376
    },
    {
      "epoch": 0.13377,
      "grad_norm": 1.1532149751861094,
      "learning_rate": 0.003,
      "loss": 4.0036,
      "step": 13377
    },
    {
      "epoch": 0.13378,
      "grad_norm": 1.2267252708924727,
      "learning_rate": 0.003,
      "loss": 4.0108,
      "step": 13378
    },
    {
      "epoch": 0.13379,
      "grad_norm": 1.2465260797365454,
      "learning_rate": 0.003,
      "loss": 4.0132,
      "step": 13379
    },
    {
      "epoch": 0.1338,
      "grad_norm": 1.1671338719435786,
      "learning_rate": 0.003,
      "loss": 4.0114,
      "step": 13380
    },
    {
      "epoch": 0.13381,
      "grad_norm": 1.2770301462955111,
      "learning_rate": 0.003,
      "loss": 3.9949,
      "step": 13381
    },
    {
      "epoch": 0.13382,
      "grad_norm": 1.3607247962128857,
      "learning_rate": 0.003,
      "loss": 4.0267,
      "step": 13382
    },
    {
      "epoch": 0.13383,
      "grad_norm": 1.036366389732506,
      "learning_rate": 0.003,
      "loss": 4.0045,
      "step": 13383
    },
    {
      "epoch": 0.13384,
      "grad_norm": 1.2065138074211352,
      "learning_rate": 0.003,
      "loss": 3.9827,
      "step": 13384
    },
    {
      "epoch": 0.13385,
      "grad_norm": 1.1857547899204484,
      "learning_rate": 0.003,
      "loss": 4.0269,
      "step": 13385
    },
    {
      "epoch": 0.13386,
      "grad_norm": 0.946051102788784,
      "learning_rate": 0.003,
      "loss": 4.023,
      "step": 13386
    },
    {
      "epoch": 0.13387,
      "grad_norm": 1.1633476212241936,
      "learning_rate": 0.003,
      "loss": 4.0067,
      "step": 13387
    },
    {
      "epoch": 0.13388,
      "grad_norm": 1.3245812612445407,
      "learning_rate": 0.003,
      "loss": 4.0323,
      "step": 13388
    },
    {
      "epoch": 0.13389,
      "grad_norm": 1.134263149423251,
      "learning_rate": 0.003,
      "loss": 4.0237,
      "step": 13389
    },
    {
      "epoch": 0.1339,
      "grad_norm": 1.296265090606207,
      "learning_rate": 0.003,
      "loss": 4.0179,
      "step": 13390
    },
    {
      "epoch": 0.13391,
      "grad_norm": 1.1360413290115436,
      "learning_rate": 0.003,
      "loss": 3.9997,
      "step": 13391
    },
    {
      "epoch": 0.13392,
      "grad_norm": 1.291707542924984,
      "learning_rate": 0.003,
      "loss": 4.0185,
      "step": 13392
    },
    {
      "epoch": 0.13393,
      "grad_norm": 1.076626333520554,
      "learning_rate": 0.003,
      "loss": 4.0295,
      "step": 13393
    },
    {
      "epoch": 0.13394,
      "grad_norm": 1.2115304300049723,
      "learning_rate": 0.003,
      "loss": 4.0255,
      "step": 13394
    },
    {
      "epoch": 0.13395,
      "grad_norm": 1.126113520409547,
      "learning_rate": 0.003,
      "loss": 4.0046,
      "step": 13395
    },
    {
      "epoch": 0.13396,
      "grad_norm": 1.2100032511507974,
      "learning_rate": 0.003,
      "loss": 4.0036,
      "step": 13396
    },
    {
      "epoch": 0.13397,
      "grad_norm": 1.1413697699504317,
      "learning_rate": 0.003,
      "loss": 4.0152,
      "step": 13397
    },
    {
      "epoch": 0.13398,
      "grad_norm": 1.1369134666653362,
      "learning_rate": 0.003,
      "loss": 4.0068,
      "step": 13398
    },
    {
      "epoch": 0.13399,
      "grad_norm": 1.141769147643521,
      "learning_rate": 0.003,
      "loss": 3.9831,
      "step": 13399
    },
    {
      "epoch": 0.134,
      "grad_norm": 1.351880588084034,
      "learning_rate": 0.003,
      "loss": 4.0475,
      "step": 13400
    },
    {
      "epoch": 0.13401,
      "grad_norm": 1.0694809056532624,
      "learning_rate": 0.003,
      "loss": 4.0098,
      "step": 13401
    },
    {
      "epoch": 0.13402,
      "grad_norm": 1.2462804234100007,
      "learning_rate": 0.003,
      "loss": 3.9751,
      "step": 13402
    },
    {
      "epoch": 0.13403,
      "grad_norm": 0.9550894770645565,
      "learning_rate": 0.003,
      "loss": 4.0535,
      "step": 13403
    },
    {
      "epoch": 0.13404,
      "grad_norm": 1.4004013738063046,
      "learning_rate": 0.003,
      "loss": 4.0564,
      "step": 13404
    },
    {
      "epoch": 0.13405,
      "grad_norm": 1.0182565499258522,
      "learning_rate": 0.003,
      "loss": 4.008,
      "step": 13405
    },
    {
      "epoch": 0.13406,
      "grad_norm": 1.3389242782316388,
      "learning_rate": 0.003,
      "loss": 4.0364,
      "step": 13406
    },
    {
      "epoch": 0.13407,
      "grad_norm": 1.265463794551261,
      "learning_rate": 0.003,
      "loss": 4.0357,
      "step": 13407
    },
    {
      "epoch": 0.13408,
      "grad_norm": 1.334487167757332,
      "learning_rate": 0.003,
      "loss": 4.0229,
      "step": 13408
    },
    {
      "epoch": 0.13409,
      "grad_norm": 1.3576258794005125,
      "learning_rate": 0.003,
      "loss": 4.0266,
      "step": 13409
    },
    {
      "epoch": 0.1341,
      "grad_norm": 1.2184686143115655,
      "learning_rate": 0.003,
      "loss": 4.003,
      "step": 13410
    },
    {
      "epoch": 0.13411,
      "grad_norm": 0.9916996275319055,
      "learning_rate": 0.003,
      "loss": 4.0295,
      "step": 13411
    },
    {
      "epoch": 0.13412,
      "grad_norm": 1.2216044612518433,
      "learning_rate": 0.003,
      "loss": 4.0231,
      "step": 13412
    },
    {
      "epoch": 0.13413,
      "grad_norm": 1.160783345128535,
      "learning_rate": 0.003,
      "loss": 4.0303,
      "step": 13413
    },
    {
      "epoch": 0.13414,
      "grad_norm": 1.1292722771811052,
      "learning_rate": 0.003,
      "loss": 4.042,
      "step": 13414
    },
    {
      "epoch": 0.13415,
      "grad_norm": 1.126718355717055,
      "learning_rate": 0.003,
      "loss": 4.0185,
      "step": 13415
    },
    {
      "epoch": 0.13416,
      "grad_norm": 1.2476269931115544,
      "learning_rate": 0.003,
      "loss": 4.0293,
      "step": 13416
    },
    {
      "epoch": 0.13417,
      "grad_norm": 1.2048295152923227,
      "learning_rate": 0.003,
      "loss": 4.0012,
      "step": 13417
    },
    {
      "epoch": 0.13418,
      "grad_norm": 1.3428948792281432,
      "learning_rate": 0.003,
      "loss": 4.0218,
      "step": 13418
    },
    {
      "epoch": 0.13419,
      "grad_norm": 1.1181666148157166,
      "learning_rate": 0.003,
      "loss": 4.037,
      "step": 13419
    },
    {
      "epoch": 0.1342,
      "grad_norm": 1.3215144632242453,
      "learning_rate": 0.003,
      "loss": 4.0141,
      "step": 13420
    },
    {
      "epoch": 0.13421,
      "grad_norm": 1.3097747341982395,
      "learning_rate": 0.003,
      "loss": 4.0373,
      "step": 13421
    },
    {
      "epoch": 0.13422,
      "grad_norm": 1.198201371504427,
      "learning_rate": 0.003,
      "loss": 4.0448,
      "step": 13422
    },
    {
      "epoch": 0.13423,
      "grad_norm": 1.2244440779414874,
      "learning_rate": 0.003,
      "loss": 4.0253,
      "step": 13423
    },
    {
      "epoch": 0.13424,
      "grad_norm": 0.9789279406920552,
      "learning_rate": 0.003,
      "loss": 4.0135,
      "step": 13424
    },
    {
      "epoch": 0.13425,
      "grad_norm": 1.419037471910816,
      "learning_rate": 0.003,
      "loss": 4.0355,
      "step": 13425
    },
    {
      "epoch": 0.13426,
      "grad_norm": 1.0024421860356587,
      "learning_rate": 0.003,
      "loss": 3.9926,
      "step": 13426
    },
    {
      "epoch": 0.13427,
      "grad_norm": 1.236195542106977,
      "learning_rate": 0.003,
      "loss": 4.0316,
      "step": 13427
    },
    {
      "epoch": 0.13428,
      "grad_norm": 1.2230106966663075,
      "learning_rate": 0.003,
      "loss": 4.0417,
      "step": 13428
    },
    {
      "epoch": 0.13429,
      "grad_norm": 1.2019836590046975,
      "learning_rate": 0.003,
      "loss": 4.0346,
      "step": 13429
    },
    {
      "epoch": 0.1343,
      "grad_norm": 1.4494027288332898,
      "learning_rate": 0.003,
      "loss": 4.0067,
      "step": 13430
    },
    {
      "epoch": 0.13431,
      "grad_norm": 1.192145526605227,
      "learning_rate": 0.003,
      "loss": 4.032,
      "step": 13431
    },
    {
      "epoch": 0.13432,
      "grad_norm": 1.1248484191707095,
      "learning_rate": 0.003,
      "loss": 4.0176,
      "step": 13432
    },
    {
      "epoch": 0.13433,
      "grad_norm": 1.260785974024862,
      "learning_rate": 0.003,
      "loss": 4.0191,
      "step": 13433
    },
    {
      "epoch": 0.13434,
      "grad_norm": 1.0400060793363335,
      "learning_rate": 0.003,
      "loss": 4.0239,
      "step": 13434
    },
    {
      "epoch": 0.13435,
      "grad_norm": 1.0783745236421138,
      "learning_rate": 0.003,
      "loss": 4.0479,
      "step": 13435
    },
    {
      "epoch": 0.13436,
      "grad_norm": 1.165074049861491,
      "learning_rate": 0.003,
      "loss": 4.0395,
      "step": 13436
    },
    {
      "epoch": 0.13437,
      "grad_norm": 1.162590117493891,
      "learning_rate": 0.003,
      "loss": 4.0118,
      "step": 13437
    },
    {
      "epoch": 0.13438,
      "grad_norm": 1.0904670998164137,
      "learning_rate": 0.003,
      "loss": 4.0305,
      "step": 13438
    },
    {
      "epoch": 0.13439,
      "grad_norm": 1.3044658478058948,
      "learning_rate": 0.003,
      "loss": 4.0374,
      "step": 13439
    },
    {
      "epoch": 0.1344,
      "grad_norm": 1.2585749984772712,
      "learning_rate": 0.003,
      "loss": 4.0416,
      "step": 13440
    },
    {
      "epoch": 0.13441,
      "grad_norm": 1.1350577146238163,
      "learning_rate": 0.003,
      "loss": 4.004,
      "step": 13441
    },
    {
      "epoch": 0.13442,
      "grad_norm": 1.2934824566661551,
      "learning_rate": 0.003,
      "loss": 4.0206,
      "step": 13442
    },
    {
      "epoch": 0.13443,
      "grad_norm": 1.1704384561374903,
      "learning_rate": 0.003,
      "loss": 4.0305,
      "step": 13443
    },
    {
      "epoch": 0.13444,
      "grad_norm": 1.2287067692197493,
      "learning_rate": 0.003,
      "loss": 4.0083,
      "step": 13444
    },
    {
      "epoch": 0.13445,
      "grad_norm": 1.052312126159789,
      "learning_rate": 0.003,
      "loss": 4.0145,
      "step": 13445
    },
    {
      "epoch": 0.13446,
      "grad_norm": 1.3351631871063565,
      "learning_rate": 0.003,
      "loss": 4.0385,
      "step": 13446
    },
    {
      "epoch": 0.13447,
      "grad_norm": 1.1456741213952397,
      "learning_rate": 0.003,
      "loss": 4.0591,
      "step": 13447
    },
    {
      "epoch": 0.13448,
      "grad_norm": 1.3178994702812536,
      "learning_rate": 0.003,
      "loss": 4.0336,
      "step": 13448
    },
    {
      "epoch": 0.13449,
      "grad_norm": 1.1376435500601898,
      "learning_rate": 0.003,
      "loss": 4.0314,
      "step": 13449
    },
    {
      "epoch": 0.1345,
      "grad_norm": 1.5577330564326097,
      "learning_rate": 0.003,
      "loss": 3.9742,
      "step": 13450
    },
    {
      "epoch": 0.13451,
      "grad_norm": 1.240799192639951,
      "learning_rate": 0.003,
      "loss": 4.0232,
      "step": 13451
    },
    {
      "epoch": 0.13452,
      "grad_norm": 1.3303368261452497,
      "learning_rate": 0.003,
      "loss": 4.0179,
      "step": 13452
    },
    {
      "epoch": 0.13453,
      "grad_norm": 0.9864499235298809,
      "learning_rate": 0.003,
      "loss": 4.0224,
      "step": 13453
    },
    {
      "epoch": 0.13454,
      "grad_norm": 1.3913306919812294,
      "learning_rate": 0.003,
      "loss": 4.0294,
      "step": 13454
    },
    {
      "epoch": 0.13455,
      "grad_norm": 0.9262772369745972,
      "learning_rate": 0.003,
      "loss": 4.0413,
      "step": 13455
    },
    {
      "epoch": 0.13456,
      "grad_norm": 1.1715238243590518,
      "learning_rate": 0.003,
      "loss": 4.0408,
      "step": 13456
    },
    {
      "epoch": 0.13457,
      "grad_norm": 1.007775791407712,
      "learning_rate": 0.003,
      "loss": 3.9987,
      "step": 13457
    },
    {
      "epoch": 0.13458,
      "grad_norm": 1.4373867057512402,
      "learning_rate": 0.003,
      "loss": 4.0548,
      "step": 13458
    },
    {
      "epoch": 0.13459,
      "grad_norm": 1.2087819192138398,
      "learning_rate": 0.003,
      "loss": 4.0513,
      "step": 13459
    },
    {
      "epoch": 0.1346,
      "grad_norm": 1.2761546089109206,
      "learning_rate": 0.003,
      "loss": 4.0359,
      "step": 13460
    },
    {
      "epoch": 0.13461,
      "grad_norm": 1.3195149048198116,
      "learning_rate": 0.003,
      "loss": 4.0364,
      "step": 13461
    },
    {
      "epoch": 0.13462,
      "grad_norm": 1.4298685154422084,
      "learning_rate": 0.003,
      "loss": 4.0103,
      "step": 13462
    },
    {
      "epoch": 0.13463,
      "grad_norm": 1.0232005094391101,
      "learning_rate": 0.003,
      "loss": 4.0344,
      "step": 13463
    },
    {
      "epoch": 0.13464,
      "grad_norm": 1.3554979757755057,
      "learning_rate": 0.003,
      "loss": 4.0475,
      "step": 13464
    },
    {
      "epoch": 0.13465,
      "grad_norm": 1.1332202658140447,
      "learning_rate": 0.003,
      "loss": 4.0048,
      "step": 13465
    },
    {
      "epoch": 0.13466,
      "grad_norm": 1.3885916310764128,
      "learning_rate": 0.003,
      "loss": 4.0199,
      "step": 13466
    },
    {
      "epoch": 0.13467,
      "grad_norm": 1.1009272857461163,
      "learning_rate": 0.003,
      "loss": 4.048,
      "step": 13467
    },
    {
      "epoch": 0.13468,
      "grad_norm": 1.291927423311447,
      "learning_rate": 0.003,
      "loss": 4.0326,
      "step": 13468
    },
    {
      "epoch": 0.13469,
      "grad_norm": 1.2553231921644208,
      "learning_rate": 0.003,
      "loss": 4.0316,
      "step": 13469
    },
    {
      "epoch": 0.1347,
      "grad_norm": 1.5595510592158073,
      "learning_rate": 0.003,
      "loss": 4.0219,
      "step": 13470
    },
    {
      "epoch": 0.13471,
      "grad_norm": 1.4597265580853351,
      "learning_rate": 0.003,
      "loss": 4.0305,
      "step": 13471
    },
    {
      "epoch": 0.13472,
      "grad_norm": 1.06169793876242,
      "learning_rate": 0.003,
      "loss": 4.0213,
      "step": 13472
    },
    {
      "epoch": 0.13473,
      "grad_norm": 1.1661843523771311,
      "learning_rate": 0.003,
      "loss": 3.9915,
      "step": 13473
    },
    {
      "epoch": 0.13474,
      "grad_norm": 1.3141625224505493,
      "learning_rate": 0.003,
      "loss": 4.0313,
      "step": 13474
    },
    {
      "epoch": 0.13475,
      "grad_norm": 1.0191480284471333,
      "learning_rate": 0.003,
      "loss": 4.0241,
      "step": 13475
    },
    {
      "epoch": 0.13476,
      "grad_norm": 1.3593921109749711,
      "learning_rate": 0.003,
      "loss": 4.0189,
      "step": 13476
    },
    {
      "epoch": 0.13477,
      "grad_norm": 1.029868013350082,
      "learning_rate": 0.003,
      "loss": 4.0198,
      "step": 13477
    },
    {
      "epoch": 0.13478,
      "grad_norm": 1.220297263485053,
      "learning_rate": 0.003,
      "loss": 4.0346,
      "step": 13478
    },
    {
      "epoch": 0.13479,
      "grad_norm": 1.2216981759838992,
      "learning_rate": 0.003,
      "loss": 4.0134,
      "step": 13479
    },
    {
      "epoch": 0.1348,
      "grad_norm": 1.1588932875116496,
      "learning_rate": 0.003,
      "loss": 4.0367,
      "step": 13480
    },
    {
      "epoch": 0.13481,
      "grad_norm": 1.04814139318589,
      "learning_rate": 0.003,
      "loss": 4.0372,
      "step": 13481
    },
    {
      "epoch": 0.13482,
      "grad_norm": 1.27331759602881,
      "learning_rate": 0.003,
      "loss": 4.0098,
      "step": 13482
    },
    {
      "epoch": 0.13483,
      "grad_norm": 1.2024848177421057,
      "learning_rate": 0.003,
      "loss": 3.9871,
      "step": 13483
    },
    {
      "epoch": 0.13484,
      "grad_norm": 1.1441160988818182,
      "learning_rate": 0.003,
      "loss": 4.0254,
      "step": 13484
    },
    {
      "epoch": 0.13485,
      "grad_norm": 1.2492437430898804,
      "learning_rate": 0.003,
      "loss": 4.0434,
      "step": 13485
    },
    {
      "epoch": 0.13486,
      "grad_norm": 1.2067132902882773,
      "learning_rate": 0.003,
      "loss": 4.0216,
      "step": 13486
    },
    {
      "epoch": 0.13487,
      "grad_norm": 1.1942540105040946,
      "learning_rate": 0.003,
      "loss": 4.0093,
      "step": 13487
    },
    {
      "epoch": 0.13488,
      "grad_norm": 1.3019162946617349,
      "learning_rate": 0.003,
      "loss": 4.0282,
      "step": 13488
    },
    {
      "epoch": 0.13489,
      "grad_norm": 1.076361817110379,
      "learning_rate": 0.003,
      "loss": 4.0028,
      "step": 13489
    },
    {
      "epoch": 0.1349,
      "grad_norm": 1.0367542395991136,
      "learning_rate": 0.003,
      "loss": 4.019,
      "step": 13490
    },
    {
      "epoch": 0.13491,
      "grad_norm": 1.1496630472442426,
      "learning_rate": 0.003,
      "loss": 4.0283,
      "step": 13491
    },
    {
      "epoch": 0.13492,
      "grad_norm": 1.4714537384829591,
      "learning_rate": 0.003,
      "loss": 4.0339,
      "step": 13492
    },
    {
      "epoch": 0.13493,
      "grad_norm": 1.2095126352691563,
      "learning_rate": 0.003,
      "loss": 4.0019,
      "step": 13493
    },
    {
      "epoch": 0.13494,
      "grad_norm": 1.2689510126659043,
      "learning_rate": 0.003,
      "loss": 4.017,
      "step": 13494
    },
    {
      "epoch": 0.13495,
      "grad_norm": 1.1657755327513644,
      "learning_rate": 0.003,
      "loss": 4.0548,
      "step": 13495
    },
    {
      "epoch": 0.13496,
      "grad_norm": 1.1697449439005412,
      "learning_rate": 0.003,
      "loss": 4.0431,
      "step": 13496
    },
    {
      "epoch": 0.13497,
      "grad_norm": 1.2346266760881337,
      "learning_rate": 0.003,
      "loss": 4.0135,
      "step": 13497
    },
    {
      "epoch": 0.13498,
      "grad_norm": 1.6136535135218877,
      "learning_rate": 0.003,
      "loss": 4.037,
      "step": 13498
    },
    {
      "epoch": 0.13499,
      "grad_norm": 1.1067394699171362,
      "learning_rate": 0.003,
      "loss": 3.9888,
      "step": 13499
    },
    {
      "epoch": 0.135,
      "grad_norm": 1.266309341268125,
      "learning_rate": 0.003,
      "loss": 4.0235,
      "step": 13500
    },
    {
      "epoch": 0.13501,
      "grad_norm": 1.2728591837186685,
      "learning_rate": 0.003,
      "loss": 4.043,
      "step": 13501
    },
    {
      "epoch": 0.13502,
      "grad_norm": 1.2093142337945428,
      "learning_rate": 0.003,
      "loss": 4.0326,
      "step": 13502
    },
    {
      "epoch": 0.13503,
      "grad_norm": 1.0891243791379945,
      "learning_rate": 0.003,
      "loss": 4.0134,
      "step": 13503
    },
    {
      "epoch": 0.13504,
      "grad_norm": 1.121546991957497,
      "learning_rate": 0.003,
      "loss": 4.0135,
      "step": 13504
    },
    {
      "epoch": 0.13505,
      "grad_norm": 1.290359557449459,
      "learning_rate": 0.003,
      "loss": 4.0408,
      "step": 13505
    },
    {
      "epoch": 0.13506,
      "grad_norm": 1.1792088578482778,
      "learning_rate": 0.003,
      "loss": 4.0307,
      "step": 13506
    },
    {
      "epoch": 0.13507,
      "grad_norm": 1.2456337380797515,
      "learning_rate": 0.003,
      "loss": 4.0403,
      "step": 13507
    },
    {
      "epoch": 0.13508,
      "grad_norm": 1.4020559997019646,
      "learning_rate": 0.003,
      "loss": 4.014,
      "step": 13508
    },
    {
      "epoch": 0.13509,
      "grad_norm": 1.1533109080710795,
      "learning_rate": 0.003,
      "loss": 4.0135,
      "step": 13509
    },
    {
      "epoch": 0.1351,
      "grad_norm": 1.2745718197348026,
      "learning_rate": 0.003,
      "loss": 4.0298,
      "step": 13510
    },
    {
      "epoch": 0.13511,
      "grad_norm": 1.3014225992516306,
      "learning_rate": 0.003,
      "loss": 4.0203,
      "step": 13511
    },
    {
      "epoch": 0.13512,
      "grad_norm": 1.0258185120614944,
      "learning_rate": 0.003,
      "loss": 4.0151,
      "step": 13512
    },
    {
      "epoch": 0.13513,
      "grad_norm": 1.2987664783952992,
      "learning_rate": 0.003,
      "loss": 4.0146,
      "step": 13513
    },
    {
      "epoch": 0.13514,
      "grad_norm": 1.0301254210890254,
      "learning_rate": 0.003,
      "loss": 4.0373,
      "step": 13514
    },
    {
      "epoch": 0.13515,
      "grad_norm": 1.4063675226394983,
      "learning_rate": 0.003,
      "loss": 4.0363,
      "step": 13515
    },
    {
      "epoch": 0.13516,
      "grad_norm": 1.06062614177715,
      "learning_rate": 0.003,
      "loss": 3.9832,
      "step": 13516
    },
    {
      "epoch": 0.13517,
      "grad_norm": 1.2326757174346914,
      "learning_rate": 0.003,
      "loss": 4.0238,
      "step": 13517
    },
    {
      "epoch": 0.13518,
      "grad_norm": 0.9967612734492626,
      "learning_rate": 0.003,
      "loss": 4.0218,
      "step": 13518
    },
    {
      "epoch": 0.13519,
      "grad_norm": 1.3638494530839866,
      "learning_rate": 0.003,
      "loss": 4.0373,
      "step": 13519
    },
    {
      "epoch": 0.1352,
      "grad_norm": 0.9993900606979094,
      "learning_rate": 0.003,
      "loss": 3.997,
      "step": 13520
    },
    {
      "epoch": 0.13521,
      "grad_norm": 1.2647391132461463,
      "learning_rate": 0.003,
      "loss": 4.0327,
      "step": 13521
    },
    {
      "epoch": 0.13522,
      "grad_norm": 1.076840147795485,
      "learning_rate": 0.003,
      "loss": 4.0335,
      "step": 13522
    },
    {
      "epoch": 0.13523,
      "grad_norm": 1.536667316815823,
      "learning_rate": 0.003,
      "loss": 4.0257,
      "step": 13523
    },
    {
      "epoch": 0.13524,
      "grad_norm": 1.1907360604879682,
      "learning_rate": 0.003,
      "loss": 4.0355,
      "step": 13524
    },
    {
      "epoch": 0.13525,
      "grad_norm": 1.1183691818173742,
      "learning_rate": 0.003,
      "loss": 4.0273,
      "step": 13525
    },
    {
      "epoch": 0.13526,
      "grad_norm": 1.1958633957563631,
      "learning_rate": 0.003,
      "loss": 4.0147,
      "step": 13526
    },
    {
      "epoch": 0.13527,
      "grad_norm": 1.1733058300297852,
      "learning_rate": 0.003,
      "loss": 4.0165,
      "step": 13527
    },
    {
      "epoch": 0.13528,
      "grad_norm": 1.2818225476163603,
      "learning_rate": 0.003,
      "loss": 4.0164,
      "step": 13528
    },
    {
      "epoch": 0.13529,
      "grad_norm": 1.40427771743178,
      "learning_rate": 0.003,
      "loss": 4.0531,
      "step": 13529
    },
    {
      "epoch": 0.1353,
      "grad_norm": 1.0559809394022808,
      "learning_rate": 0.003,
      "loss": 4.0339,
      "step": 13530
    },
    {
      "epoch": 0.13531,
      "grad_norm": 1.2141375395885508,
      "learning_rate": 0.003,
      "loss": 4.0269,
      "step": 13531
    },
    {
      "epoch": 0.13532,
      "grad_norm": 1.1631699152071382,
      "learning_rate": 0.003,
      "loss": 4.0402,
      "step": 13532
    },
    {
      "epoch": 0.13533,
      "grad_norm": 1.1435788324807838,
      "learning_rate": 0.003,
      "loss": 4.0064,
      "step": 13533
    },
    {
      "epoch": 0.13534,
      "grad_norm": 1.0902001010477733,
      "learning_rate": 0.003,
      "loss": 4.0436,
      "step": 13534
    },
    {
      "epoch": 0.13535,
      "grad_norm": 1.3308338913254787,
      "learning_rate": 0.003,
      "loss": 4.0239,
      "step": 13535
    },
    {
      "epoch": 0.13536,
      "grad_norm": 1.1546192103612023,
      "learning_rate": 0.003,
      "loss": 4.0097,
      "step": 13536
    },
    {
      "epoch": 0.13537,
      "grad_norm": 1.0527740548229891,
      "learning_rate": 0.003,
      "loss": 4.0141,
      "step": 13537
    },
    {
      "epoch": 0.13538,
      "grad_norm": 1.3938719152318257,
      "learning_rate": 0.003,
      "loss": 4.0443,
      "step": 13538
    },
    {
      "epoch": 0.13539,
      "grad_norm": 1.294181469325066,
      "learning_rate": 0.003,
      "loss": 4.0258,
      "step": 13539
    },
    {
      "epoch": 0.1354,
      "grad_norm": 1.3313605648250066,
      "learning_rate": 0.003,
      "loss": 4.0144,
      "step": 13540
    },
    {
      "epoch": 0.13541,
      "grad_norm": 1.1090694180026441,
      "learning_rate": 0.003,
      "loss": 4.0528,
      "step": 13541
    },
    {
      "epoch": 0.13542,
      "grad_norm": 1.2566169447470952,
      "learning_rate": 0.003,
      "loss": 4.0403,
      "step": 13542
    },
    {
      "epoch": 0.13543,
      "grad_norm": 1.1364640575819667,
      "learning_rate": 0.003,
      "loss": 4.0127,
      "step": 13543
    },
    {
      "epoch": 0.13544,
      "grad_norm": 1.238508893201822,
      "learning_rate": 0.003,
      "loss": 4.0264,
      "step": 13544
    },
    {
      "epoch": 0.13545,
      "grad_norm": 1.3426050575470458,
      "learning_rate": 0.003,
      "loss": 4.0227,
      "step": 13545
    },
    {
      "epoch": 0.13546,
      "grad_norm": 1.1224828710678576,
      "learning_rate": 0.003,
      "loss": 4.0203,
      "step": 13546
    },
    {
      "epoch": 0.13547,
      "grad_norm": 1.4489054917469295,
      "learning_rate": 0.003,
      "loss": 3.9953,
      "step": 13547
    },
    {
      "epoch": 0.13548,
      "grad_norm": 0.9563686440940138,
      "learning_rate": 0.003,
      "loss": 4.027,
      "step": 13548
    },
    {
      "epoch": 0.13549,
      "grad_norm": 1.2742319396351343,
      "learning_rate": 0.003,
      "loss": 4.0084,
      "step": 13549
    },
    {
      "epoch": 0.1355,
      "grad_norm": 1.2331965104642149,
      "learning_rate": 0.003,
      "loss": 4.0065,
      "step": 13550
    },
    {
      "epoch": 0.13551,
      "grad_norm": 1.1910295938637434,
      "learning_rate": 0.003,
      "loss": 4.0211,
      "step": 13551
    },
    {
      "epoch": 0.13552,
      "grad_norm": 1.2063895895243764,
      "learning_rate": 0.003,
      "loss": 4.0263,
      "step": 13552
    },
    {
      "epoch": 0.13553,
      "grad_norm": 1.3495585085551443,
      "learning_rate": 0.003,
      "loss": 4.058,
      "step": 13553
    },
    {
      "epoch": 0.13554,
      "grad_norm": 1.3261178312946502,
      "learning_rate": 0.003,
      "loss": 4.0182,
      "step": 13554
    },
    {
      "epoch": 0.13555,
      "grad_norm": 1.2671382357999805,
      "learning_rate": 0.003,
      "loss": 3.9939,
      "step": 13555
    },
    {
      "epoch": 0.13556,
      "grad_norm": 1.013440546564239,
      "learning_rate": 0.003,
      "loss": 4.0076,
      "step": 13556
    },
    {
      "epoch": 0.13557,
      "grad_norm": 1.358405967910683,
      "learning_rate": 0.003,
      "loss": 4.0185,
      "step": 13557
    },
    {
      "epoch": 0.13558,
      "grad_norm": 0.9529425587072567,
      "learning_rate": 0.003,
      "loss": 3.9979,
      "step": 13558
    },
    {
      "epoch": 0.13559,
      "grad_norm": 1.3926067932351842,
      "learning_rate": 0.003,
      "loss": 4.0409,
      "step": 13559
    },
    {
      "epoch": 0.1356,
      "grad_norm": 1.1927999409657768,
      "learning_rate": 0.003,
      "loss": 4.0043,
      "step": 13560
    },
    {
      "epoch": 0.13561,
      "grad_norm": 1.2975418476490068,
      "learning_rate": 0.003,
      "loss": 4.0255,
      "step": 13561
    },
    {
      "epoch": 0.13562,
      "grad_norm": 1.268040128104862,
      "learning_rate": 0.003,
      "loss": 4.0016,
      "step": 13562
    },
    {
      "epoch": 0.13563,
      "grad_norm": 1.068965547878699,
      "learning_rate": 0.003,
      "loss": 3.9682,
      "step": 13563
    },
    {
      "epoch": 0.13564,
      "grad_norm": 1.2462949090924584,
      "learning_rate": 0.003,
      "loss": 4.0421,
      "step": 13564
    },
    {
      "epoch": 0.13565,
      "grad_norm": 1.0262326038063978,
      "learning_rate": 0.003,
      "loss": 4.0078,
      "step": 13565
    },
    {
      "epoch": 0.13566,
      "grad_norm": 1.241331896159747,
      "learning_rate": 0.003,
      "loss": 4.0182,
      "step": 13566
    },
    {
      "epoch": 0.13567,
      "grad_norm": 1.106533598252521,
      "learning_rate": 0.003,
      "loss": 4.0214,
      "step": 13567
    },
    {
      "epoch": 0.13568,
      "grad_norm": 1.2845913986916169,
      "learning_rate": 0.003,
      "loss": 4.022,
      "step": 13568
    },
    {
      "epoch": 0.13569,
      "grad_norm": 1.0934200649882952,
      "learning_rate": 0.003,
      "loss": 4.0241,
      "step": 13569
    },
    {
      "epoch": 0.1357,
      "grad_norm": 1.3469028866488575,
      "learning_rate": 0.003,
      "loss": 4.032,
      "step": 13570
    },
    {
      "epoch": 0.13571,
      "grad_norm": 1.1323505460539465,
      "learning_rate": 0.003,
      "loss": 4.0303,
      "step": 13571
    },
    {
      "epoch": 0.13572,
      "grad_norm": 1.3450317600710857,
      "learning_rate": 0.003,
      "loss": 4.0318,
      "step": 13572
    },
    {
      "epoch": 0.13573,
      "grad_norm": 1.217539370131012,
      "learning_rate": 0.003,
      "loss": 4.0077,
      "step": 13573
    },
    {
      "epoch": 0.13574,
      "grad_norm": 1.3619147322432303,
      "learning_rate": 0.003,
      "loss": 4.0321,
      "step": 13574
    },
    {
      "epoch": 0.13575,
      "grad_norm": 1.2943028459713686,
      "learning_rate": 0.003,
      "loss": 3.9969,
      "step": 13575
    },
    {
      "epoch": 0.13576,
      "grad_norm": 1.056776897622436,
      "learning_rate": 0.003,
      "loss": 4.0226,
      "step": 13576
    },
    {
      "epoch": 0.13577,
      "grad_norm": 1.372028195433907,
      "learning_rate": 0.003,
      "loss": 4.0335,
      "step": 13577
    },
    {
      "epoch": 0.13578,
      "grad_norm": 1.0412699789951014,
      "learning_rate": 0.003,
      "loss": 4.0329,
      "step": 13578
    },
    {
      "epoch": 0.13579,
      "grad_norm": 1.375918893580882,
      "learning_rate": 0.003,
      "loss": 4.0209,
      "step": 13579
    },
    {
      "epoch": 0.1358,
      "grad_norm": 1.0483388215012421,
      "learning_rate": 0.003,
      "loss": 4.0178,
      "step": 13580
    },
    {
      "epoch": 0.13581,
      "grad_norm": 1.2980414345055213,
      "learning_rate": 0.003,
      "loss": 4.0061,
      "step": 13581
    },
    {
      "epoch": 0.13582,
      "grad_norm": 1.0353181140648553,
      "learning_rate": 0.003,
      "loss": 4.0373,
      "step": 13582
    },
    {
      "epoch": 0.13583,
      "grad_norm": 1.265469273405755,
      "learning_rate": 0.003,
      "loss": 4.0211,
      "step": 13583
    },
    {
      "epoch": 0.13584,
      "grad_norm": 1.0112357228896525,
      "learning_rate": 0.003,
      "loss": 4.0208,
      "step": 13584
    },
    {
      "epoch": 0.13585,
      "grad_norm": 1.343005759231974,
      "learning_rate": 0.003,
      "loss": 4.0106,
      "step": 13585
    },
    {
      "epoch": 0.13586,
      "grad_norm": 1.2175197887500093,
      "learning_rate": 0.003,
      "loss": 4.0661,
      "step": 13586
    },
    {
      "epoch": 0.13587,
      "grad_norm": 1.179478814557246,
      "learning_rate": 0.003,
      "loss": 4.007,
      "step": 13587
    },
    {
      "epoch": 0.13588,
      "grad_norm": 1.307292876356786,
      "learning_rate": 0.003,
      "loss": 4.0243,
      "step": 13588
    },
    {
      "epoch": 0.13589,
      "grad_norm": 1.2162754598432555,
      "learning_rate": 0.003,
      "loss": 4.0267,
      "step": 13589
    },
    {
      "epoch": 0.1359,
      "grad_norm": 1.2257717680755864,
      "learning_rate": 0.003,
      "loss": 3.9964,
      "step": 13590
    },
    {
      "epoch": 0.13591,
      "grad_norm": 1.3668360738196097,
      "learning_rate": 0.003,
      "loss": 4.0108,
      "step": 13591
    },
    {
      "epoch": 0.13592,
      "grad_norm": 1.1463625785724558,
      "learning_rate": 0.003,
      "loss": 4.0124,
      "step": 13592
    },
    {
      "epoch": 0.13593,
      "grad_norm": 1.2555792951194609,
      "learning_rate": 0.003,
      "loss": 4.0185,
      "step": 13593
    },
    {
      "epoch": 0.13594,
      "grad_norm": 1.136425816545672,
      "learning_rate": 0.003,
      "loss": 4.0359,
      "step": 13594
    },
    {
      "epoch": 0.13595,
      "grad_norm": 1.3017380885036873,
      "learning_rate": 0.003,
      "loss": 4.0235,
      "step": 13595
    },
    {
      "epoch": 0.13596,
      "grad_norm": 1.2390442231201604,
      "learning_rate": 0.003,
      "loss": 4.0313,
      "step": 13596
    },
    {
      "epoch": 0.13597,
      "grad_norm": 1.0792428908460772,
      "learning_rate": 0.003,
      "loss": 4.0227,
      "step": 13597
    },
    {
      "epoch": 0.13598,
      "grad_norm": 1.1627665380886698,
      "learning_rate": 0.003,
      "loss": 4.0539,
      "step": 13598
    },
    {
      "epoch": 0.13599,
      "grad_norm": 1.0891261570324686,
      "learning_rate": 0.003,
      "loss": 4.0354,
      "step": 13599
    },
    {
      "epoch": 0.136,
      "grad_norm": 1.3290981851394779,
      "learning_rate": 0.003,
      "loss": 3.9806,
      "step": 13600
    },
    {
      "epoch": 0.13601,
      "grad_norm": 1.1191130692722162,
      "learning_rate": 0.003,
      "loss": 4.0342,
      "step": 13601
    },
    {
      "epoch": 0.13602,
      "grad_norm": 1.14018704398269,
      "learning_rate": 0.003,
      "loss": 4.0281,
      "step": 13602
    },
    {
      "epoch": 0.13603,
      "grad_norm": 1.4134013940831658,
      "learning_rate": 0.003,
      "loss": 4.0399,
      "step": 13603
    },
    {
      "epoch": 0.13604,
      "grad_norm": 0.9919395717353615,
      "learning_rate": 0.003,
      "loss": 4.0127,
      "step": 13604
    },
    {
      "epoch": 0.13605,
      "grad_norm": 1.4685076537988184,
      "learning_rate": 0.003,
      "loss": 3.9796,
      "step": 13605
    },
    {
      "epoch": 0.13606,
      "grad_norm": 1.3043209564442602,
      "learning_rate": 0.003,
      "loss": 4.0187,
      "step": 13606
    },
    {
      "epoch": 0.13607,
      "grad_norm": 1.2720661225646608,
      "learning_rate": 0.003,
      "loss": 4.0523,
      "step": 13607
    },
    {
      "epoch": 0.13608,
      "grad_norm": 1.1218482561071894,
      "learning_rate": 0.003,
      "loss": 4.0333,
      "step": 13608
    },
    {
      "epoch": 0.13609,
      "grad_norm": 1.1870233607488427,
      "learning_rate": 0.003,
      "loss": 3.9998,
      "step": 13609
    },
    {
      "epoch": 0.1361,
      "grad_norm": 1.4120658302714315,
      "learning_rate": 0.003,
      "loss": 4.0222,
      "step": 13610
    },
    {
      "epoch": 0.13611,
      "grad_norm": 1.068516624222778,
      "learning_rate": 0.003,
      "loss": 4.0096,
      "step": 13611
    },
    {
      "epoch": 0.13612,
      "grad_norm": 1.245146046438301,
      "learning_rate": 0.003,
      "loss": 4.0317,
      "step": 13612
    },
    {
      "epoch": 0.13613,
      "grad_norm": 1.145508954883413,
      "learning_rate": 0.003,
      "loss": 4.0172,
      "step": 13613
    },
    {
      "epoch": 0.13614,
      "grad_norm": 1.055471911251265,
      "learning_rate": 0.003,
      "loss": 4.0072,
      "step": 13614
    },
    {
      "epoch": 0.13615,
      "grad_norm": 1.1450571637294198,
      "learning_rate": 0.003,
      "loss": 4.0247,
      "step": 13615
    },
    {
      "epoch": 0.13616,
      "grad_norm": 1.148179332102041,
      "learning_rate": 0.003,
      "loss": 4.0086,
      "step": 13616
    },
    {
      "epoch": 0.13617,
      "grad_norm": 1.2645193541346786,
      "learning_rate": 0.003,
      "loss": 4.0167,
      "step": 13617
    },
    {
      "epoch": 0.13618,
      "grad_norm": 1.3830179107987441,
      "learning_rate": 0.003,
      "loss": 4.0493,
      "step": 13618
    },
    {
      "epoch": 0.13619,
      "grad_norm": 1.1986654364411988,
      "learning_rate": 0.003,
      "loss": 4.036,
      "step": 13619
    },
    {
      "epoch": 0.1362,
      "grad_norm": 1.1818480902296788,
      "learning_rate": 0.003,
      "loss": 4.0377,
      "step": 13620
    },
    {
      "epoch": 0.13621,
      "grad_norm": 1.1745157716112689,
      "learning_rate": 0.003,
      "loss": 4.0093,
      "step": 13621
    },
    {
      "epoch": 0.13622,
      "grad_norm": 1.1066332563589678,
      "learning_rate": 0.003,
      "loss": 4.014,
      "step": 13622
    },
    {
      "epoch": 0.13623,
      "grad_norm": 1.3645281692577584,
      "learning_rate": 0.003,
      "loss": 4.0255,
      "step": 13623
    },
    {
      "epoch": 0.13624,
      "grad_norm": 1.1929185751443674,
      "learning_rate": 0.003,
      "loss": 4.0215,
      "step": 13624
    },
    {
      "epoch": 0.13625,
      "grad_norm": 1.4248611420754493,
      "learning_rate": 0.003,
      "loss": 4.0231,
      "step": 13625
    },
    {
      "epoch": 0.13626,
      "grad_norm": 1.186299479007345,
      "learning_rate": 0.003,
      "loss": 4.0227,
      "step": 13626
    },
    {
      "epoch": 0.13627,
      "grad_norm": 1.0788810221878606,
      "learning_rate": 0.003,
      "loss": 3.9985,
      "step": 13627
    },
    {
      "epoch": 0.13628,
      "grad_norm": 1.2977698017293953,
      "learning_rate": 0.003,
      "loss": 4.0272,
      "step": 13628
    },
    {
      "epoch": 0.13629,
      "grad_norm": 1.07972580227147,
      "learning_rate": 0.003,
      "loss": 4.0049,
      "step": 13629
    },
    {
      "epoch": 0.1363,
      "grad_norm": 1.1688938042282193,
      "learning_rate": 0.003,
      "loss": 3.986,
      "step": 13630
    },
    {
      "epoch": 0.13631,
      "grad_norm": 1.3352889662898282,
      "learning_rate": 0.003,
      "loss": 4.0086,
      "step": 13631
    },
    {
      "epoch": 0.13632,
      "grad_norm": 1.1365058041036933,
      "learning_rate": 0.003,
      "loss": 4.0443,
      "step": 13632
    },
    {
      "epoch": 0.13633,
      "grad_norm": 1.315315923471128,
      "learning_rate": 0.003,
      "loss": 4.0181,
      "step": 13633
    },
    {
      "epoch": 0.13634,
      "grad_norm": 1.0931804813692574,
      "learning_rate": 0.003,
      "loss": 4.0071,
      "step": 13634
    },
    {
      "epoch": 0.13635,
      "grad_norm": 1.264910695283981,
      "learning_rate": 0.003,
      "loss": 4.0416,
      "step": 13635
    },
    {
      "epoch": 0.13636,
      "grad_norm": 1.2708625560327722,
      "learning_rate": 0.003,
      "loss": 4.0181,
      "step": 13636
    },
    {
      "epoch": 0.13637,
      "grad_norm": 0.9990499280248041,
      "learning_rate": 0.003,
      "loss": 3.997,
      "step": 13637
    },
    {
      "epoch": 0.13638,
      "grad_norm": 1.2961716625668893,
      "learning_rate": 0.003,
      "loss": 4.046,
      "step": 13638
    },
    {
      "epoch": 0.13639,
      "grad_norm": 1.1163179620633081,
      "learning_rate": 0.003,
      "loss": 4.024,
      "step": 13639
    },
    {
      "epoch": 0.1364,
      "grad_norm": 1.1076994011515442,
      "learning_rate": 0.003,
      "loss": 4.0075,
      "step": 13640
    },
    {
      "epoch": 0.13641,
      "grad_norm": 1.1762842182555493,
      "learning_rate": 0.003,
      "loss": 4.0477,
      "step": 13641
    },
    {
      "epoch": 0.13642,
      "grad_norm": 1.1464394748013567,
      "learning_rate": 0.003,
      "loss": 4.0251,
      "step": 13642
    },
    {
      "epoch": 0.13643,
      "grad_norm": 1.2598077685915399,
      "learning_rate": 0.003,
      "loss": 4.0349,
      "step": 13643
    },
    {
      "epoch": 0.13644,
      "grad_norm": 1.060741487532991,
      "learning_rate": 0.003,
      "loss": 3.9941,
      "step": 13644
    },
    {
      "epoch": 0.13645,
      "grad_norm": 1.377870157370045,
      "learning_rate": 0.003,
      "loss": 4.0077,
      "step": 13645
    },
    {
      "epoch": 0.13646,
      "grad_norm": 1.1109566861432008,
      "learning_rate": 0.003,
      "loss": 3.9889,
      "step": 13646
    },
    {
      "epoch": 0.13647,
      "grad_norm": 1.3433279023666778,
      "learning_rate": 0.003,
      "loss": 4.0295,
      "step": 13647
    },
    {
      "epoch": 0.13648,
      "grad_norm": 0.9491736753233733,
      "learning_rate": 0.003,
      "loss": 3.9852,
      "step": 13648
    },
    {
      "epoch": 0.13649,
      "grad_norm": 1.5014629142589342,
      "learning_rate": 0.003,
      "loss": 4.0346,
      "step": 13649
    },
    {
      "epoch": 0.1365,
      "grad_norm": 1.2241394783719237,
      "learning_rate": 0.003,
      "loss": 4.0252,
      "step": 13650
    }
  ],
  "logging_steps": 1,
  "max_steps": 100000,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 25,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 5.411357249765376e+17,
  "train_batch_size": 256,
  "trial_name": null,
  "trial_params": null
}